Script started on Tue 11 Sep 2012 06:11:59 PM CEST ]0;svc@kb-prod-udv-001:~/heritrix-3.1.0/bin[svc@kb-prod-udv-001 bin]$ ./migrate_templates.sh ~svc/workspace/netarchivesuite/harvestdefinitionbase dir/order_templates_dist/frontpages.xml ~/frontpages.dir2 H1 source: /home/svc/workspace/netarchivesuite/harvestdefinitionbasedir/order_templates_dist/frontpages.xml H3 destination: /home/svc/frontpages.dir2 Migrating settings.................................................................................................................................................... 12 settings skipped as not-applicable These are probably harmless, but if the following settings were important to your crawl process, investigate other options. //meta/date 20080118111217 //controller/*[@name="settings-directory"] settings //controller/*[@name="scope"]/*[@name="enabled"] true //controller/*[@name="scope"]/*[@name="reread-seeds-on-config"] true //controller/*[@name="frontier"]/*[@name="total-bandwidth-usage-KB-sec"] 1500 //controller/*[@name="frontier"]/*[@name="target-ready-backlog"] 50 //controller/*[@name="fetch-processors"]/*[@name="HTTP"]/*[@name="use-bdb-for-cookies"] true //controller/*[@name="post-processors"]/*[@name="Updater"]/*[@name="enabled"] true //controller/*[@name="post-processors"]/*[@name="LinksScoper"]/*[@name="enabled"] true //controller/*[@name="post-processors"]/*[@name="LinksScoper"]/*[@name="override-logger"] false //controller/*[@name="post-processors"]/*[@name="Scheduler"]/*[@name="enabled"] true //controller/*[@name="recover-retain-failures"] false 39 settings may need attention Please review your original crawl and the created H3 job, for each of the following, and manually update as needed. //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptURIFromSeedDomains"]/*[@name="decision"] ACCEPT //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptURIFromSeedDomains"]/*[@name="seeds-as-surt-prefixes"] true //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptURIFromSeedDomains"]/*[@name="also-check-via"] false //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="acceptURIFromSeedDomains"]/*[@name="rebuild-on-reconfig"] true //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="pathdepthfilter"]/*[@name="max-path-depth"] 20 //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="global_crawlertraps"]/*[@name="decision"] REJECT //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="global_crawlertraps"]/*[@name="list-logic"] OR //controller/*[@name="scope"]/*[@name="decide-rules"]/*[@name="rules"]/*[@name="global_crawlertraps"]/*[@name="regexp-list"]/string .*\/stories.*\/stories.*\/stories.* //controller/*[@name="frontier"]/*[@name="queue-assignment-policy"] dk.netarkivet.harvester.harvesting.DomainnameQueueAssignmentPolicy //controller/*[@name="frontier"]/*[@name="cost-policy"] org.archive.crawler.frontier.UnitCostAssignmentPolicy //controller/*[@name="frontier"]/*[@name="uri-included-structure"] org.archive.crawler.util.BdbUriUniqFilter //controller/*[@name="uri-canonicalization-rules"]/*[@name="WWW"]/*[@name="enabled"] false //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="force-retire"] false //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="enabled"] true //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="server-max-fetch-successes"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="server-max-success-kb"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="server-max-fetch-responses"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="server-max-all-kb"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="host-max-fetch-successes"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="host-max-success-kb"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="host-max-fetch-responses"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="host-max-all-kb"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="group-max-fetch-successes"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="group-max-success-kb"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="group-max-fetch-responses"] -1 //controller/*[@name="pre-fetch-processors"]/*[@name="QuotaEnforcer"]/*[@name="group-max-all-kb"] -1 //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="enabled"] true //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="matching-method"] By URL //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="try-equivalent"] true //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="change-content-size"] false //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="mime-filter"] ^text/.* //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="filter-mode"] Blacklist //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="analysis-mode"] Timestamp //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="log-level"] SEVERE //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="origin-handling"] Use index information //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="stats-per-host"] true //controller/*[@name="write-processors"]/*[@name="DeDuplicator"]/*[@name="use-sparse-range-filter"] true //controller/*[@name="write-processors"]/*[@name="Archiver"]/*[@name="path"]/string arcs //controller/*[@name="post-processors"]/*[@name="ContentSize"]/*[@name="enabled"] true 95 H1 settings successfully migrated to H3 configuration Review your converted crawler-beans.cxml at: /home/svc/frontpages.dir2/crawler-beans.cxml ]0;svc@kb-prod-udv-001:~/heritrix-3.1.0/bin[svc@kb-prod-udv-001 bin]$ exit Script done on Tue 11 Sep 2012 06:12:10 PM CEST