# This Properties map is specified in the Java 'property list' text format
# https://docs.oracle.com/javase/6/docs/api/java/util/Properties.html#load%28java.io.Reader%29
#
# Keep exactly one key=value pair per physical line. A line whose first
# non-blank character is '#' is read as a comment in its entirety, and
# everything after the first '=' on a line (including spaces) becomes the
# value, so entries must never share a line.

# --- Job identification ---
metadata.jobName=frontpages
metadata.description=Special template harvesting only seeds.
metadata.operator=Admin

# --- Working directories ---
loggerModule.path=logs
crawlController.checkpointsDir=checkpoints
bdb.dir=state
crawlController.scratchDir=scratch

# --- Overall crawl limits ---
# NOTE(review): 0 presumably means "no limit" for these three -- confirm.
crawlLimiter.maxBytesDownload=0
crawlLimiter.maxDocumentsDownload=0
crawlLimiter.maxTimeSeconds=0

# --- Crawl controller threads and recorder buffers ---
crawlController.maxToeThreads=100
crawlController.recorderOutBufferBytes=4096
crawlController.recorderInBufferBytes=65536
bdb.cachePercent=0

# --- Seeds and scope ---
seeds.textSource.path=seeds.txt
# The indexes below address positions in the scope rule list, which is
# defined outside this file; verify them against that list when it changes.
# maxHops=0 matches the job description above ("harvesting only seeds").
scope.rules[2].maxHops=0
scope.rules[6].maxRepetitions=3
scope.rules[3].maxTransHops=3
scope.rules[3].maxSpeculativeHops=1

# --- Operator contact and robots policy ---
# Placeholder contact values: replace before running a real crawl.
metadata.operatorContactUrl=http://my_website.com/my_infopage.html
# NOTE(review): the crawler version in the user agent is hard-coded;
# confirm it matches the deployed crawler.
metadata.userAgentTemplate=Mozilla/5.0 (compatible; heritrix/1.12.1 +@OPERATOR_CONTACT_URL@)
metadata.operatorFrom=my_email@my_website.com
metadata.robotsHonoringPolicy.type=IGNORE
metadata.robotsHonoringPolicy.masquerade=false

# --- Politeness delays and retries ---
disposition.delayFactor=1.0
disposition.maxDelayMs=1000
disposition.minDelayMs=300
frontier.maxRetries=5
frontier.retryDelaySeconds=300
preparer.preferenceEmbedHops=1
disposition.maxPerHostBandwidthUsageKbSec=500

# --- Start/finish behaviour ---
crawlController.pauseAtStart=false
crawlController.pauseAtFinish=false
seeds.sourceTagSeeds=false

# --- Frontier ---
frontier.recoveryLogEnabled=false
frontier.holdQueues=true
frontier.balanceReplenishAmount=3000
frontier.errorPenaltyAmount=100
frontier.queueTotalBudget=-1
# 300000 ms = 5 minutes.
frontier.snoozeLongMs=300000
frontier.dumpPendingAtClose=false

# --- URI canonicalization rules (indexes 2 and 4 are not set here) ---
preparer.uriCanonicalizationPolicy.rules[0].enabled=true
preparer.uriCanonicalizationPolicy.rules[1].enabled=true
preparer.uriCanonicalizationPolicy.rules[3].enabled=true
preparer.uriCanonicalizationPolicy.rules[5].enabled=true

# --- Preselector ---
preselector.enabled=true
preselector.logToFile=false
preselector.recheckScope=true
preselector.blockAll=false

# --- Preconditions ---
preconditions.enabled=true
# Keep exactly one key=value pair per physical line: everything after the
# first '=' on a line (including spaces) becomes that key's value.

# --- Preconditions (DNS / robots validity) ---
# 21600 s = 6 hours; 86400 s = 24 hours.
preconditions.ipValidityDurationSeconds=21600
preconditions.robotsValidityDurationSeconds=86400
preconditions.calculateRobotsOnly=false

# --- DNS fetcher ---
fetchDns.enabled=true
fetchDns.acceptNonDnsResolves=false
fetchDns.digestContent=true
fetchDns.digestAlgorithm=sha1

# --- HTTP fetcher ---
fetchHttp.enabled=true
fetchHttp.timeoutSeconds=300
fetchHttp.soTimeoutMs=20000
# NOTE(review): 0 presumably means "no limit" for the next two -- confirm.
fetchHttp.maxFetchKBSec=0
fetchHttp.maxLengthBytes=0
fetchHttp.ignoreCookies=false
fetchHttp.sslTrustLevel=OPEN
fetchHttp.defaultEncoding=ISO-8859-1
fetchHttp.digestContent=true
fetchHttp.digestAlgorithm=sha1
fetchHttp.sendIfModifiedSince=true
fetchHttp.sendIfNoneMatch=true
fetchHttp.sendConnectionClose=true
fetchHttp.sendReferer=true
fetchHttp.sendRange=false

# --- Link extractors ---
extractorHttp.enabled=true
extractorHtml.enabled=true
extractorHtml.extractJavascript=true
extractorHtml.treatFramesAsEmbedLinks=true
extractorHtml.ignoreFormActionUrls=true
extractorHtml.extractValueAttributes=true
extractorHtml.ignoreUnexpectedHtml=true
extractorCss.enabled=true
extractorJs.enabled=true
extractorSwf.enabled=true

# --- ARC writer ---
arcWriter.enabled=true
arcWriter.compress=false
arcWriter.prefix=IAH
# ${HOSTNAME} is kept literally here; presumably expanded by the consumer
# of this file -- confirm.
arcWriter.suffix=${HOSTNAME}
# 100000000 bytes = 100 MB (decimal).
arcWriter.maxFileSizeBytes=100000000
arcWriter.poolMaxActive=1
# 300000 ms = 5 minutes.
arcWriter.poolMaxWaitMs=300000
arcWriter.maxTotalBytesToWrite=0
arcWriter.skipIdenticalDigests=false

# --- Miscellaneous ---
candidates.seedsRedirectNewSeeds=true
preparer.preferenceDepthHops=-1
statisticsTracker.intervalSeconds=20
bdb.checkpointCopyLogs=true