|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  dk.netarkivet.harvester.HarvesterSettings
public class HarvesterSettings
Settings specific to the harvester module of NetarchiveSuite.
Field Summary | |
---|---|
static java.lang.String |
ABORT_IF_CONNECTION_LOST
settings.harvester.harvesting.heritrix.abortIfConnectionLost: Boolean flag. |
static java.lang.String |
ALIAS_TIMEOUT
settings.harvester.aliases.timeout The amount of time in seconds before an alias times out, and needs to be re-evaluated. |
static java.lang.String |
CRAWL_LOOP_WAIT_TIME
settings.harvester.harvesting.heritrix.crawlLoopWaitTime: Time interval in seconds to wait during a crawl loop in the harvest controller. |
static java.lang.String |
CRAWLER_TIMEOUT_NON_RESPONDING
settings.harvester.harvesting.heritrix.noresponseTimeout: The timeout value (in seconds) used in HeritrixLauncher for aborting crawl when no bytes are being received from web servers. |
static java.lang.String |
DEDUPLICATION_ENABLED
settings.harvester.harvesting.deduplication.enabled: This setting tells the system whether or not to use deduplication. |
static java.lang.String |
DEFAULT_SEEDLIST
settings.harvester.datamodel.domain.defaultSeedlist: Default name of the seedlist to use when new domains are created. |
static java.lang.String |
DISREGARD_SEEDURL_INFORMATION_IN_CRAWLLOG
settings.harvester.harvesting.harvestReport.disregardSeedsURLInfo: Should we disregard seedURL-information and thus assign the harvested bytes to the domain of the harvested URL instead of the seed url domain? The default is false. |
static java.lang.String |
DOMAIN_CONFIG_MAXBYTES
settings.harvester.datamodel.domain.defaultMaxbytes: Default byte limit for domain configuration. |
static java.lang.String |
DOMAIN_CONFIG_MAXOBJECTS
settings.harvester.datamodel.domain.defaultMaxobjects: Default object limit for domain configuration. |
static java.lang.String |
DOMAIN_CONFIG_MAXRATE
settings.harvester.datamodel.domain.defaultMaxrate: Default download rate for domain configuration. |
static java.lang.String |
DOMAIN_DEFAULT_CONFIG
settings.harvester.datamodel.domain.defaultConfig: The name of a configuration that is created by default and which is initially used for snapshot harvests. |
static java.lang.String |
DOMAIN_DEFAULT_ORDERXML
settings.harvester.datamodel.domain.defaultOrderxml: Name of order xml template used for domains if nothing else is specified. |
static java.lang.String |
ERRORFACTOR_PERMITTED_BESTGUESS
settings.harvester.scheduler.jobGen.config.errorFactorBestGuess: Used when calculating expected size of a harvest of some configuration during job-creation process. |
static java.lang.String |
ERRORFACTOR_PERMITTED_PREVRESULT
settings.harvester.scheduler.jobGen.config.errorFactorPrevResult: Used when calculating expected size of a harvest of some configuration during job-creation process. |
static java.lang.String |
EXPECTED_AVERAGE_BYTES_PER_OBJECT
settings.harvester.scheduler.jobGen.config.expectedAverageBytesPerObject: How many bytes the average object is expected to be on domains where we don't know any better. |
static java.lang.String |
FRONTIER_REPORT_FILTER_ARGS
settings.harvester.harvesting.frontier.filter.args Defines a frontier report filter's arguments. |
static java.lang.String |
FRONTIER_REPORT_FILTER_CLASS
settings.harvester.harvesting.frontier.filter.class Defines a filter to apply to the full frontier report. |
static java.lang.String |
FRONTIER_REPORT_WAIT_TIME
settings.harvester.harvesting.frontier.frontierReportWaitTime: Time interval in seconds to wait between two requests to generate a full frontier report. |
static java.lang.String |
GENERATE_JOBS_PERIOD
settings.harvester.scheduler.jobgenerationperiode: The period between checking if new jobs should be generated, in seconds. |
static java.lang.String |
HARVEST_CONTROLLER_CHANNEL
settings.harvester.harvesting.channel: Harvest channel to take jobs from. |
static java.lang.String |
HARVEST_CONTROLLER_OLDJOBSDIR
settings.harvester.harvesting.oldjobsDir: The directory in which data from old jobs is kept after uploading. |
static java.lang.String |
HARVEST_CONTROLLER_SERVERDIR
settings.harvester.harvesting.serverDir: Each job gets a subdir of this dir. |
static java.lang.String |
HARVEST_MONITOR_DISPLAYED_HISTORY_SIZE
settings.harvester.monitor.displayedHistorySize: Maximum number of most recent history records displayed on the running job details page. |
static java.lang.String |
HARVEST_MONITOR_HISTORY_CHART_GEN_INTERVAL
settings.harvester.monitor.historyChartGenIntervall: Time interval in seconds between regenerating the chart of historical data for a running job. |
static java.lang.String |
HARVEST_MONITOR_HISTORY_SAMPLE_RATE
settings.harvester.monitor.historySampleRate: Time interval in seconds between historical records stored in the DB. |
static java.lang.String |
HARVEST_MONITOR_REFRESH_INTERVAL
settings.harvester.monitor.refreshInterval: Time interval in seconds after which the harvest monitor pages will be automatically refreshed. |
static java.lang.String |
HARVEST_REPORT_CLASS
settings.harvester.harvesting.harvestReport: The implementation of HarvestReport interface to be used. |
static java.lang.String |
HARVEST_SERVERDIR_MINSPACE
settings.harvester.harvesting.minSpaceLeft: The minimum amount of free bytes in the serverDir required before accepting any harvest-jobs. |
static java.lang.String |
HERITRIX_ADMIN_NAME
settings.harvester.harvesting.heritrix.adminName: The name used to access the Heritrix GUI. |
static java.lang.String |
HERITRIX_ADMIN_PASSWORD
settings.harvester.harvesting.heritrix.adminPassword: The password used to access the Heritrix GUI. |
static java.lang.String |
HERITRIX_ARCHIVE_FORMAT
settings.harvester.harvesting.heritrix.archiveFormat The dataformat used by heritrix to write the harvested data. |
static java.lang.String |
HERITRIX_ARCHIVE_NAMING_CLASS
settings.harvester.harvesting.heritrix.archiveNaming.class The class implementing the chosen way of naming your archive-files; default: LegacyNamingConvention. |
static java.lang.String |
HERITRIX_CONTROLLER_CLASS
settings.harvester.harvesting.heritrixControllerClass: The implementation of the HeritrixController interface to be used. |
static java.lang.String |
HERITRIX_GUI_PORT
settings.harvester.harvesting.heritrix.guiPort: Port used to access the Heritrix web user interface. |
static java.lang.String |
HERITRIX_HEAP_SIZE
settings.harvester.harvesting.heritrix.heapSize: The heap size to use for the Heritrix sub-process. |
static java.lang.String |
HERITRIX_JMX_PASSWORD
settings.harvester.harvesting.heritrix.jmxPassword: The password used to connect to the Heritrix JMX interface. The password must correspond to the value stored in the jmxremote.password file (name defined in setting settings.common.jmx.passwordFile). |
static java.lang.String |
HERITRIX_JMX_PORT
settings.harvester.harvesting.heritrix.jmxPort: The port that Heritrix uses to expose its JMX interface. |
static java.lang.String |
HERITRIX_JMX_USERNAME
settings.harvester.harvesting.heritrix.jmxUsername: The username used to connect to the Heritrix JMX interface. The username must correspond to the value stored in the jmxremote.password file (name defined in setting settings.common.jmx.passwordFile). |
static java.lang.String |
HERITRIX_JVM_OPTS
settings.harvester.harvesting.heritrix.javaOpts: Additional JVM options for the Heritrix sub-process. |
static java.lang.String |
HERITRIX_LAUNCHER_CLASS
settings.harvester.harvesting.heritrixLauncherClass: The implementation of the HeritrixLauncher abstract class to be used. |
static java.lang.String |
HERITRIX_WARC_PARAMETERS_OVERRIDE
settings.harvester.harvesting.heritrix.warc.writeMetadataOutlinks This parameter defines NAS behaviour regarding warc parameters (write request, write metadata, etc.): if this parameter is true, the warc parameters defined in harvester templates are not considered. |
static java.lang.String |
HERITRIX_WARC_SKIP_IDENTICAL_DIGESTS
settings.harvester.harvesting.heritrix.warc.skipIdenticalDigests Represents the 'skip-identical-digests' setting in the Heritrix WARCWriterProcessor. |
static java.lang.String |
HERITRIX_WARC_WRITE_METADATA
settings.harvester.harvesting.heritrix.warc.writeMetadata Represents the 'write-metadata' setting in the Heritrix WARCWriterProcessor. |
static java.lang.String |
HERITRIX_WARC_WRITE_METADATA_OUTLINKS
settings.harvester.harvesting.heritrix.warc.writeMetadataOutlinks Represents the 'write-metadata-outlinks' setting in the Heritrix WARCWriterProcessor. |
static java.lang.String |
HERITRIX_WARC_WRITE_REQUESTS
settings.harvester.harvesting.heritrix.warc.writeRequests Represents the 'write-requests' setting in the Heritrix WARCWriterProcessor. |
static java.lang.String |
HERITRIX_WARC_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS
settings.harvester.harvesting.heritrix.warc.writeRevisitForIdenticalDigests Represents the 'write-revisit-for-identical-digests' setting in the Heritrix WARCWriterProcessor. |
static java.lang.String |
HERITRIX_WARC_WRITE_REVISIT_FOR_NOT_MODIFIED
settings.harvester.harvesting.heritrix.warc.writeRevisitForNotModified Represents the 'write-revisit-for-not-modified' setting in the Heritrix WARCWriterProcessor. |
static java.lang.String |
INACTIVITY_TIMEOUT_IN_SECS
settings.harvester.harvesting.heritrix.inactivityTimeout: The timeout setting for aborting a crawl based on crawler-inactivity. |
static java.lang.String |
INDEXREQUEST_SERVER_CLASS
settings.harvester.indexserver.indexrequestserver.class: Setting for which type of indexrequestserver to use. |
static java.lang.String |
INDEXSERVER_INDEXING_CHECKINTERVAL
settings.harvester.indexserver.checkinterval: Setting for the time in milliseconds between each check of the state of sub-indexing. |
static java.lang.String |
INDEXSERVER_INDEXING_LISTENING_INTERVAL
settings.harvester.indexserver.listeningcheckinterval: Setting for the interval between each listening check in milliseconds. |
static java.lang.String |
INDEXSERVER_INDEXING_LOOKFORDATAINOTHERBITARCHIVEREPLICAS
settings.harvester.indexserver.lookfordataInAllBitarchiveReplicas: Setting for whether or not data not found in the default bitarchive replica shall be looked for in other bitarchive replicas. |
static java.lang.String |
INDEXSERVER_INDEXING_MAX_SEGMENTS
settings.harvester.indexserver.maxsegments: Setting for how many segments we will accept in our lucene indices. |
static java.lang.String |
INDEXSERVER_INDEXING_MAXCLIENTS
settings.harvester.indexserver.maxclients: Setting for the max number of clients the indexserver can handle simultaneously. |
static java.lang.String |
INDEXSERVER_INDEXING_MAXTHREADS
settings.harvester.indexserver.maxthreads: Setting for the max number of threads the deduplication indexer shall use. |
static java.lang.String |
INDEXSERVER_INDEXING_REQUESTDIR
settings.harvester.indexserver.requestdir: Setting for where the requests of the indexserver are stored. |
static java.lang.String |
INDEXSERVER_INDEXING_SATISFACTORYTHRESHOLD_PERCENTAGE
settings.archive.indexserver.satisfactorythresholdpercentage: Setting for the satisfactory threshold of the indexing result as a percentage. |
static java.lang.String |
INDEXSERVER_INDEXING_TIMEOUT
settings.harvester.indexserver.indexingtimeout: Setting for the indexing timeout in milliseconds. |
static java.lang.String |
JOB_TIMEOUT_TIME
settings.harvester.scheduler.jobtimeouttime: Time before a STARTED job times out and changes status to FAILED. |
static java.lang.String |
JOBGEN_CLASS
settings.harvester.scheduler.jobGen.class: The fully qualified class name of the chosen job generator implementation, currently either DefaultJobGenerator
or FixedDomainConfigurationCountJobGenerator . |
static java.lang.String |
JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE
settings.harvester.scheduler.jobGen.domainConfigSubsetSize: How many domain configurations we will process in one go before making jobs out of them. |
static java.lang.String |
JOBGEN_FIXED_CONFIG_COUNT_EXCLUDE_ZERO_BUDGET
settings.harvester.scheduler.jobGen.config.excludeDomainsWithZeroBudget: If the job generator is FixedDomainConfigurationCountJobGenerator ,
then this parameter toggles whether or not domain configurations with a budget of zero
(byte or objects) should be excluded from jobs. |
static java.lang.String |
JOBGEN_FIXED_CONFIG_COUNT_FOCUSED
settings.harvester.scheduler.jobGen.config.fixedDomainCountFocused: If the job generator is FixedDomainConfigurationCountJobGenerator ,
then this parameter represents the maximum number of domain configurations
in a partial harvest job. |
static java.lang.String |
JOBGEN_FIXED_CONFIG_COUNT_SNAPSHOT
settings.harvester.scheduler.jobGen.config.fixedDomainCountSnapshot: If the job generator is FixedDomainConfigurationCountJobGenerator ,
then this parameter represents the maximum number of domain configurations
in a full harvest job. |
static java.lang.String |
JOBGEN_POSTPONE_UNREGISTERED_HARVEST_CHANNEL
settings.harvester.scheduler.jobGen.config.postponeUnregisteredChannel: If this property is true, then the job generator will postpone job generation for harvest definitions that are mapped to a harvest channel not registered to at least one harvester. |
static java.lang.String |
JOBS_MAX_RELATIVE_SIZE_DIFFERENCE
settings.harvester.scheduler.jobGen.config.maxRelativeSizeDifference: The maximum allowed relative difference in expected number of objects retrieved in a single job definition. |
static java.lang.String |
JOBS_MAX_TIME_TO_COMPLETE
settings.harvester.scheduler.jobGen.maxTimeToCompleteJob: The limit on how many seconds Heritrix should continue on each job. |
static java.lang.String |
JOBS_MAX_TOTAL_JOBSIZE
settings.harvester.scheduler.jobGen.config.maxTotalSize: When this limit is exceeded no more configurations may be added to a job. |
static java.lang.String |
JOBS_MIN_ABSOLUTE_SIZE_DIFFERENCE
settings.harvester.scheduler.jobGen.config.minAbsoluteSizeDifference: Size differences for jobs below this threshold are ignored, regardless of the limits for the relative size difference. |
static java.lang.String |
MAX_CRAWLLOG_IN_BROWSER
The maximum length (in lines) of crawllog to be displayed in a browser window. |
static java.lang.String |
MAX_DOMAIN_SIZE
settings.harvester.scheduler.jobGen.config.maxDomainSize: The initial guess of the domain size (number of objects) of an unknown domain. |
static java.lang.String |
MAXIMUM_OBJECT_IN_BROWSER
settings.viewerproxy.maxSizeInBrowser The size (in bytes) of the largest object to be returned for viewing in the browser window. |
static java.lang.String |
METADATA_ARCHIVE_FILES_REPORT_HEADER
settings.harvester.harvesting.metadata.archiveFilesReportName If METADATA_GENERATE_ARCHIVE_FILES_REPORT is set to true, sets the header of the
generated report file. |
static java.lang.String |
METADATA_ARCHIVE_FILES_REPORT_NAME
settings.harvester.harvesting.metadata.archiveFilesReportName If METADATA_GENERATE_ARCHIVE_FILES_REPORT is set to true, sets the name of the
generated report file. |
static java.lang.String |
METADATA_FILENAME_FORMAT
settings.harvester.harvesting.metadata.metadataFileNameFormat The format of the name of the metadata file: By default, it will be jobID-metadata.1.extension, for example 3161-metadata-1.warc. If the value is "prefix", it will be named like a warc file: Prefix-61-3161-metadata-1.warc. If a prefix is defined, the name of the metadata file will default: default (alternative: prefix) |
static java.lang.String |
METADATA_FORMAT
settings.harvester.harvesting.metadata.metadataFormat The dataformat used by Netarchivesuite to write the metadata associated with a given harvest job. |
static java.lang.String |
METADATA_GENERATE_ARCHIVE_FILES_REPORT
settings.harvester.harvesting.metadata.generateArchiveFilesReport This setting is a boolean flag that enables/disables the generation of an ARC/WARC files report. |
static java.lang.String |
METADATA_HERITRIX_FILE_PATTERN
settings.harvester.harvesting.metadata.heritrixFilePattern This setting allows to filter which Heritrix files should be stored in the metadata ARC. |
static java.lang.String |
METADATA_LOG_FILE_PATTERN
settings.harvester.harvesting.metadata.logFilePattern This setting allows to filter which Heritrix log files should be stored in the metadata ARC. |
static java.lang.String |
METADATA_REPORT_FILE_PATTERN
settings.harvester.harvesting.metadata.reportFilePattern This setting allows to filter which Heritrix files that should be stored in the metadata ARC are to be classified as a report. |
static java.lang.String |
OBJECT_LIMIT_SET_BY_QUOTA_ENFORCER
settings.harvester.scheduler.jobGen.objectLimitIsSetByQuotaEnforcer: Controls whether the domain configuration object limit should be set in Heritrix's crawl order through the QuotaEnforcer configuration (parameter set to true) or through the frontier parameter 'queue-total-budget' ( parameter set to false). |
static java.lang.String |
PERFORMER
settings.harvester.performer: The agent performing these harvests. |
static java.lang.String |
RECOVERlOG_CONTINUATION_ENABLED
settings.harvester.harvesting.continuationFromHeritrixRecoverlogEnabled: Setting for whether or not a restarted job should try fetching the recoverlog of the previous failed job, and ask Heritrix to continue from this log. |
static java.lang.String |
SEND_READY_DELAY
settings.harvester.harvesting.sendReadyDelay: Time in milliseconds to wait from starting to listen on the job queue until a potential ready message is sent to the HarvestJobManager. |
static java.lang.String |
SEND_READY_INTERVAL
settings.harvester.harvesting.sendReadyInterval: Time interval in seconds to wait before transmitting a HarvesterReadyMessage to the JobDispatcher . |
static java.lang.String |
SPLIT_BY_OBJECTLIMIT
settings.harvester.scheduler.jobGen.config.splitByObjectLimit: By default the byte limit is used as the base criterion for how many domain configurations are put into one harvest job. |
static java.lang.String |
TRY_LOOKUP_URI_AS_FTP
settings.viewerproxy.tryLookupUriAsFtp: If we fail to look up a URI, we will try changing the protocol to ftp, if this setting is set to true. |
static java.lang.String |
VALID_SEED_REGEX
settings.harvester.datamodel.domain.validSeedRegex: Regular expression used to validate a seed within a seedlist. |
static java.lang.String |
VIEWERPROXY_DIR
settings.viewerproxy.baseDir: The main directory for the ViewerProxy, used for storing the Lucene index for the jobs being viewed. |
static java.lang.String |
WAIT_FOR_REPORT_GENERATION_TIMEOUT
settings.harvester.harvesting.heritrix.waitForReportGenerationTimeout: Maximum time in seconds to wait for Heritrix to generate report files once crawling is over. |
Constructor Summary | |
---|---|
HarvesterSettings()
|
Method Summary |
---|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static java.lang.String DEFAULT_SEEDLIST
public static java.lang.String VALID_SEED_REGEX
public static java.lang.String DOMAIN_DEFAULT_CONFIG
public static java.lang.String DOMAIN_DEFAULT_ORDERXML
public static java.lang.String DOMAIN_CONFIG_MAXRATE
public static java.lang.String DOMAIN_CONFIG_MAXBYTES
public static java.lang.String DOMAIN_CONFIG_MAXOBJECTS
public static java.lang.String ERRORFACTOR_PERMITTED_PREVRESULT
public static java.lang.String ERRORFACTOR_PERMITTED_BESTGUESS
public static java.lang.String EXPECTED_AVERAGE_BYTES_PER_OBJECT
public static java.lang.String MAX_DOMAIN_SIZE
public static java.lang.String JOBS_MAX_RELATIVE_SIZE_DIFFERENCE
public static java.lang.String JOBS_MIN_ABSOLUTE_SIZE_DIFFERENCE
public static java.lang.String JOBS_MAX_TOTAL_JOBSIZE
public static java.lang.String JOBS_MAX_TIME_TO_COMPLETE
public static java.lang.String JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE
public static java.lang.String JOBGEN_FIXED_CONFIG_COUNT_FOCUSED
FixedDomainConfigurationCountJobGenerator
,
then this parameter represents the maximum number of domain configurations
in a partial harvest job.
public static java.lang.String JOBGEN_FIXED_CONFIG_COUNT_SNAPSHOT
FixedDomainConfigurationCountJobGenerator
,
then this parameter represents the maximum number of domain configurations
in a full harvest job.
public static java.lang.String JOBGEN_FIXED_CONFIG_COUNT_EXCLUDE_ZERO_BUDGET
FixedDomainConfigurationCountJobGenerator
,
then this parameter toggles whether or not domain configurations with a budget of zero
(byte or objects) should be excluded from jobs.
The default value is 'false'.
public static java.lang.String JOBGEN_POSTPONE_UNREGISTERED_HARVEST_CHANNEL
public static java.lang.String JOBGEN_CLASS
DefaultJobGenerator
or FixedDomainConfigurationCountJobGenerator
.
The default is DefaultJobGenerator
.
public static java.lang.String SPLIT_BY_OBJECTLIMIT
public static java.lang.String OBJECT_LIMIT_SET_BY_QUOTA_ENFORCER
public static java.lang.String JOB_TIMEOUT_TIME
public static java.lang.String GENERATE_JOBS_PERIOD
public static java.lang.String HARVEST_CONTROLLER_SERVERDIR
public static java.lang.String HARVEST_SERVERDIR_MINSPACE
public static java.lang.String HARVEST_CONTROLLER_OLDJOBSDIR
public static java.lang.String HARVEST_CONTROLLER_CHANNEL
NOTE: this one is also used in SingleMBeanObject parsing information to
System state
public static java.lang.String INACTIVITY_TIMEOUT_IN_SECS
public static java.lang.String CRAWLER_TIMEOUT_NON_RESPONDING
public static java.lang.String HARVEST_MONITOR_REFRESH_INTERVAL
public static java.lang.String HARVEST_MONITOR_HISTORY_SAMPLE_RATE
public static java.lang.String HARVEST_MONITOR_HISTORY_CHART_GEN_INTERVAL
public static java.lang.String HARVEST_MONITOR_DISPLAYED_HISTORY_SIZE
public static java.lang.String CRAWL_LOOP_WAIT_TIME
public static java.lang.String SEND_READY_INTERVAL
HarvesterReadyMessage
to the JobDispatcher
. Lower values will make the JobDispatcher detect ready harvesters faster, but will make it more likely that the harvester may send two ready messages before a job is received, causing the JobDispatcher to dispatch two jobs. Default value is 30 seconds.
public static java.lang.String SEND_READY_DELAY
public static java.lang.String FRONTIER_REPORT_WAIT_TIME
public static java.lang.String FRONTIER_REPORT_FILTER_CLASS
TopTotalEnqueuesFilter
public static java.lang.String FRONTIER_REPORT_FILTER_ARGS
public static java.lang.String ABORT_IF_CONNECTION_LOST
BnfHeritrixController
public static java.lang.String WAIT_FOR_REPORT_GENERATION_TIMEOUT
public static java.lang.String HERITRIX_ADMIN_NAME
public static java.lang.String HERITRIX_ADMIN_PASSWORD
public static java.lang.String HERITRIX_GUI_PORT
public static java.lang.String HERITRIX_JMX_PORT
public static java.lang.String HERITRIX_JMX_USERNAME
public static java.lang.String HERITRIX_JMX_PASSWORD
public static java.lang.String HERITRIX_HEAP_SIZE
public static java.lang.String HERITRIX_JVM_OPTS
public static java.lang.String HERITRIX_CONTROLLER_CLASS
public static java.lang.String HERITRIX_LAUNCHER_CLASS
public static java.lang.String HARVEST_REPORT_CLASS
HarvestReport
interface to be used.
public static java.lang.String DISREGARD_SEEDURL_INFORMATION_IN_CRAWLLOG
public static java.lang.String DEDUPLICATION_ENABLED
public static java.lang.String METADATA_HERITRIX_FILE_PATTERN
Pattern
public static java.lang.String METADATA_REPORT_FILE_PATTERN
Pattern
public static java.lang.String METADATA_LOG_FILE_PATTERN
Pattern
public static java.lang.String METADATA_GENERATE_ARCHIVE_FILES_REPORT
HarvestDocumentation.documentHarvest(dk.netarkivet.harvester.harvesting.IngestableFiles)
public static java.lang.String METADATA_ARCHIVE_FILES_REPORT_NAME
METADATA_GENERATE_ARCHIVE_FILES_REPORT
is set to true, sets the name of the
generated report file.
Default value is 'archivefiles-report.txt'.
HarvestDocumentation.documentHarvest(dk.netarkivet.harvester.harvesting.IngestableFiles)
public static java.lang.String METADATA_ARCHIVE_FILES_REPORT_HEADER
METADATA_GENERATE_ARCHIVE_FILES_REPORT
is set to true, sets the header of the
generated report file. This setting should generally be left to its default value,
which is '[ARCHIVEFILE] [Opened] [Closed] [Size]'.
HarvestDocumentation.documentHarvest(dk.netarkivet.harvester.harvesting.IngestableFiles)
public static java.lang.String ALIAS_TIMEOUT
public static java.lang.String RECOVERlOG_CONTINUATION_ENABLED
public static java.lang.String METADATA_FORMAT
public static java.lang.String METADATA_FILENAME_FORMAT
public static java.lang.String HERITRIX_ARCHIVE_FORMAT
public static java.lang.String HERITRIX_ARCHIVE_NAMING_CLASS
public static java.lang.String HERITRIX_WARC_PARAMETERS_OVERRIDE
public static java.lang.String HERITRIX_WARC_SKIP_IDENTICAL_DIGESTS
public static java.lang.String HERITRIX_WARC_WRITE_REQUESTS
public static java.lang.String HERITRIX_WARC_WRITE_METADATA
public static java.lang.String HERITRIX_WARC_WRITE_METADATA_OUTLINKS
public static java.lang.String HERITRIX_WARC_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS
public static java.lang.String HERITRIX_WARC_WRITE_REVISIT_FOR_NOT_MODIFIED
public static java.lang.String PERFORMER
public static java.lang.String INDEXSERVER_INDEXING_REQUESTDIR
public static java.lang.String INDEXSERVER_INDEXING_MAXCLIENTS
public static java.lang.String INDEXSERVER_INDEXING_MAXTHREADS
public static java.lang.String INDEXSERVER_INDEXING_CHECKINTERVAL
public static java.lang.String INDEXSERVER_INDEXING_TIMEOUT
public static java.lang.String INDEXSERVER_INDEXING_MAX_SEGMENTS
public static java.lang.String INDEXSERVER_INDEXING_LISTENING_INTERVAL
public static java.lang.String INDEXSERVER_INDEXING_SATISFACTORYTHRESHOLD_PERCENTAGE
public static java.lang.String INDEXREQUEST_SERVER_CLASS
IndexRequestServer
public static java.lang.String INDEXSERVER_INDEXING_LOOKFORDATAINOTHERBITARCHIVEREPLICAS
public static java.lang.String VIEWERPROXY_DIR
public static java.lang.String TRY_LOOKUP_URI_AS_FTP
public static java.lang.String MAXIMUM_OBJECT_IN_BROWSER
public static java.lang.String MAX_CRAWLLOG_IN_BROWSER
Constructor Detail |
---|
public HarvesterSettings()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |