|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
  dk.netarkivet.harvester.HarvesterSettings
public class HarvesterSettings
Settings specific to the harvester module of NetarchiveSuite.
Field Summary | |
---|---|
static java.lang.String |
CRAWLER_TIMEOUT_NON_RESPONDING
settings.harvester.harvesting.heritrix.noresponseTimeout: The timeout value (in seconds) used in HeritrixLauncher for aborting a crawl when no bytes are being received from web servers. |
static java.lang.String |
DEDUPLICATION_ENABLED
settings.harvester.harvesting.deduplication.enabled: This setting tells the system whether or not to use deduplication. |
static java.lang.String |
DEFAULT_SEEDLIST
settings.harvester.datamodel.domain.defaultSeedlist: Default name of the seedlist to use when new domains are created. |
static java.lang.String |
DOMAIN_CONFIG_MAXBYTES
settings.harvester.datamodel.domain.defaultMaxbytes: Default byte limit for domain configuration. |
static java.lang.String |
DOMAIN_CONFIG_MAXOBJECTS
settings.harvester.datamodel.domain.defaultMaxobjects: Default object limit for domain configuration. |
static java.lang.String |
DOMAIN_CONFIG_MAXRATE
settings.harvester.datamodel.domain.defaultMaxrate: Default download rate for domain configuration. |
static java.lang.String |
DOMAIN_DEFAULT_CONFIG
settings.harvester.datamodel.domain.defaultConfig: The name of a configuration that is created by default and which is initially used for snapshot harvests. |
static java.lang.String |
DOMAIN_DEFAULT_ORDERXML
settings.harvester.datamodel.domain.defaultOrderxml: Name of order xml template used for domains if nothing else is specified. |
static java.lang.String |
ERRORFACTOR_PERMITTED_BESTGUESS
settings.harvester.scheduler.errorFactorBestGuess: Used when calculating expected size of a harvest of some configuration during job-creation process. |
static java.lang.String |
ERRORFACTOR_PERMITTED_PREVRESULT
settings.harvester.scheduler.errorFactorPrevResult: Used when calculating expected size of a harvest of some configuration during job-creation process. |
static java.lang.String |
EXPECTED_AVERAGE_BYTES_PER_OBJECT
settings.harvester.scheduler.expectedAverageBytesPerObject: How many bytes the average object is expected to be on domains where we don't know any better. |
static java.lang.String |
HARVEST_CONTROLLER_OLDJOBSDIR
settings.harvester.harvesting.oldjobsDir: The directory in which data from old jobs is kept after uploading. |
static java.lang.String |
HARVEST_CONTROLLER_PRIORITY
settings.harvester.harvesting.queuePriority: Pool to take jobs from. |
static java.lang.String |
HARVEST_CONTROLLER_SERVERDIR
settings.harvester.harvesting.serverDir: Each job gets a subdir of this dir. |
static java.lang.String |
HARVEST_SERVERDIR_MINSPACE
settings.harvester.harvesting.minSpaceLeft: The minimum amount of free bytes in the serverDir required before accepting any harvest-jobs. |
static java.lang.String |
HERITRIX_ADMIN_NAME
settings.harvester.harvesting.heritrix.adminName: The name used to access the Heritrix GUI. |
static java.lang.String |
HERITRIX_ADMIN_PASSWORD
settings.harvester.harvesting.heritrix.adminPassword: The password used to access the Heritrix GUI. |
static java.lang.String |
HERITRIX_CONTROLLER_CLASS
settings.harvester.harvesting.heritrixControllerClass: The implementation of the HeritrixController interface to be used. |
static java.lang.String |
HERITRIX_GUI_PORT
settings.harvester.harvesting.heritrix.guiPort: Port used to access the Heritrix web user interface. |
static java.lang.String |
HERITRIX_HEAP_SIZE
settings.harvester.harvesting.heritrix.heapSize: The heap size to use for the Heritrix sub-process. |
static java.lang.String |
HERITRIX_JMX_PASSWORD
settings.harvester.harvesting.heritrix.jmxPassword: The password used to connect to the Heritrix JMX interface. The password must correspond to the value stored in the jmxremote.password file (name defined in setting settings.common.jmx.passwordFile). |
static java.lang.String |
HERITRIX_JMX_PORT
settings.harvester.harvesting.heritrix.jmxPort: The port that Heritrix uses to expose its JMX interface. |
static java.lang.String |
HERITRIX_JMX_USERNAME
settings.harvester.harvesting.heritrix.jmxUsername: The username used to connect to the Heritrix JMX interface. The username must correspond to the value stored in the jmxremote.password file (name defined in setting settings.common.jmx.passwordFile). |
static java.lang.String |
HERITRIX_JVM_OPTS
settings.harvester.harvesting.heritrix.javaOpts: Additional JVM options for the Heritrix sub-process. |
static java.lang.String |
INACTIVITY_TIMEOUT_IN_SECS
settings.harvester.harvesting.heritrix.inactivityTimeout: The timeout setting for aborting a crawl based on crawler-inactivity. |
static java.lang.String |
JOB_TIMEOUT_TIME
settings.harvester.scheduler.jobtimeouttime: Time before a STARTED job times out and changes status to FAILED. |
static java.lang.String |
JOBS_MAX_RELATIVE_SIZE_DIFFERENCE
settings.harvester.scheduler.jobs.maxRelativeSizeDifference: The maximum allowed relative difference in expected number of objects retrieved in a single job definition. |
static java.lang.String |
JOBS_MAX_TOTAL_JOBSIZE
settings.harvester.scheduler.jobs.maxTotalSize: When this limit is exceeded no more configurations may be added to a job. |
static java.lang.String |
JOBS_MIN_ABSOLUTE_SIZE_DIFFERENCE
settings.harvester.scheduler.jobs.minAbsoluteSizeDifference: Size differences for jobs below this threshold are ignored, regardless of the limits for the relative size difference. |
static java.lang.String |
MAX_CONFIGS_PER_JOB_CREATION
settings.harvester.scheduler.configChunkSize: How many domain configurations we will process in one go before making jobs out of them. |
static java.lang.String |
MAX_DOMAIN_SIZE
settings.harvester.scheduler.maxDomainSize: The initial guess of the domain size (number of objects) of an unknown domain. |
static java.lang.String |
METADATA_HERITRIX_FILE_PATTERN
settings.harvester.harvesting.metadata.heritrixFilePattern: This setting allows filtering which Heritrix files should be stored in the metadata ARC. |
static java.lang.String |
METADATA_LOG_FILE_PATTERN
settings.harvester.harvesting.metadata.logFilePattern: This setting allows filtering which Heritrix log files should be stored in the metadata ARC. |
static java.lang.String |
METADATA_REPORT_FILE_PATTERN
settings.harvester.harvesting.metadata.reportFilePattern: This setting allows filtering which of the Heritrix files that should be stored in the metadata ARC are to be classified as reports. |
static java.lang.String |
SPLIT_BY_OBJECTLIMIT
settings.harvester.scheduler.splitByObjectLimit: By default the byte limit is used as the base criterion for how many domain configurations are put into one harvest job. |
Constructor Summary | |
---|---|
HarvesterSettings()
|
Method Summary |
---|
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static java.lang.String DEFAULT_SEEDLIST
public static java.lang.String DOMAIN_DEFAULT_CONFIG
public static java.lang.String DOMAIN_DEFAULT_ORDERXML
public static java.lang.String DOMAIN_CONFIG_MAXRATE
public static java.lang.String DOMAIN_CONFIG_MAXBYTES
public static java.lang.String DOMAIN_CONFIG_MAXOBJECTS
public static java.lang.String ERRORFACTOR_PERMITTED_PREVRESULT
public static java.lang.String ERRORFACTOR_PERMITTED_BESTGUESS
public static java.lang.String EXPECTED_AVERAGE_BYTES_PER_OBJECT
public static java.lang.String MAX_DOMAIN_SIZE
public static java.lang.String JOBS_MAX_RELATIVE_SIZE_DIFFERENCE
public static java.lang.String JOBS_MIN_ABSOLUTE_SIZE_DIFFERENCE
public static java.lang.String JOBS_MAX_TOTAL_JOBSIZE
public static java.lang.String MAX_CONFIGS_PER_JOB_CREATION
public static java.lang.String SPLIT_BY_OBJECTLIMIT
public static java.lang.String JOB_TIMEOUT_TIME
public static java.lang.String HARVEST_CONTROLLER_SERVERDIR
public static java.lang.String HARVEST_SERVERDIR_MINSPACE
public static java.lang.String HARVEST_CONTROLLER_OLDJOBSDIR
public static java.lang.String HARVEST_CONTROLLER_PRIORITY
public static java.lang.String INACTIVITY_TIMEOUT_IN_SECS
public static java.lang.String CRAWLER_TIMEOUT_NON_RESPONDING
public static java.lang.String HERITRIX_ADMIN_NAME
public static java.lang.String HERITRIX_ADMIN_PASSWORD
public static java.lang.String HERITRIX_GUI_PORT
public static java.lang.String HERITRIX_JMX_PORT
public static java.lang.String HERITRIX_JMX_USERNAME
public static java.lang.String HERITRIX_JMX_PASSWORD
public static java.lang.String HERITRIX_HEAP_SIZE
public static java.lang.String HERITRIX_JVM_OPTS
public static java.lang.String HERITRIX_CONTROLLER_CLASS
public static java.lang.String DEDUPLICATION_ENABLED
public static final java.lang.String METADATA_HERITRIX_FILE_PATTERN
See Also: Pattern, Constant Field Values
public static final java.lang.String METADATA_REPORT_FILE_PATTERN
See Also: Pattern, Constant Field Values
public static final java.lang.String METADATA_LOG_FILE_PATTERN
See Also: Pattern, Constant Field Values
Constructor Detail |
---|
public HarvesterSettings()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |