|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Objectdk.netarkivet.harvester.datamodel.Job
public class Job
This class represents one job to be run by Heritrix. It is based on a number of configurations, all based on the same order.xml, with at most one configuration for each domain. Each job consists of configurations of approximately the same size; that is, the expected sizes of the smallest and the largest configuration differ by at most a factor defined as limMaxRelSize (although differences smaller than limMinAbsSize are ignored). There is a limit, limMaxTotalSize, on the total size of the job in objects. A job may also be limited on bytes or objects, defined either by the configurations in the job or by the harvest definition that generated the job. The job contains the order file, the seedlist and the current status of the job, as well as the ID of the harvest definition that defined it and the names of all the configurations it is based on.
Field Summary | |
---|---|
(package private) boolean |
configsChanged
A hint to the DAO that configurations have changed. |
Constructor Summary | |
---|---|
Job(java.lang.Long harvestID,
DomainConfiguration cfg,
HarvestChannel channel,
long forceMaxObjectsPerDomain,
long forceMaxBytesPerDomain,
long forceMaxJobRunningTime,
int harvestNum)
Package private constructor for common initialisation. |
|
Job(java.lang.Long harvestID,
java.util.Map<java.lang.String,java.lang.String> configurations,
java.lang.String channel,
boolean snapshot,
long forceMaxObjectsPerDomain,
long forceMaxBytesPerDomain,
long forceMaxJobRunningTime,
JobStatus status,
java.lang.String orderXMLname,
org.dom4j.Document orderXMLdoc,
java.lang.String seedlist,
int harvestNum,
java.lang.Long continuationOf)
Create a new Job object from basic information storable in the DAO. |
Method Summary | |
---|---|
void |
addConfiguration(DomainConfiguration cfg)
Adds a configuration to this Job. |
void |
appendHarvestErrorDetails(java.lang.String harvestErrorDetails)
Append to the list of harvest error details for this job. |
void |
appendHarvestErrors(java.lang.String harvestErrors)
Append to the list of harvest errors for this job. |
void |
appendUploadErrorDetails(java.lang.String uploadErrorDetails)
Append to the list of upload error details. |
void |
appendUploadErrors(java.lang.String uploadErrors)
Append to the list of upload errors. |
static Job |
createJob(java.lang.Long harvestID,
HarvestChannel channel,
DomainConfiguration cfg,
int harvestNum)
Create new Job configured according to the properties of the supplied DomainConfiguration. |
static Job |
createSnapShotJob(java.lang.Long harvestID,
HarvestChannel channel,
DomainConfiguration cfg,
long maxObjectsPerDomain,
long maxBytesPerDomain,
long maxJobRunningTime,
int harvestNum)
Create new instance of Job suitable for snapshot harvesting. |
java.util.Date |
getActualStart()
Get the actual time when this job was started. |
java.util.Date |
getActualStop()
Get the actual time when this job was stopped/completed. |
java.lang.String |
getChannel()
|
java.lang.Long |
getContinuationOf()
|
int |
getCountDomains()
Gets the total number of different domains harvested by this job. |
java.util.Date |
getCreationDate()
Get the time when this job was created. |
java.util.Map<java.lang.String,java.lang.String> |
getDomainConfigurationMap()
Returns a map of domain names and name of their corresponding configuration. |
(package private) long |
getEdition()
Get the edition number. |
long |
getForceMaxBytesPerDomain()
|
long |
getForceMaxObjectsPerDomain()
|
java.lang.String |
getHarvestAudience()
|
java.lang.String |
getHarvestErrorDetails()
Get the list of harvest error details for this job. |
java.lang.String |
getHarvestErrors()
Get the list of harvest errors for this job. |
java.lang.String |
getHarvestFilenamePrefix()
Get the harvestFilename prefix. |
int |
getHarvestNum()
Get the harvestNum for this job. |
java.util.List<AliasInfo> |
getJobAliasInfo()
Get a list of AliasInfo objects for all the domains included in the job. |
java.lang.Long |
getJobID()
Get the id of this Job. |
long |
getMaxBytesPerDomain()
Gets the maximum number of bytes harvested per domain. |
long |
getMaxCountObjects()
|
long |
getMaxJobRunningTime()
|
long |
getMaxObjectsPerDomain()
Gets the maximum number of objects harvested per domain. |
long |
getMinCountObjects()
|
org.dom4j.Document |
getOrderXMLdoc()
Gets a document representation of the order.xml associated with this Job. |
java.lang.String |
getOrderXMLName()
Get the name of the order XML file used by this Job. |
java.lang.Long |
getOrigHarvestDefinitionID()
Get the id of the HarvestDefinition from which this job originates. |
java.lang.Long |
getResubmittedAsJob()
Get the ID for the job which this job was resubmitted as. |
java.lang.String |
getSeedListAsString()
Get the seedlist as a String. |
org.dom4j.Document[] |
getSettingsXMLdocs()
Gets a list of document representations of the settings.xml's associated with this Job. |
java.io.File[] |
getSettingsXMLfiles()
Get a list of Heritrix settings.xml files. |
java.util.List<java.lang.String> |
getSortedSeedList()
Returns a list of sorted seeds for this job. |
JobStatus |
getStatus()
Get the current status of this Job. |
java.util.Date |
getSubmittedDate()
Get the time when this job was submitted. |
long |
getTotalCountObjects()
|
java.lang.String |
getUploadErrorDetails()
Get the list of upload error details. |
java.lang.String |
getUploadErrors()
Get the list of upload errors. |
boolean |
isConfigurationSetsByteLimit()
|
boolean |
isConfigurationSetsObjectLimit()
|
boolean |
isSnapshot()
|
void |
setActualStart(java.util.Date actualStart)
Set the actual time when this job was started. |
void |
setActualStop(java.util.Date actualStop)
Set the actual time when this job was stopped/completed. |
void |
setChannel(java.lang.String channel)
Sets the associated HarvestChannel name. |
void |
setCreationDate(java.util.Date creationDate)
Set the Date for when this job was created. |
(package private) void |
setDefaultHarvestNamePrefix()
|
(package private) void |
setEdition(long edition)
Set the edition number. |
void |
setHarvestAudience(java.lang.String theAudience)
Set the harvest audience for this job. |
void |
setHarvestFilenamePrefix(java.lang.String prefix)
|
void |
setHarvestNum(int harvestNum)
Set the harvestNum for this job. |
void |
setJobID(java.lang.Long id)
Set the id of this Job. |
void |
setOrderXMLDoc(org.dom4j.Document doc)
Set the orderxml for this job. |
void |
setResubmittedAsJob(java.lang.Long resubmittedAsJob)
Set the ID for the job which this job was resubmitted as. |
void |
setSeedList(java.lang.String seedList)
Set the seedlist of the job from the seedList argument. |
void |
setSnapshot(boolean isSnapshot)
Sets whether job belongs to a snapshot or focused harvest. |
void |
setStatus(JobStatus newStatus)
Sets status of this job. |
void |
setSubmittedDate(java.util.Date submittedDate)
Set the Date for when this job was submitted. |
java.lang.String |
toString()
toString method for the Job class. |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
boolean configsChanged
Constructor Detail |
---|
Job(java.lang.Long harvestID, DomainConfiguration cfg, HarvestChannel channel, long forceMaxObjectsPerDomain, long forceMaxBytesPerDomain, long forceMaxJobRunningTime, int harvestNum) throws ArgumentNotValid
harvestID
- the id of the harvestdefinitioncfg
- the configuration to base the Job onchannel
- the channel on which the job will be submitted.forceMaxObjectsPerDomain
- the maximum number of objects harvested
from a domain, overrides individual
configuration settings.
-1 means no limitforceMaxBytesPerDomain
- The maximum number of bytes harvested
from a domain, or -1 for no limit.forceMaxJobRunningTime
- The max time in seconds given to the
harvester for this jobharvestNum
- the run number of the harvest definition
ArgumentNotValid
- if cfg or priority is null or harvestID is
invalid, or if any limit < -1
UnknownID
- If the priority is invalid.Job(java.lang.Long harvestID, java.util.Map<java.lang.String,java.lang.String> configurations, java.lang.String channel, boolean snapshot, long forceMaxObjectsPerDomain, long forceMaxBytesPerDomain, long forceMaxJobRunningTime, JobStatus status, java.lang.String orderXMLname, org.dom4j.Document orderXMLdoc, java.lang.String seedlist, int harvestNum, java.lang.Long continuationOf)
harvestID
- the id of the harvestdefinitionconfigurations
- the configurations to base the Job onchannel
- the name of the channel on which the job will be submitted.snapshot
- whether the job belongs to a snapshot harvestforceMaxObjectsPerDomain
- the maximum number of objects harvested
from a domain, overrides individual
configuration settings. 0 means no limit.forceMaxBytesPerDomain
- The maximum number of bytes harvested
from a domain, or -1 for no limit.forceMaxJobRunningTime
- The max time in seconds given to the
harvester for this jobstatus
- the current status of the job.orderXMLname
- the name of the order template used.orderXMLdoc
- the (possibly modified) templateseedlist
- the combined seedlist from all configs.harvestNum
- the run number of the harvest definitionMethod Detail |
---|
public static Job createJob(java.lang.Long harvestID, HarvestChannel channel, DomainConfiguration cfg, int harvestNum)
harvestID
- the id of the harvestdefinitionchannel
- the HarvestChannel
cfg
- the configuration to base the Job onharvestNum
- Which run of the harvest definition this is.
ArgumentNotValid
- if cfg is null or harvestID is invalidpublic static Job createSnapShotJob(java.lang.Long harvestID, HarvestChannel channel, DomainConfiguration cfg, long maxObjectsPerDomain, long maxBytesPerDomain, long maxJobRunningTime, int harvestNum) throws ArgumentNotValid
harvestID
- the id of the harvestdefinitionchannel
- the channel for the jobcfg
- the configuration to base the Job onmaxObjectsPerDomain
- the maximum number of objects to harvest from
a domain, overrides individual configuration
settings unless the domain has overrideLimits
set. 0 means no limit.maxBytesPerDomain
- the maximum number of bytes to harvest from a
domain, overrides individual configuration
settings unless the domain has overrideLimits
set. -1 means no limit.maxJobRunningTime
- The maximum number of seconds which the harvester can
spend on the harvest. 0 means no limit.harvestNum
- Which run of the harvest definition this is
(should always be 1).
ArgumentNotValid
- if cfg is null or harvestID is invalidpublic void addConfiguration(DomainConfiguration cfg)
cfg
- the configuration to add
ArgumentNotValid
- if cfg is null or cfg uses a
different orderxml than this job
or if this job already contains a configuration
associated with domain of configuration cfg.public java.lang.String getOrderXMLName()
public java.util.Date getActualStop()
public java.util.Date getActualStart()
public java.util.Date getSubmittedDate()
public java.util.Date getCreationDate()
Date
public java.io.File[] getSettingsXMLfiles()
public java.lang.Long getOrigHarvestDefinitionID()
getOrigHarvestDefinitionID
in interface JobInfo
public java.lang.Long getJobID()
getJobID
in interface JobInfo
public void setJobID(java.lang.Long id)
id
- The Id for this job.public int getCountDomains()
public void setActualStart(java.util.Date actualStart)
actualStart
- A Date object representing the time
when this job was started.public void setActualStop(java.util.Date actualStop)
actualStop
- A Date object representing the time
when this job was stopped.public void setOrderXMLDoc(org.dom4j.Document doc)
doc
- An orderxml to be used by this job
public org.dom4j.Document getOrderXMLdoc()
public org.dom4j.Document[] getSettingsXMLdocs()
public java.util.List<java.lang.String> getSortedSeedList()
public void setSeedList(java.lang.String seedList)
seedList
- List of seeds as one Stringpublic java.lang.String getSeedListAsString()
public JobStatus getStatus()
public void setStatus(JobStatus newStatus)
newStatus
- Must be one of the values STATUS_NEW, ..., STATUS_FAILED
ArgumentNotValid
- in case of invalid status argument or invalid status changepublic java.util.Map<java.lang.String,java.lang.String> getDomainConfigurationMap()
public long getMaxObjectsPerDomain()
public long getMaxBytesPerDomain()
long getEdition()
void setEdition(long edition)
edition
- the new edition numberpublic java.lang.String getChannel()
HarvestChannel
name.public void setChannel(java.lang.String channel)
HarvestChannel
name.
channel
- the channel namepublic boolean isSnapshot()
public void setSnapshot(boolean isSnapshot)
isSnapshot
- true if the job belongs to a snapshot harvest,
false if it belongs to a focused harvest.public java.lang.String toString()
toString
in class java.lang.Object
Object.toString()
public long getForceMaxObjectsPerDomain()
public long getMaxJobRunningTime()
public int getHarvestNum()
public void setHarvestNum(int harvestNum)
harvestNum
- a given harvestNumpublic java.lang.String getHarvestErrors()
public void appendHarvestErrors(java.lang.String harvestErrors)
harvestErrors
- a string containing harvest errors (may be null)public java.lang.String getHarvestErrorDetails()
public void appendHarvestErrorDetails(java.lang.String harvestErrorDetails)
harvestErrorDetails
- a string containing harvest error details.public java.lang.String getUploadErrors()
public void appendUploadErrors(java.lang.String uploadErrors)
uploadErrors
- a string containing upload errors.public java.lang.String getUploadErrorDetails()
public void appendUploadErrorDetails(java.lang.String uploadErrorDetails)
uploadErrorDetails
- a string containing upload error details.public java.util.List<AliasInfo> getJobAliasInfo()
public java.lang.Long getResubmittedAsJob()
public void setSubmittedDate(java.util.Date submittedDate)
submittedDate
- The date when this was submittedpublic void setCreationDate(java.util.Date creationDate)
creationDate
- The date when this was createdpublic void setResubmittedAsJob(java.lang.Long resubmittedAsJob)
resubmittedAsJob
- An Id for a new job.public java.lang.Long getContinuationOf()
public java.lang.String getHarvestFilenamePrefix()
JobInfo
getHarvestFilenamePrefix
in interface JobInfo
public void setHarvestFilenamePrefix(java.lang.String prefix)
prefix
- public long getForceMaxBytesPerDomain()
public boolean isConfigurationSetsObjectLimit()
public boolean isConfigurationSetsByteLimit()
public long getMinCountObjects()
public long getMaxCountObjects()
public long getTotalCountObjects()
void setDefaultHarvestNamePrefix()
public java.lang.String getHarvestAudience()
public void setHarvestAudience(java.lang.String theAudience)
theAudience
- the harvestaudience.
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |