001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.harvesting;
024
025import java.io.File;
026import java.io.IOException;
027
028import org.slf4j.Logger;
029import org.slf4j.LoggerFactory;
030
031import dk.netarkivet.common.exceptions.ArgumentNotValid;
032import dk.netarkivet.common.exceptions.IOFailure;
033import dk.netarkivet.common.utils.FileUtils;
034import dk.netarkivet.common.utils.Settings;
035import dk.netarkivet.common.utils.SimpleXml;
036import dk.netarkivet.common.utils.archive.ArchiveDateConverter;
037import dk.netarkivet.harvester.HarvesterSettings;
038import dk.netarkivet.harvester.datamodel.H3HeritrixTemplate;
039import dk.netarkivet.harvester.datamodel.H3HeritrixTemplate.MetadataInfo;
040import dk.netarkivet.harvester.datamodel.HarvestDefinitionInfo;
041import dk.netarkivet.harvester.datamodel.Job;
042import dk.netarkivet.harvester.harvesting.PersistentJobData.XmlState.OKSTATE;
043
044/**
045 * Class PersistentJobData holds information about an ongoing harvest. Presently the information is stored in a
046 * XML-file.
047 */
048public class PersistentJobData implements JobInfo {
049
050    /** The logger to use. */
051    private static final Logger log = LoggerFactory.getLogger(PersistentJobData.class);
052
053    /** the crawlDir. */
054    private final File crawlDir;
055
056    /**
057     * The filename for the file containing the persistent job data, stored in crawlDir.
058     */
059    private static final String HARVEST_INFO_FILENAME = "harvestInfo.xml";
060    /** XML-root element for the persistent Job Data. */
061    private static final String ROOT_ELEMENT = "harvestInfo";
062    /** Key in harvestinfo file for the ID of the job. */
063    private static final String JOBID_KEY = ROOT_ELEMENT + ".jobId";
064    /** Key in harvestinfo file for the harvestNum of the job. */
065    private static final String HARVESTNUM_KEY = ROOT_ELEMENT + ".harvestNum";
066    /** Key in harvestinfo file for the maxBytesPerDomain value for the job. */
067    private static final String MAXBYTESPERDOMAIN_KEY = ROOT_ELEMENT + ".maxBytesPerDomain";
068    /**
069     * Key in harvestinfo file for the maxObjectsPerDomain value for the job.
070     */
071    private static final String MAXOBJECTSPERDOMAIN_KEY = ROOT_ELEMENT + ".maxObjectsPerDomain";
072    /** Key in harvestinfo file for the orderXMLName of the job. */
073    private static final String TEMPLATENAME_KEY = ROOT_ELEMENT + ".templateName";
074    /** Key in harvestinfo file for the orderXMLName of the job. */
075    private static final String OLDORDERXMLNAME_KEY = ROOT_ELEMENT + ".orderXMLName";
076    /** Key in harvestinfo file for the orderXMLName update date. */
077    private static final String ORDERXML_UPDATE_DATE_KEY = ROOT_ELEMENT + ".templateLastUpdateDate";
078    /** Key in harvestinfo file for the orderXMLName description. */
079    private static final String ORDERXML_DESCRIPTION_KEY = ROOT_ELEMENT + ".templateDescription";
080    /** Key in harvestinfo file for the harvestID of the job. */
081    private static final String ORIGHARVESTDEFINITIONID_KEY = ROOT_ELEMENT + ".origHarvestDefinitionID";
082    /** Key in harvestinfo file for the harvest channel of the job. */
083    private static final String CHANNEL_KEY = ROOT_ELEMENT + ".channel";
084
085    private static final String PRIORITY_KEY = ROOT_ELEMENT + ".priority";
086
087    /** Key in harvestinfo file for the original harvest definition name. */
088    private static final String HARVEST_NAME_KEY = ROOT_ELEMENT + ".origHarvestDefinitionName";
089
090    /**
091     * Key in harvestinfo file for the original harvest definition description.
092     */
093    private static final String HARVEST_DESC_KEY = ROOT_ELEMENT + ".origHarvestDefinitionComments";
094
095    /**
096     * Key in harvestinfo file for the original harvest definition schedule, will be empty for broad crawls.
097     */
098    private static final String HARVEST_SCHED_KEY = ROOT_ELEMENT + ".scheduleName";
099    /** The harvestfilename prefix used by this job set in the Job class. */
100    private static final String HARVEST_FILENAME_PREFIX_KEY = ROOT_ELEMENT + ".harvestFilenamePrefix";
101    /** The submitted date of this job. */
102    private static final String JOB_SUBMIT_DATE_KEY = ROOT_ELEMENT + ".jobSubmitDate";
103    /** The performer of this harvest. */
104    private static final String HARVEST_PERFORMER_KEY = ROOT_ELEMENT + ".performer";
105    /** The audience of this harvest. */
106    private static final String HARVEST_AUDIENCE_KEY = ROOT_ELEMENT + ".audience";
107    /** The operator of this harvest. */
108    private static final String HARVEST_OPERATOR_KEY = ROOT_ELEMENT + ".operator";
109    
110
111    /** Key in harvestinfo file for the file version. */
112    private static final String HARVESTINFO_VERSION_KEY = "harvestInfo.version";
113    /** Value for current version number. */
114    private static final String HARVESTINFO_VERSION_NUMBER = "0.6";
115
116    /**
117     * Also support for version 0.4 of harvestInfo xml. In the previous format the channel and snapshot keys were
118     * absent. Instead there was the priority key.
119     */
120    private static final String OLD_HARVESTINFO_VERSION_NUMBER_4 = "0.4";
121    
122    /**
123     * Also support for version 0.5 of harvestInfo xml. In this previous format, templateName was named orderXMLName
124     * and fields templateLastUpdateDate & templateDescription didn't exist.
125     */
126    private static final String OLD_HARVESTINFO_VERSION_NUMBER_5 = "0.5";
127
128    /** String array containing all mandatory keys contained in valid version 0.6 xml.
129     *  ORDERXML_UPDATE_DATE_KEY, ORDERXML_DESCRIPTION_KEY are optionals 
130     */
131    private static final String[] ALL_KEYS = {JOBID_KEY, HARVESTNUM_KEY, MAXBYTESPERDOMAIN_KEY,
132            MAXOBJECTSPERDOMAIN_KEY, TEMPLATENAME_KEY,  ORIGHARVESTDEFINITIONID_KEY, CHANNEL_KEY,
133            HARVESTINFO_VERSION_KEY, HARVEST_NAME_KEY, HARVEST_FILENAME_PREFIX_KEY, JOB_SUBMIT_DATE_KEY};
134
135    /**
136     * Optional keys are HARVEST_DESC_KEY representing harvest comments, and HARVEST_SCHED_KEY representing the
137     * scheduleName behind the harvest, only applicable for selective harvests.
138     */
139    
140    /** String array containing all mandatory keys contained in valid version 0.5 xml. */
141    private static final String[] ALL_KEYS_5 = {JOBID_KEY, HARVESTNUM_KEY, MAXBYTESPERDOMAIN_KEY,
142            MAXOBJECTSPERDOMAIN_KEY, OLDORDERXMLNAME_KEY, ORIGHARVESTDEFINITIONID_KEY, CHANNEL_KEY,
143            HARVESTINFO_VERSION_KEY, HARVEST_NAME_KEY, HARVEST_FILENAME_PREFIX_KEY, JOB_SUBMIT_DATE_KEY};
144
145    /**
146     * String array containing all mandatory keys contained in old valid version 0.4 xml.
147     */
148    private static final String[] ALL_KEYS_4 = {JOBID_KEY, HARVESTNUM_KEY, MAXBYTESPERDOMAIN_KEY,
149            MAXOBJECTSPERDOMAIN_KEY, OLDORDERXMLNAME_KEY, ORIGHARVESTDEFINITIONID_KEY, PRIORITY_KEY,
150            HARVESTINFO_VERSION_KEY, HARVEST_NAME_KEY, HARVEST_FILENAME_PREFIX_KEY, JOB_SUBMIT_DATE_KEY};
151
152    /** the SimpleXml object, that contains the XML in HARVEST_INFO_FILENAME. */
153    private SimpleXml theXML = null;
154
155    /**
156     * Constructor for class PersistentJobData.
157     *
158     * @param crawlDir The directory where the harvestInfo can be found
159     * @throws ArgumentNotValid if crawlDir is null or does not exist.
160     */
161    public PersistentJobData(File crawlDir) {
162        ArgumentNotValid.checkExistsDirectory(crawlDir, "crawlDir");
163
164        this.crawlDir = crawlDir;
165    }
166
167    /**
168     * Returns true, if harvestInfo exists in crawDir, otherwise false.
169     *
170     * @return true, if harvestInfo exists, otherwise false
171     */
172    public boolean exists() {
173        return getHarvestInfoFile().isFile();
174    }
175
176    /**
177     * Returns true if the given directory exists and contains a harvestInfo file.
178     *
179     * @param crawlDir A directory that may contain harvestInfo file.
180     * @return True if the harvestInfo file exists.
181     */
182    public static boolean existsIn(File crawlDir) {
183        return new File(crawlDir, HARVEST_INFO_FILENAME).exists();
184    }
185
186    /**
187     * @return the location of the harvestInfo File in the crawlDir.
188     */
189    public static File getHarvestInfoFile(File crawlDir) {
190        return new File(crawlDir, HARVEST_INFO_FILENAME);
191    }
192    
193
194    /**
195     * Read harvestInfo into SimpleXML object.
196     *
197     * @return SimpleXml object for harvestInfo
198     * @throws IOFailure if HarvestInfoFile does not exist or if HarvestInfoFile is invalid
199     */
200    private synchronized SimpleXml read() {
201        if (theXML != null) {
202            return theXML;
203        }
204        if (!exists()) {
205            throw new IOFailure("The harvestInfo file '" + getHarvestInfoFile().getAbsolutePath() + "' does not exist!");
206        }
207        SimpleXml sx = new SimpleXml(getHarvestInfoFile());
208        XmlState validationResult = validateHarvestInfo(sx);
209        if (validationResult.getOkState().equals(XmlState.OKSTATE.NOTOK)) {
210            try {
211                String errorMsg = "The harvestInfoFile '" + getHarvestInfoFile().getAbsolutePath() + "' is invalid: "
212                        + validationResult.getError() + ". The contents of the file is this: "
213                        + FileUtils.readFile(getHarvestInfoFile());
214                throw new IOFailure(errorMsg);
215            } catch (IOException e) {
216                String errorMsg = "Unable to read HarvestInfoFile: '" + getHarvestInfoFile().getAbsolutePath() + "'";
217                throw new IOFailure(errorMsg);
218            }
219        } else { // The xml is valid
220            theXML = sx;
221            return sx;
222        }
223    }
224
225    /**
226     * Write information about given Job to XML-structure.
227     *
228     * @param harvestJob the given Job
229     * @param hdi Information about the harvestJob.
230     * @throws IOFailure if any failure occurs while persisting data, or if the file has already been written.
231     */
232    public synchronized void write(Job harvestJob, HarvestDefinitionInfo hdi) {
233        ArgumentNotValid.checkNotNull(harvestJob, "Job harvestJob");
234        ArgumentNotValid.checkNotNull(hdi, "HarvestDefinitionInfo hdi");
235        if (exists()) {
236            String errorMsg = "Persistent Job data already exists in '" + crawlDir + "'. Aborting";
237            log.warn(errorMsg);
238            throw new IOFailure(errorMsg);
239        }
240
241        SimpleXml sx = new SimpleXml(ROOT_ELEMENT);
242        sx.add(HARVESTINFO_VERSION_KEY, HARVESTINFO_VERSION_NUMBER);
243        sx.add(JOBID_KEY, harvestJob.getJobID().toString());
244        sx.add(CHANNEL_KEY, harvestJob.getChannel());
245        sx.add(HARVESTNUM_KEY, Integer.toString(harvestJob.getHarvestNum()));
246        sx.add(ORIGHARVESTDEFINITIONID_KEY, Long.toString(harvestJob.getOrigHarvestDefinitionID()));
247        sx.add(MAXBYTESPERDOMAIN_KEY, Long.toString(harvestJob.getMaxBytesPerDomain()));
248        sx.add(MAXOBJECTSPERDOMAIN_KEY, Long.toString(harvestJob.getMaxObjectsPerDomain()));
249        sx.add(TEMPLATENAME_KEY, harvestJob.getOrderXMLName());
250        // insert fields got from crawler-beans.cxml and add them into
251        // harvestInfo.xml for preservation purpose
252        if(harvestJob.getOrderXMLdoc() instanceof H3HeritrixTemplate) {
253                H3HeritrixTemplate template = (H3HeritrixTemplate) harvestJob.getOrderXMLdoc();
254                String tmp = null;
255                tmp = template.getMetadataInfo(MetadataInfo.TEMPLATE_UPDATE_DATE);
256                if (tmp != null && !tmp.isEmpty()) {
257                sx.add(ORDERXML_UPDATE_DATE_KEY, tmp);
258            }
259                tmp = template.getMetadataInfo(MetadataInfo.TEMPLATE_DESCRIPTION);
260                if (tmp != null && !tmp.isEmpty()) {
261                sx.add(ORDERXML_DESCRIPTION_KEY, tmp);
262            }
263        }
264
265        sx.add(HARVEST_NAME_KEY, hdi.getOrigHarvestName());
266
267        String comments = hdi.getOrigHarvestDesc();
268        if (!comments.isEmpty()) {
269            sx.add(HARVEST_DESC_KEY, comments);
270        }
271
272        String schedName = hdi.getScheduleName();
273        if (!schedName.isEmpty()) {
274            sx.add(HARVEST_SCHED_KEY, schedName);
275        }
276        // Store the harvestname prefix selected by the used Naming Strategy.
277        sx.add(HARVEST_FILENAME_PREFIX_KEY, harvestJob.getHarvestFilenamePrefix());
278
279        // store the submitted date in WARC Date format
280        sx.add(JOB_SUBMIT_DATE_KEY, ArchiveDateConverter.getWarcDateFormat().format(harvestJob.getSubmittedDate()));
281        // if performer set to something different from the empty String
282        if (!Settings.get(HarvesterSettings.PERFORMER).isEmpty()) {
283            sx.add(HARVEST_PERFORMER_KEY, Settings.get(HarvesterSettings.PERFORMER));
284        }
285        // insert fields got from crawler-beans.cxml and add them into
286        // harvestInfo.xml for preservation purpose
287        if(harvestJob.getOrderXMLdoc() instanceof H3HeritrixTemplate) {
288                H3HeritrixTemplate template = (H3HeritrixTemplate) harvestJob.getOrderXMLdoc();
289                String temp = null;
290                temp = template.getMetadataInfo(MetadataInfo.OPERATOR);
291                if (temp != null && !temp.isEmpty()) {
292                sx.add(HARVEST_OPERATOR_KEY, temp);
293            }
294        }
295        if (harvestJob.getHarvestAudience() != null && !harvestJob.getHarvestAudience().isEmpty()) {
296            sx.add(HARVEST_AUDIENCE_KEY, harvestJob.getHarvestAudience());
297        }
298
299        XmlState validationResult = validateHarvestInfo(sx);
300        if (validationResult.getOkState().equals(XmlState.OKSTATE.NOTOK)) {
301            String msg = "Could not create a valid harvestinfo file for job " + harvestJob.getJobID() + ": "
302                    + validationResult.getError();
303            throw new IOFailure(msg);
304        } else {
305            sx.save(getHarvestInfoFile());
306        }
307    }
308
309    /**
310     * Checks that the xml data in the persistent job data file is valid.
311     *
312     * @param sx the SimpleXml object containing the persistent job data
313     * @return empty string, if valid persistent job data, otherwise a string containing the problem.
314     */
315    private static XmlState validateHarvestInfo(SimpleXml sx) {
316        final String version;
317        if (sx.hasKey(HARVESTINFO_VERSION_KEY)) {
318            version = sx.getString(HARVESTINFO_VERSION_KEY);
319        } else {
320            final String errMsg = "Missing version information";
321            return new XmlState(OKSTATE.NOTOK, errMsg);
322        }
323
324        String[] keysToCheck = new String[0];
325        if (version.equals(HARVESTINFO_VERSION_NUMBER)) {
326            keysToCheck = ALL_KEYS;
327        } else if (version.equals(OLD_HARVESTINFO_VERSION_NUMBER_5)) {
328            keysToCheck = ALL_KEYS_5;
329        } else if(version.equals(OLD_HARVESTINFO_VERSION_NUMBER_4)) {
330                version.equals(OLD_HARVESTINFO_VERSION_NUMBER_4);
331        } else {
332            final String errMsg = "Invalid version: " + version;
333            return new XmlState(OKSTATE.NOTOK, errMsg);
334        }
335
336        /* Check, if all necessary components exist in the SimpleXml */
337
338        for (String key : keysToCheck) {
339            if (!sx.hasKey(key)) {
340                final String errMsg = "Could not find key " + key + " in harvestInfoFile, version " + version;
341                return new XmlState(OKSTATE.NOTOK, errMsg);
342            }
343        }
344
345        /* Check, if the jobId element contains a long value */
346        final String jobidAsString = sx.getString(JOBID_KEY);
347        try {
348            Long.valueOf(jobidAsString);
349        } catch (Throwable t) {
350            final String errMsg = "The id '" + jobidAsString + "' in harvestInfoFile must be a long value";
351            return new XmlState(OKSTATE.NOTOK, errMsg);
352        }
353
354        // Verify, that the job channel and snapshot elements are not the empty String (version 0.5+)
355        if ((version.equals(HARVESTINFO_VERSION_NUMBER) || version.equals(OLD_HARVESTINFO_VERSION_NUMBER_5))
356                        && sx.getString(CHANNEL_KEY).isEmpty()) {
357            final String errMsg = "The channel and/or the snapshot value of the job is undefined";
358            return new XmlState(OKSTATE.NOTOK, errMsg);
359        }
360
361        if (version.equals(OLD_HARVESTINFO_VERSION_NUMBER_4) && sx.getString(PRIORITY_KEY).isEmpty()) {
362            final String errMsg = "The priority value of the job is undefined";
363            return new XmlState(OKSTATE.NOTOK, errMsg);
364        }
365
366        // Verify, that the job channel element is not the empty String
367        if ((version.equals(HARVESTINFO_VERSION_NUMBER) || version.equals(OLD_HARVESTINFO_VERSION_NUMBER_5))
368                        && sx.getString(CHANNEL_KEY).isEmpty()) {
369            final String errMsg = "The channel and/or the snapshot value of the job is undefined";
370            return new XmlState(OKSTATE.NOTOK, errMsg);
371        }
372
373        // Verify, that the ORDERXMLNAME element is not the empty String (V.0.6)
374        if (version.equals(HARVESTINFO_VERSION_NUMBER) && sx.getString(TEMPLATENAME_KEY).isEmpty()) {
375            final String errMsg = "The orderxmlname of the job is undefined";
376            return new XmlState(OKSTATE.NOTOK, errMsg);
377        }
378        
379        // Verify, that the ORDERXMLNAME element is not the empty String (V.0.5)
380        if (version.equals(OLD_HARVESTINFO_VERSION_NUMBER_5) && sx.getString(OLDORDERXMLNAME_KEY).isEmpty()) {
381            final String errMsg = "The orderxmlname of the job is undefined";
382            return new XmlState(OKSTATE.NOTOK, errMsg);
383        }
384
385        // Verify that the HARVESTNUM element is an integer
386        final String harvestNumAsString = sx.getString(HARVESTNUM_KEY);
387        try {
388            Integer.valueOf(harvestNumAsString);
389        } catch (Throwable t) {
390            final String errMsg = "The HARVESTNUM in harvestInfoFile must be a Integer "
391                    + "value. The value given is '" + harvestNumAsString + "'.";
392            return new XmlState(OKSTATE.NOTOK, errMsg);
393        }
394
395        /*
396         * Check, if the OrigHarvestDefinitionID element contains a long value.
397         */
398        final String origHarvestDefinitionIDAsString = sx.getString(ORIGHARVESTDEFINITIONID_KEY);
399        try {
400            Long.valueOf(origHarvestDefinitionIDAsString);
401        } catch (Throwable t) {
402            final String errMsg = "The OrigHarvestDefinitionID in harvestInfoFile must be a long value. "
403                    + "The value given is: '" + origHarvestDefinitionIDAsString + "'.";
404            return new XmlState(OKSTATE.NOTOK, errMsg);
405        }
406
407        /* Check, if the MaxBytesPerDomain element contains a long value */
408        final String maxBytesPerDomainAsString = sx.getString(MAXBYTESPERDOMAIN_KEY);
409        try {
410            Long.valueOf(maxBytesPerDomainAsString);
411        } catch (Throwable t) {
412            final String errMsg = "The MaxBytesPerDomain element in harvestInfoFile must be a long value. "
413                    + "The value given is: '" + maxBytesPerDomainAsString + "'.";
414            return new XmlState(OKSTATE.NOTOK, errMsg);
415        }
416
417        /* Check, if the MaxObjectsPerDomain element contains a long value */
418        final String maxObjectsPerDomainAsString = sx.getString(MAXOBJECTSPERDOMAIN_KEY);
419        try {
420            Long.valueOf(maxObjectsPerDomainAsString);
421        } catch (Throwable t) {
422            final String errMsg = "The MaxObjectsPerDomain element in harvestInfoFile must be a long value. "
423                    + "The value given is: '" + maxObjectsPerDomainAsString + "'.";
424            return new XmlState(OKSTATE.NOTOK, errMsg);
425        }
426
427        return new XmlState(OKSTATE.OK, "");
428    }
429
430    /**
431     * @return the harvestInfoFile.
432     */
433    private File getHarvestInfoFile() {
434        return new File(crawlDir, HARVEST_INFO_FILENAME);
435    }
436
437    /**
438     * Return the harvestInfo jobID.
439     *
440     * @return the harvestInfo JobID
441     * @throws IOFailure if no harvestInfo exists or it is invalid.
442     */
443    public Long getJobID() {
444        SimpleXml sx = read(); // reads and validates XML
445        String jobIDString = sx.getString(JOBID_KEY);
446        return Long.parseLong(jobIDString);
447    }
448
449    /**
450     * Return the job's harvest channel name.
451     *
452     * @return the job's harvest channel name
453     * @throws IOFailure if no harvestInfo exists or it is invalid.
454     */
455    public String getChannel() {
456        SimpleXml sx = read(); // reads and validates XML
457        return sx.getString(CHANNEL_KEY);
458    }
459
460    /**
461     * Return the job harvestNum.
462     *
463     * @return the job harvestNum
464     * @throws IOFailure if no harvestInfo exists or it is invalid.
465     */
466    public int getJobHarvestNum() {
467        SimpleXml sx = read(); // reads and validates XML
468        String harvestNumString = sx.getString(HARVESTNUM_KEY);
469        return Integer.parseInt(harvestNumString);
470    }
471
472    /**
473     * Return the job origHarvestDefinitionID.
474     *
475     * @return the job origHarvestDefinitionID
476     * @throws IOFailure if no harvestInfo exists or it is invalid.
477     */
478    public Long getOrigHarvestDefinitionID() {
479        SimpleXml sx = read(); // reads and validates XML
480        String origHarvestDefinitionIDString = sx.getString(ORIGHARVESTDEFINITIONID_KEY);
481        return Long.parseLong(origHarvestDefinitionIDString);
482    }
483
484    /**
485     * Return the job maxBytesPerDomain value.
486     *
487     * @return the job maxBytesPerDomain value.
488     * @throws IOFailure if no harvestInfo exists or it is invalid.
489     */
490    public long getMaxBytesPerDomain() {
491        SimpleXml sx = read(); // reads and validates XML
492        String maxBytesPerDomainString = sx.getString(MAXBYTESPERDOMAIN_KEY);
493        return Long.parseLong(maxBytesPerDomainString);
494    }
495
496    /**
497     * Return the job maxObjectsPerDomain value.
498     *
499     * @return the job maxObjectsPerDomain value.
500     * @throws IOFailure if no harvestInfo exists or it is invalid.
501     */
502    public long getMaxObjectsPerDomain() {
503        SimpleXml sx = read(); // reads and validates XML
504        String maxObjectsPerDomainString = sx.getString(MAXOBJECTSPERDOMAIN_KEY);
505        return Long.parseLong(maxObjectsPerDomainString);
506    }
507
508    /**
509     * Return the job orderXMLName.
510     *
511     * @return the job orderXMLName.
512     * @throws IOFailure if no harvestInfo exists or it is invalid.
513     */
514    public String getOrderXMLName() {
515        SimpleXml sx = read(); // reads and validates XML
516        return sx.getString(TEMPLATENAME_KEY);
517    }
518
519    /**
520     * Return the version of the xml.
521     *
522     * @return the version of the xml
523     * @throws IOFailure if no harvestInfo exists or it is invalid.
524     */
525    public String getVersion() {
526        SimpleXml sx = read(); // reads and validates XML
527        return sx.getString(HARVESTINFO_VERSION_KEY);
528    }
529
530    /**
531     * Helper class for returning the OK-state back to the caller.
532     */
533    protected static class XmlState {
534        /** enum for holding OK/NOTOK values. */
535        public enum OKSTATE {
536            OK, NOTOK
537        }
538
539        /** the state of the XML. */
540        private OKSTATE ok;
541        /** The error coming from an xml-validation. */
542        private String error;;
543
544        /**
545         * Constructor of an XmlState object.
546         *
547         * @param ok Is the XML OK or not OKAY?
548         * @param error The error found during validation, if any.
549         */
550        public XmlState(OKSTATE ok, String error) {
551            this.ok = ok;
552            this.error = error;
553        }
554
555        /**
556         * @return the OK value of this object.
557         */
558        public OKSTATE getOkState() {
559            return ok;
560        }
561
562        /**
563         * @return the error value of this object (maybe null).
564         */
565        public String getError() {
566            return error;
567        }
568    }
569
570    /**
571     * If not set in persistentJobData, fall back to the standard way.
572     * jobid-harvestid.
573     */
574    public String getHarvestFilenamePrefix() {
575        SimpleXml sx = read(); // reads and validates XML
576        String prefix = null;
577        if (!sx.hasKey(HARVEST_FILENAME_PREFIX_KEY)) {
578            prefix = this.getJobID() + "-" + this.getOrigHarvestDefinitionID();
579            log.warn("harvestFilenamePrefix not part of persistentJobData. Using old standard naming: {}", prefix);
580        } else {
581            prefix = sx.getString(HARVEST_FILENAME_PREFIX_KEY);
582        }
583        return prefix;
584    }
585
586    /**
587     * Return the harvestname in this xml.
588     *
589     * @return the harvestname in this xml.
590     * @throws IOFailure if no harvestInfo exists or it is invalid.
591     */
592    public String getharvestName() {
593        SimpleXml sx = read(); // reads and validates XML
594        return sx.getString(HARVEST_NAME_KEY);
595    }
596
597    /**
598     * Return the schedulename in this xml.
599     *
600     * @return the schedulename in this xml (or null, if undefined for this job)
601     * @throws IOFailure if no harvestInfo exists or it is invalid.
602     */
603    public String getScheduleName() {
604        SimpleXml sx = read(); // reads and validates XML
605        if (sx.hasKey(HARVEST_SCHED_KEY)) {
606            return sx.getString(HARVEST_SCHED_KEY);
607        } else {
608            return null;
609        }
610    }
611
612    /**
613     * Return the submit date of the job in this xml.
614     *
615     * @return the submit date of the job in this xml.
616     * @throws IOFailure if no harvestInfo exists or it is invalid.
617     */
618    public String getJobSubmitDate() {
619        SimpleXml sx = read(); // reads and validates XML
620        return sx.getString(JOB_SUBMIT_DATE_KEY);
621    }
622
623    /**
624     * Return the performer information in this xml.
625     *
626     * @return the performer information in this xml or null if value undefined
627     * @throws IOFailure if no harvestInfo exists or it is invalid.
628     */
629    public String getPerformer() {
630        SimpleXml sx = read(); // reads and validates XML
631        if (sx.hasKey(HARVEST_PERFORMER_KEY)) {
632            return sx.getString(HARVEST_PERFORMER_KEY);
633        } else {
634            return null;
635        }
636    }
637
638    /**
639     * Return the audience information in this xml.
640     *
641     * @return the audience information in this xml or null if value undefined
642     * @throws IOFailure if no harvestInfo exists or it is invalid.
643     */
644    public String getAudience() {
645        SimpleXml sx = read(); // reads and validates XML
646        if (sx.hasKey(HARVEST_AUDIENCE_KEY)) {
647            return sx.getString(HARVEST_AUDIENCE_KEY);
648        } else {
649            return null;
650        }
651    }
652
653}