/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2018 The Royal Danish Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.harvester.harvesting;

import java.io.File;
import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.SimpleXml;
import dk.netarkivet.common.utils.archive.ArchiveDateConverter;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.H3HeritrixTemplate;
import dk.netarkivet.harvester.datamodel.H3HeritrixTemplate.MetadataInfo;
import dk.netarkivet.harvester.datamodel.HarvestDefinitionInfo;
import dk.netarkivet.harvester.datamodel.Job;
import dk.netarkivet.harvester.harvesting.PersistentJobData.XmlState.OKSTATE;

/**
 * Class PersistentJobData holds information about an ongoing harvest. Presently the information is stored in an
 * XML file named {@code harvestInfo.xml} inside the crawl directory.
 * <p>
 * The file is written once via {@link #write(Job, HarvestDefinitionInfo)} and afterwards read (and validated) lazily by
 * the getters. Files of version 0.6 (current), 0.5 and 0.4 are accepted when reading.
 */
public class PersistentJobData implements JobInfo {

    /** The logger to use. */
    private static final Logger log = LoggerFactory.getLogger(PersistentJobData.class);

    /** the crawlDir. */
    private final File crawlDir;

    /**
     * The filename for the file containing the persistent job data, stored in crawlDir.
     */
    private static final String HARVEST_INFO_FILENAME = "harvestInfo.xml";
    /** XML-root element for the persistent Job Data. */
    private static final String ROOT_ELEMENT = "harvestInfo";
    /** Key in harvestinfo file for the ID of the job. */
    private static final String JOBID_KEY = ROOT_ELEMENT + ".jobId";
    /** Key in harvestinfo file for the harvestNum of the job. */
    private static final String HARVESTNUM_KEY = ROOT_ELEMENT + ".harvestNum";
    /** Key in harvestinfo file for the maxBytesPerDomain value for the job. */
    private static final String MAXBYTESPERDOMAIN_KEY = ROOT_ELEMENT + ".maxBytesPerDomain";
    /**
     * Key in harvestinfo file for the maxObjectsPerDomain value for the job.
     */
    private static final String MAXOBJECTSPERDOMAIN_KEY = ROOT_ELEMENT + ".maxObjectsPerDomain";
    /** Key in harvestinfo file for the template name (orderXMLName) of the job, used from version 0.6. */
    private static final String TEMPLATENAME_KEY = ROOT_ELEMENT + ".templateName";
    /** Key in harvestinfo file for the orderXMLName of the job, as used by versions 0.5 and 0.4. */
    private static final String OLDORDERXMLNAME_KEY = ROOT_ELEMENT + ".orderXMLName";
    /** Key in harvestinfo file for the orderXMLName update date. */
    private static final String ORDERXML_UPDATE_DATE_KEY = ROOT_ELEMENT + ".templateLastUpdateDate";
    /** Key in harvestinfo file for the orderXMLName description. */
    private static final String ORDERXML_DESCRIPTION_KEY = ROOT_ELEMENT + ".templateDescription";
    /** Key in harvestinfo file for the harvestID of the job. */
    private static final String ORIGHARVESTDEFINITIONID_KEY = ROOT_ELEMENT + ".origHarvestDefinitionID";
    /** Key in harvestinfo file for the harvest channel of the job. */
    private static final String CHANNEL_KEY = ROOT_ELEMENT + ".channel";

    /** Key in harvestinfo file for the priority of the job (version 0.4 only). */
    private static final String PRIORITY_KEY = ROOT_ELEMENT + ".priority";

    /** Key in harvestinfo file for the original harvest definition name. */
    private static final String HARVEST_NAME_KEY = ROOT_ELEMENT + ".origHarvestDefinitionName";

    /**
     * Key in harvestinfo file for the original harvest definition description.
     */
    private static final String HARVEST_DESC_KEY = ROOT_ELEMENT + ".origHarvestDefinitionComments";

    /**
     * Key in harvestinfo file for the original harvest definition schedule, will be empty for broad crawls.
     */
    private static final String HARVEST_SCHED_KEY = ROOT_ELEMENT + ".scheduleName";
    /** The harvestfilename prefix used by this job set in the Job class. */
    private static final String HARVEST_FILENAME_PREFIX_KEY = ROOT_ELEMENT + ".harvestFilenamePrefix";
    /** The submitted date of this job. */
    private static final String JOB_SUBMIT_DATE_KEY = ROOT_ELEMENT + ".jobSubmitDate";
    /** The performer of this harvest. */
    private static final String HARVEST_PERFORMER_KEY = ROOT_ELEMENT + ".performer";
    /** The audience of this harvest. */
    private static final String HARVEST_AUDIENCE_KEY = ROOT_ELEMENT + ".audience";
    /** The operator of this harvest. */
    private static final String HARVEST_OPERATOR_KEY = ROOT_ELEMENT + ".operator";

    /** Key in harvestinfo file for the file version. */
    private static final String HARVESTINFO_VERSION_KEY = "harvestInfo.version";
    /** Value for current version number. */
    private static final String HARVESTINFO_VERSION_NUMBER = "0.6";

    /**
     * Also support for version 0.4 of harvestInfo xml. In the previous format the channel and snapshot keys were
     * absent. Instead there was the priority key.
     */
    private static final String OLD_HARVESTINFO_VERSION_NUMBER_4 = "0.4";

    /**
     * Also support for version 0.5 of harvestInfo xml. In this previous format, templateName was named orderXMLName
     * and fields templateLastUpdateDate &amp; templateDescription didn't exist.
     */
    private static final String OLD_HARVESTINFO_VERSION_NUMBER_5 = "0.5";

    /**
     * String array containing all mandatory keys contained in valid version 0.6 xml.
     * ORDERXML_UPDATE_DATE_KEY and ORDERXML_DESCRIPTION_KEY are optional. Further optional keys are HARVEST_DESC_KEY
     * representing harvest comments, and HARVEST_SCHED_KEY representing the scheduleName behind the harvest, only
     * applicable for selective harvests.
     */
    private static final String[] ALL_KEYS = {JOBID_KEY, HARVESTNUM_KEY, MAXBYTESPERDOMAIN_KEY,
            MAXOBJECTSPERDOMAIN_KEY, TEMPLATENAME_KEY, ORIGHARVESTDEFINITIONID_KEY, CHANNEL_KEY,
            HARVESTINFO_VERSION_KEY, HARVEST_NAME_KEY, HARVEST_FILENAME_PREFIX_KEY, JOB_SUBMIT_DATE_KEY};

    /** String array containing all mandatory keys contained in valid version 0.5 xml. */
    private static final String[] ALL_KEYS_5 = {JOBID_KEY, HARVESTNUM_KEY, MAXBYTESPERDOMAIN_KEY,
            MAXOBJECTSPERDOMAIN_KEY, OLDORDERXMLNAME_KEY, ORIGHARVESTDEFINITIONID_KEY, CHANNEL_KEY,
            HARVESTINFO_VERSION_KEY, HARVEST_NAME_KEY, HARVEST_FILENAME_PREFIX_KEY, JOB_SUBMIT_DATE_KEY};

    /**
     * String array containing all mandatory keys contained in old valid version 0.4 xml.
     */
    private static final String[] ALL_KEYS_4 = {JOBID_KEY, HARVESTNUM_KEY, MAXBYTESPERDOMAIN_KEY,
            MAXOBJECTSPERDOMAIN_KEY, OLDORDERXMLNAME_KEY, ORIGHARVESTDEFINITIONID_KEY, PRIORITY_KEY,
            HARVESTINFO_VERSION_KEY, HARVEST_NAME_KEY, HARVEST_FILENAME_PREFIX_KEY, JOB_SUBMIT_DATE_KEY};

    /** the SimpleXml object, that contains the XML in HARVEST_INFO_FILENAME. Cached after the first valid read. */
    private SimpleXml theXML = null;

    /**
     * Constructor for class PersistentJobData.
     *
     * @param crawlDir The directory where the harvestInfo can be found
     * @throws ArgumentNotValid if crawlDir is null or does not exist.
     */
    public PersistentJobData(File crawlDir) {
        ArgumentNotValid.checkExistsDirectory(crawlDir, "crawlDir");

        this.crawlDir = crawlDir;
    }

    /**
     * Returns true, if harvestInfo exists in crawlDir, otherwise false.
     *
     * @return true, if harvestInfo exists as a regular file, otherwise false
     */
    public boolean exists() {
        return getHarvestInfoFile().isFile();
    }

    /**
     * Returns true if the given directory contains a harvestInfo file.
     *
     * @param crawlDir A directory that may contain harvestInfo file.
     * @return True if the harvestInfo file exists.
     */
    public static boolean existsIn(File crawlDir) {
        return getHarvestInfoFile(crawlDir).exists();
    }

    /**
     * Locate the harvestInfo file inside a crawl directory. The file is not required to exist.
     *
     * @param crawlDir the crawl directory that holds (or will hold) the harvestInfo file
     * @return the location of the harvestInfo File in the crawlDir.
     */
    public static File getHarvestInfoFile(File crawlDir) {
        return new File(crawlDir, HARVEST_INFO_FILENAME);
    }

    /**
     * Read harvestInfo into SimpleXML object. The parsed and validated document is cached, so the file is only read
     * until the first successful validation.
     *
     * @return SimpleXml object for harvestInfo
     * @throws IOFailure if HarvestInfoFile does not exist or if HarvestInfoFile is invalid
     */
    private synchronized SimpleXml read() {
        if (theXML != null) {
            return theXML;
        }
        final File harvestInfoFile = getHarvestInfoFile();
        if (!exists()) {
            throw new IOFailure("The harvestInfo file '" + harvestInfoFile.getAbsolutePath() + "' does not exist!");
        }
        final SimpleXml sx = new SimpleXml(harvestInfoFile);
        final XmlState validationResult = validateHarvestInfo(sx);
        if (validationResult.getOkState() == OKSTATE.NOTOK) {
            // Include the raw file contents in the failure to ease debugging of broken files.
            final String contents;
            try {
                contents = FileUtils.readFile(harvestInfoFile);
            } catch (IOException e) {
                String errorMsg = "Unable to read HarvestInfoFile: '" + harvestInfoFile.getAbsolutePath() + "'";
                // Keep the underlying I/O problem as the cause instead of dropping it.
                throw new IOFailure(errorMsg, e);
            }
            throw new IOFailure("The harvestInfoFile '" + harvestInfoFile.getAbsolutePath() + "' is invalid: "
                    + validationResult.getError() + ". The contents of the file is this: " + contents);
        }
        // The xml is valid
        theXML = sx;
        return sx;
    }

    /**
     * Write information about given Job to XML-structure.
     *
     * @param harvestJob the given Job
     * @param hdi Information about the harvestJob.
     * @throws ArgumentNotValid if harvestJob or hdi is null.
     * @throws IOFailure if any failure occurs while persisting data, or if the file has already been written.
     */
    public synchronized void write(Job harvestJob, HarvestDefinitionInfo hdi) {
        ArgumentNotValid.checkNotNull(harvestJob, "Job harvestJob");
        ArgumentNotValid.checkNotNull(hdi, "HarvestDefinitionInfo hdi");
        if (exists()) {
            String errorMsg = "Persistent Job data already exists in '" + crawlDir + "'. Aborting";
            log.warn(errorMsg);
            throw new IOFailure(errorMsg);
        }

        // Non-null only when the job's template is an H3 template carrying extra metadata.
        final H3HeritrixTemplate template = asH3Template(harvestJob);

        SimpleXml sx = new SimpleXml(ROOT_ELEMENT);
        sx.add(HARVESTINFO_VERSION_KEY, HARVESTINFO_VERSION_NUMBER);
        sx.add(JOBID_KEY, harvestJob.getJobID().toString());
        sx.add(CHANNEL_KEY, harvestJob.getChannel());
        sx.add(HARVESTNUM_KEY, Integer.toString(harvestJob.getHarvestNum()));
        sx.add(ORIGHARVESTDEFINITIONID_KEY, Long.toString(harvestJob.getOrigHarvestDefinitionID()));
        sx.add(MAXBYTESPERDOMAIN_KEY, Long.toString(harvestJob.getMaxBytesPerDomain()));
        sx.add(MAXOBJECTSPERDOMAIN_KEY, Long.toString(harvestJob.getMaxObjectsPerDomain()));
        sx.add(TEMPLATENAME_KEY, harvestJob.getOrderXMLName());
        // insert fields got from crawler-beans.cxml and add them into
        // harvestInfo.xml for preservation purpose
        if (template != null) {
            addIfNotEmpty(sx, ORDERXML_UPDATE_DATE_KEY, template.getMetadataInfo(MetadataInfo.TEMPLATE_UPDATE_DATE));
            addIfNotEmpty(sx, ORDERXML_DESCRIPTION_KEY, template.getMetadataInfo(MetadataInfo.TEMPLATE_DESCRIPTION));
        }

        sx.add(HARVEST_NAME_KEY, hdi.getOrigHarvestName());

        // Optional elements: only written when they carry a value.
        addIfNotEmpty(sx, HARVEST_DESC_KEY, hdi.getOrigHarvestDesc());
        addIfNotEmpty(sx, HARVEST_SCHED_KEY, hdi.getScheduleName());

        // Store the harvestname prefix selected by the used Naming Strategy.
        sx.add(HARVEST_FILENAME_PREFIX_KEY, harvestJob.getHarvestFilenamePrefix());

        // store the submitted date in WARC Date format
        sx.add(JOB_SUBMIT_DATE_KEY, ArchiveDateConverter.getWarcDateFormat().format(harvestJob.getSubmittedDate()));
        // if performer set to something different from the empty String
        final String performer = Settings.get(HarvesterSettings.PERFORMER);
        if (!performer.isEmpty()) {
            sx.add(HARVEST_PERFORMER_KEY, performer);
        }
        // insert fields got from crawler-beans.cxml and add them into
        // harvestInfo.xml for preservation purpose
        if (template != null) {
            addIfNotEmpty(sx, HARVEST_OPERATOR_KEY, template.getMetadataInfo(MetadataInfo.OPERATOR));
        }
        addIfNotEmpty(sx, HARVEST_AUDIENCE_KEY, harvestJob.getHarvestAudience());

        // Never persist a document that this class itself would refuse to read back.
        XmlState validationResult = validateHarvestInfo(sx);
        if (validationResult.getOkState() == OKSTATE.NOTOK) {
            String msg = "Could not create a valid harvestinfo file for job " + harvestJob.getJobID() + ": "
                    + validationResult.getError();
            throw new IOFailure(msg);
        }
        sx.save(getHarvestInfoFile());
    }

    /**
     * Return the job's template as an H3 template, if it is one.
     *
     * @param harvestJob the job whose template is inspected
     * @return the template cast to H3HeritrixTemplate, or null if the job's template is of another kind
     */
    private static H3HeritrixTemplate asH3Template(Job harvestJob) {
        final Object orderXmlDoc = harvestJob.getOrderXMLdoc();
        return (orderXmlDoc instanceof H3HeritrixTemplate) ? (H3HeritrixTemplate) orderXmlDoc : null;
    }

    /**
     * Add the key/value pair to the XML only when the value is neither null nor the empty String.
     *
     * @param sx the XML structure to add to
     * @param key the key of the element
     * @param value the candidate value, may be null or empty
     */
    private static void addIfNotEmpty(SimpleXml sx, String key, String value) {
        if (value != null && !value.isEmpty()) {
            sx.add(key, value);
        }
    }

    /**
     * Checks that the xml data in the persistent job data file is valid.
     *
     * @param sx the SimpleXml object containing the persistent job data
     * @return an XmlState with state OK and an empty error string if the data is valid, otherwise an XmlState with
     * state NOTOK and a description of the first problem found.
     */
    private static XmlState validateHarvestInfo(SimpleXml sx) {
        if (!sx.hasKey(HARVESTINFO_VERSION_KEY)) {
            return notOk("Missing version information");
        }
        final String version = sx.getString(HARVESTINFO_VERSION_KEY);

        final boolean isCurrent = version.equals(HARVESTINFO_VERSION_NUMBER);
        final boolean isVersion5 = version.equals(OLD_HARVESTINFO_VERSION_NUMBER_5);
        final boolean isVersion4 = version.equals(OLD_HARVESTINFO_VERSION_NUMBER_4);

        final String[] keysToCheck;
        if (isCurrent) {
            keysToCheck = ALL_KEYS;
        } else if (isVersion5) {
            keysToCheck = ALL_KEYS_5;
        } else if (isVersion4) {
            // Version 0.4 files must be checked against their own mandatory key set.
            keysToCheck = ALL_KEYS_4;
        } else {
            return notOk("Invalid version: " + version);
        }

        /* Check, if all necessary components exist in the SimpleXml */
        for (String key : keysToCheck) {
            if (!sx.hasKey(key)) {
                return notOk("Could not find key " + key + " in harvestInfoFile, version " + version);
            }
        }

        /* Check, if the jobId element contains a long value */
        final String jobidAsString = sx.getString(JOBID_KEY);
        if (!isLong(jobidAsString)) {
            return notOk("The id '" + jobidAsString + "' in harvestInfoFile must be a long value");
        }

        // Verify, that the job channel element is not the empty String (versions 0.5+)
        if ((isCurrent || isVersion5) && sx.getString(CHANNEL_KEY).isEmpty()) {
            return notOk("The channel and/or the snapshot value of the job is undefined");
        }

        // Verify, that the priority element is not the empty String (version 0.4)
        if (isVersion4 && sx.getString(PRIORITY_KEY).isEmpty()) {
            return notOk("The priority value of the job is undefined");
        }

        // Verify, that the ORDERXMLNAME element is not the empty String (V.0.6)
        if (isCurrent && sx.getString(TEMPLATENAME_KEY).isEmpty()) {
            return notOk("The orderxmlname of the job is undefined");
        }

        // Verify, that the ORDERXMLNAME element is not the empty String (V.0.5)
        if (isVersion5 && sx.getString(OLDORDERXMLNAME_KEY).isEmpty()) {
            return notOk("The orderxmlname of the job is undefined");
        }

        // Verify that the HARVESTNUM element is an integer
        final String harvestNumAsString = sx.getString(HARVESTNUM_KEY);
        if (!isInteger(harvestNumAsString)) {
            return notOk("The HARVESTNUM in harvestInfoFile must be a Integer "
                    + "value. The value given is '" + harvestNumAsString + "'.");
        }

        /*
         * Check, if the OrigHarvestDefinitionID element contains a long value.
         */
        final String origHarvestDefinitionIDAsString = sx.getString(ORIGHARVESTDEFINITIONID_KEY);
        if (!isLong(origHarvestDefinitionIDAsString)) {
            return notOk("The OrigHarvestDefinitionID in harvestInfoFile must be a long value. "
                    + "The value given is: '" + origHarvestDefinitionIDAsString + "'.");
        }

        /* Check, if the MaxBytesPerDomain element contains a long value */
        final String maxBytesPerDomainAsString = sx.getString(MAXBYTESPERDOMAIN_KEY);
        if (!isLong(maxBytesPerDomainAsString)) {
            return notOk("The MaxBytesPerDomain element in harvestInfoFile must be a long value. "
                    + "The value given is: '" + maxBytesPerDomainAsString + "'.");
        }

        /* Check, if the MaxObjectsPerDomain element contains a long value */
        final String maxObjectsPerDomainAsString = sx.getString(MAXOBJECTSPERDOMAIN_KEY);
        if (!isLong(maxObjectsPerDomainAsString)) {
            return notOk("The MaxObjectsPerDomain element in harvestInfoFile must be a long value. "
                    + "The value given is: '" + maxObjectsPerDomainAsString + "'.");
        }

        return new XmlState(OKSTATE.OK, "");
    }

    /**
     * Build a failed validation result.
     *
     * @param errMsg description of the validation problem
     * @return an XmlState in state NOTOK carrying the given message
     */
    private static XmlState notOk(String errMsg) {
        return new XmlState(OKSTATE.NOTOK, errMsg);
    }

    /**
     * @param value the text to test, may be null
     * @return true if the value can be parsed as a long, otherwise false
     */
    private static boolean isLong(String value) {
        try {
            Long.parseLong(value);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * @param value the text to test, may be null
     * @return true if the value can be parsed as an int, otherwise false
     */
    private static boolean isInteger(String value) {
        try {
            Integer.parseInt(value);
            return true;
        } catch (NumberFormatException e) {
            return false;
        }
    }

    /**
     * @return the harvestInfoFile.
     */
    private File getHarvestInfoFile() {
        return new File(crawlDir, HARVEST_INFO_FILENAME);
    }

    /**
     * Return the harvestInfo jobID.
     *
     * @return the harvestInfo JobID
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public Long getJobID() {
        SimpleXml sx = read(); // reads and validates XML
        String jobIDString = sx.getString(JOBID_KEY);
        return Long.parseLong(jobIDString);
    }

    /**
     * Return the job's harvest channel name.
     *
     * @return the job's harvest channel name
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getChannel() {
        SimpleXml sx = read(); // reads and validates XML
        return sx.getString(CHANNEL_KEY);
    }

    /**
     * Return the job harvestNum.
     *
     * @return the job harvestNum
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public int getJobHarvestNum() {
        SimpleXml sx = read(); // reads and validates XML
        String harvestNumString = sx.getString(HARVESTNUM_KEY);
        return Integer.parseInt(harvestNumString);
    }

    /**
     * Return the job origHarvestDefinitionID.
     *
     * @return the job origHarvestDefinitionID
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public Long getOrigHarvestDefinitionID() {
        SimpleXml sx = read(); // reads and validates XML
        String origHarvestDefinitionIDString = sx.getString(ORIGHARVESTDEFINITIONID_KEY);
        return Long.parseLong(origHarvestDefinitionIDString);
    }

    /**
     * Return the job maxBytesPerDomain value.
     *
     * @return the job maxBytesPerDomain value.
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public long getMaxBytesPerDomain() {
        SimpleXml sx = read(); // reads and validates XML
        String maxBytesPerDomainString = sx.getString(MAXBYTESPERDOMAIN_KEY);
        return Long.parseLong(maxBytesPerDomainString);
    }

    /**
     * Return the job maxObjectsPerDomain value.
     *
     * @return the job maxObjectsPerDomain value.
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public long getMaxObjectsPerDomain() {
        SimpleXml sx = read(); // reads and validates XML
        String maxObjectsPerDomainString = sx.getString(MAXOBJECTSPERDOMAIN_KEY);
        return Long.parseLong(maxObjectsPerDomainString);
    }

    /**
     * Return the job orderXMLName. For files written in an older format (0.5 and 0.4) the name is stored under the
     * legacy orderXMLName key, which is used when the templateName key is absent.
     *
     * @return the job orderXMLName.
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getOrderXMLName() {
        SimpleXml sx = read(); // reads and validates XML
        if (!sx.hasKey(TEMPLATENAME_KEY) && sx.hasKey(OLDORDERXMLNAME_KEY)) {
            return sx.getString(OLDORDERXMLNAME_KEY);
        }
        return sx.getString(TEMPLATENAME_KEY);
    }

    /**
     * Return the version of the xml.
     *
     * @return the version of the xml
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getVersion() {
        SimpleXml sx = read(); // reads and validates XML
        return sx.getString(HARVESTINFO_VERSION_KEY);
    }

    /**
     * Helper class for returning the OK-state back to the caller.
     */
    protected static class XmlState {
        /** enum for holding OK/NOTOK values. */
        public enum OKSTATE {
            OK, NOTOK
        }

        /** the state of the XML. */
        private final OKSTATE ok;
        /** The error coming from an xml-validation. */
        private final String error;

        /**
         * Constructor of an XmlState object.
         *
         * @param ok Is the XML OK or not OKAY?
         * @param error The error found during validation, if any.
         */
        public XmlState(OKSTATE ok, String error) {
            this.ok = ok;
            this.error = error;
        }

        /**
         * @return the OK value of this object.
         */
        public OKSTATE getOkState() {
            return ok;
        }

        /**
         * @return the error value of this object (maybe null).
         */
        public String getError() {
            return error;
        }
    }

    /**
     * Return the harvest filename prefix of the job. If not set in persistentJobData, fall back to the standard way:
     * jobid-harvestid.
     *
     * @return the harvest filename prefix stored in the xml, or "jobid-harvestid" if none is stored
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getHarvestFilenamePrefix() {
        SimpleXml sx = read(); // reads and validates XML
        if (sx.hasKey(HARVEST_FILENAME_PREFIX_KEY)) {
            return sx.getString(HARVEST_FILENAME_PREFIX_KEY);
        }
        String prefix = this.getJobID() + "-" + this.getOrigHarvestDefinitionID();
        log.warn("harvestFilenamePrefix not part of persistentJobData. Using old standard naming: {}", prefix);
        return prefix;
    }

    /**
     * Return the harvestname in this xml.
     *
     * @return the harvestname in this xml.
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getharvestName() {
        SimpleXml sx = read(); // reads and validates XML
        return sx.getString(HARVEST_NAME_KEY);
    }

    /**
     * Return the schedulename in this xml.
     *
     * @return the schedulename in this xml (or null, if undefined for this job)
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getScheduleName() {
        return getOptionalValue(HARVEST_SCHED_KEY);
    }

    /**
     * Return the submit date of the job in this xml.
     *
     * @return the submit date of the job in this xml.
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getJobSubmitDate() {
        SimpleXml sx = read(); // reads and validates XML
        return sx.getString(JOB_SUBMIT_DATE_KEY);
    }

    /**
     * Return the performer information in this xml.
     *
     * @return the performer information in this xml or null if value undefined
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getPerformer() {
        return getOptionalValue(HARVEST_PERFORMER_KEY);
    }

    /**
     * Return the audience information in this xml.
     *
     * @return the audience information in this xml or null if value undefined
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    public String getAudience() {
        return getOptionalValue(HARVEST_AUDIENCE_KEY);
    }

    /**
     * Look up an optional element in the (validated) harvestInfo xml.
     *
     * @param key the key of the optional element
     * @return the value stored under the key, or null if the key is absent
     * @throws IOFailure if no harvestInfo exists or it is invalid.
     */
    private String getOptionalValue(String key) {
        SimpleXml sx = read(); // reads and validates XML
        return sx.hasKey(key) ? sx.getString(key) : null;
    }

}