001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.datamodel;
024
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.List;

import javax.servlet.jsp.JspWriter;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.exceptions.NotImplementedException;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.harvester.HarvesterSettings;
046
/**
 * Class encapsulating the Heritrix3 crawler-beans.cxml file.
 * <p>
 * Heritrix3 uses a new configuration model based on Spring, so XPath is not suitable for processing the template.
 * Instead we use placeholders marked by %{..} rather than ${..}, because the latter form is already used by
 * Heritrix3 itself.
 * <p>
 * The template is a H3 template if it contains the string:
 * <p>
 * "xmlns="http://www.springframework.org/...."
 */
060public class H3HeritrixTemplate extends HeritrixTemplate implements Serializable {
061
062    private static final Logger log = LoggerFactory.getLogger(H3HeritrixTemplate.class);
063
064    private String template;
065    
066    /** QuotaEnforcer states for this template. TODO necessary?? */
067    private Long forceMaxbytesPerDomain;
068    private Long forceMaxobjectsPerDomain; 
069   
070    /** Has this HeritrixTemplate been verified. */
071    private boolean verified;
072
073    public final static String METADATA_ITEMS_PLACEHOLDER = "%{METADATA_ITEMS_PLACEHOLDER}";
074    public static final String MAX_TIME_SECONDS_PLACEHOLDER = "%{MAX_TIME_SECONDS_PLACEHOLDER}";
075    public static final String CRAWLERTRAPS_PLACEHOLDER = "%{CRAWLERTRAPS_PLACEHOLDER}";
076    
077    public static final String DEDUPLICATION_BEAN_REFERENCE_PATTERN = "<ref bean=\"DeDuplicator\"/>";
078    public static final String DEDUPLICATION_BEAN_PATTERN =  "<bean id=\"DeDuplicator\"";
079    public static final String DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER 
080        = "%{DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER}"; 
081
082    public static final String ARCHIVE_FILE_PREFIX_PLACEHOLDER = "%{ARCHIVE_FILE_PREFIX_PLACEHOLDER}";
083        
084    public static final String FRONTIER_QUEUE_TOTAL_BUDGET_PLACEHOLDER 
085        = "%{FRONTIER_QUEUE_TOTAL_BUDGET_PLACEHOLDER}";
086    
087    public static final String QUOTA_ENFORCER_GROUP_MAX_FETCH_SUCCES_PLACEHOLDER = 
088                "%{QUOTA_ENFORCER_GROUP_MAX_FETCH_SUCCES_PLACEHOLDER}";
089    
090    public static final String QUOTA_ENFORCER_MAX_BYTES_PLACEHOLDER 
091        = "%{QUOTA_ENFORCER_MAX_BYTES_PLACEHOLDER}"; 
092    
093    
094    // PLACEHOLDERS for archiver beans (Maybe not necessary)
095    final String ARCHIVER_BEAN_REFERENCE_PLACEHOLDER = "%{ARCHIVER_BEAN_REFERENCE_PLACEHOLDER}";        
096        final String ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER = "%{ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER}";
097        
098    /**
099     * Constructor for HeritrixTemplate class.
100     *
101     * @param doc the order.xml
102     * @param verify If true, verifies if the given dom4j Document contains the elements required by our software.
103     * @throws ArgumentNotValid if doc is null, or verify is true and doc does not obey the constraints required by our
104     * software.
105     */
106    public H3HeritrixTemplate(String template) {
107        ArgumentNotValid.checkNotNull(template, "String template");
108        this.template = template;
109    }
110    
111        /**
112     * return the template.
113     *
114     * @return the template
115     */
116    public HeritrixTemplate getTemplate() {
117        return this;
118    }
119
120    /**
121     * Has Template been verified?
122     *
123     * @return true, if verified on construction, otherwise false
124     */
125    public boolean isVerified() {
126        return verified;
127    }
128
129    /**
130     * Return HeritrixTemplate as XML.
131     * @return HeritrixTemplate as XML
132     */
133    @Override
134    public String getXML() {
135        return template;
136    }
137    
138    /**
139     * Update the maxTimeSeconds property in the heritrix3 template, if possible.
140     * @param maxJobRunningTimeSecondsL Force the harvestJob to end after this number of seconds 
141     * Property of the org.archive.crawler.framework.CrawlLimitEnforcer
142     * <!-- <property name="maxTimeSeconds" value="0" /> -->
143     */
144    @Override
145        public void setMaxJobRunningTime(Long maxJobRunningTimeSecondsL) {
146                if (template.contains(MAX_TIME_SECONDS_PLACEHOLDER)) {
147                this.template = template.replace(MAX_TIME_SECONDS_PLACEHOLDER, 
148                                Long.toString(maxJobRunningTimeSecondsL));
149                } else {
150                        log.warn("The placeholder '" + MAX_TIME_SECONDS_PLACEHOLDER 
151                                        + "' was not found in the template. Therefore maxRunningTime not set");
152                }
153        }
154
155    
156        @Override
157        public void setMaxBytesPerDomain(Long maxbytesL) {
158                this.forceMaxbytesPerDomain = maxbytesL;                
159        }       
160  
161
162        @Override
163        public Long getMaxBytesPerDomain() {
164                return this.forceMaxbytesPerDomain;
165        }
166
167        @Override
168        public void setMaxObjectsPerDomain(Long maxobjectsL) {
169                this.forceMaxobjectsPerDomain = maxobjectsL;
170        }
171
172        @Override
173        public Long getMaxObjectsPerDomain() {
174                return this.forceMaxobjectsPerDomain;
175        }
176    
177        @Override
178        public boolean isValid() {
179                /*
180                StringBuilder errors = new StringBuilder();
181                // check for Deduplication index-location placeholder and DEDUPLICATION_BEAN_PATTERN
182                if (template.contains(DEDUPLICATION_BEAN_PATTERN)) {
183                        if (!template.contains(DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER)) {
184                                errors.append("Has DefdMissing placeholder '" +  DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER + "'"
185                        }
186                } 
187                template.contains(DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER) 
188                && template.contains(deduplicationBeanPattern)
189                */
190                return true;
191        }
192
193        @Override
194        // This method is used to decide, whether to request a deduplication index or not.
195        // Done by checking, if both  
196        //   - a DeDuplicator bean is present in the template
197        // and
198        //   - a  DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER is also present.
199        // and 
200        //   - a DeDuplicator reference bean is present in the template
201        public boolean IsDeduplicationEnabled() {
202                return (template.contains(DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER) 
203                                && template.contains(DEDUPLICATION_BEAN_PATTERN)
204                                && template.contains(DEDUPLICATION_BEAN_REFERENCE_PATTERN)); 
205        }       
206
207        /**
208     * Configuring the quota-enforcer, depending on budget definition. Object limit can be defined either by
209     * using the queue-total-budget property or the quota enforcer. Which is chosen is set by the argument
210     * maxObjectsIsSetByQuotaEnforcer}'s value. So quota enforcer is set as follows:
211     * If all values in the quotaEnforcer is infinity, it is in effect disabled
212     * <ul>
213     * <li>Object limit is not set by quota enforcer, disabled only if there is no byte limit.</li>
214     * <li>Object limit is set by quota enforcer, so it should be enabled if a byte or object limit is set.</li>
215     * </ul>
216     *
217     * @param maxObjectsIsSetByQuotaEnforcer Decides whether the maxObjectsIsSetByQuotaEnforcer or not.
218     * @param forceMaxBytesPerDomain The number of max bytes per domain enforced (can be no limit)
219     * @param forceMaxObjectsPerDomain The number of max objects per domain enforced (can be no limit)
220     */
221        public void configureQuotaEnforcer(boolean maxObjectsIsSetByQuotaEnforcer,
222                        long forceMaxBytesPerDomain, long forceMaxObjectsPerDomain) {
223                this.forceMaxobjectsPerDomain = forceMaxObjectsPerDomain;
224                this.forceMaxbytesPerDomain = forceMaxBytesPerDomain;
225                String tmp = template;
226                if (!maxObjectsIsSetByQuotaEnforcer) {
227                        // SetMaxObjects in the global budget to forceMaxObjectsPerDomain??
228                        String tmp1 = tmp.replace(
229                                        FRONTIER_QUEUE_TOTAL_BUDGET_PLACEHOLDER, Long.toString( forceMaxObjectsPerDomain ));
230                        // SetMaxObjects to infinity in the quotaEnforcer
231                        tmp = tmp1.replace(QUOTA_ENFORCER_GROUP_MAX_FETCH_SUCCES_PLACEHOLDER, 
232                                        Long.toString(Constants.HERITRIX_MAXOBJECTS_INFINITY));
233                } else {
234                        // SetMaxObjects in the global budget to Infinity
235                        String tmp1 = tmp.replace(
236                                        FRONTIER_QUEUE_TOTAL_BUDGET_PLACEHOLDER, Long.toString( Constants.HERITRIX_MAXOBJECTS_INFINITY ));                      
237                        // SetMaxObjects to forceMaxObjectsPerDomain in the quotaEnforcer
238                        tmp = tmp1.replace(QUOTA_ENFORCER_GROUP_MAX_FETCH_SUCCES_PLACEHOLDER, 
239                                        Long.toString(forceMaxObjectsPerDomain));
240                }
241                
242                // SetMaxbytes in the QuotaEnforcer to forceMaxBytesPerDomain
243                // Divide by 1024 since Heritrix uses KB rather than bytes,
244                // and add 1 to avoid to low limit due to rounding.
245                String maxBytesStringValue = "-1";
246                if (forceMaxBytesPerDomain != Constants.HERITRIX_MAXBYTES_INFINITY) {
247                        maxBytesStringValue = Long.toString(( forceMaxBytesPerDomain 
248                                        / Constants.BYTES_PER_HERITRIX_BYTELIMIT_UNIT) + 1);
249                        log.debug("MaxbytesPerDomain set to {} Kbytes per domain", maxBytesStringValue);
250                } else {
251                        log.debug("MaxbytesPerDomain set to infinite number of Kbytes per domain");     
252                }
253                
254                this.template = tmp.replace(QUOTA_ENFORCER_MAX_BYTES_PLACEHOLDER, maxBytesStringValue);
255                
256        }
257        
258         /**
259     * Make sure that Heritrix will archive its data in the chosen archiveFormat.
260     *
261     * @param archiveFormat the chosen archiveformat ('arc' or 'warc' supported)
262     * @throw ArgumentNotValid If the chosen archiveFormat is not supported.
263     */
264        @Override
265        public void setArchiveFormat(String archiveFormat) {
266                if (!template.contains(ARCHIVER_BEAN_REFERENCE_PLACEHOLDER)){
267                throw new IllegalState("The placeholder '" + ARCHIVER_BEAN_REFERENCE_PLACEHOLDER 
268                                        + "' is missing. Unable to insert proper archive writer");
269        }
270        if (!template.contains(ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER)) {
271                        throw new IllegalState("The placeholder '" + ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER 
272                                        + "' is missing. Unable to insert proper archive writer");
273                }
274                if ("arc".equalsIgnoreCase(archiveFormat)) {
275                        log.debug("ARC format selected to be used by Heritrix3");
276                        setArcArchiveformat();
277                } else if ("warc".equalsIgnoreCase(archiveFormat)) {
278                        log.debug("WARC format selected to be used by Heritrix3");
279                        setWarcArchiveformat();
280                } else {
281                        throw new ArgumentNotValid("Configuration of '" + HarvesterSettings.HERITRIX_ARCHIVE_FORMAT
282                                        + "' is invalid! Unrecognized format '" + archiveFormat + "'.");
283                }
284        }
285
286        /**
287         * Set the archive-format as ARC. This means enabling the ARCWriterProcessor in the template
288         */
289        private void setArcArchiveformat(){
290                String arcWriterbeanReference = "<ref bean=\"arcWriter\"/>";
291        String templateNew = template.replace(ARCHIVER_BEAN_REFERENCE_PLACEHOLDER, arcWriterbeanReference);
292        template = templateNew.replace(ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER, getArcWriterProcessor()); 
293    }
294                
295        private String getArcWriterProcessor() {
296                
297//      <bean id="arcWriter" class="org.archive.modules.writer.ARCWriterProcessor">
298//        <!-- <property name="compress" value="true" /> -->
299//        <!-- <property name="prefix" value="IAH" /> -->
300//        <!-- <property name="suffix" value="${HOSTNAME}" /> -->
301//        <!-- <property name="maxFileSizeBytes" value="100000000" /> -->
302//        <!-- <property name="poolMaxActive" value="1" /> -->
303//        <!-- <property name="poolMaxWaitMs" value="300000" /> -->
304//        <!-- <property name="skipIdenticalDigests" value="false" /> -->
305//        <!-- <property name="maxTotalBytesToWrite" value="0" /> -->
306//        <!-- <property name="directory" value="." /> -->
307//        <!-- <property name="storePaths">
308//              <list>
309//               <value>arcs</value>
310//              </list>
311//             </property> -->
312//       </bean>
313// 
314           String arcWriterBean 
315                = "<bean id=\"arcWriter\" class=\"org.archive.modules.writer.ARCWriterProcessor\">";
316           // TODO Read compress value from heritrix3Settings
317           arcWriterBean += "\n<property name=\"compress\" value=\"false\"/>"
318                         + "\n<property name=\"prefix\" value=\"" + ARCHIVE_FILE_PREFIX_PLACEHOLDER  
319                        + "\"/></bean>";
320           return arcWriterBean;                              
321        }
322
323                
324        /** 
325         * Insert WARC-archiver beans and remove placeholder for ARC-Archiver-beans
326         * It is an error, if the WARC place-holders doesnt't exist.
327         * It is not an error, if the property placeholder does not exist.
328         */
329        private void setWarcArchiveformat() {           
330                String warcWriterbeanReference = "<ref bean=\"warcWriter\"/>";
331                String warcWriterProcessorBean = "<bean id=\"warcWriter\" class=\"dk.netarkivet.harvester.harvesting.NasWARCProcessor\">";
332                String propertyName="\n<property name=\"";
333                String valuePrefix = "\" value=\"";
334                String valueSuffix = "\"";
335                String propertyEnd="/>";
336                if (!template.contains(ARCHIVER_BEAN_REFERENCE_PLACEHOLDER)) {
337                        throw new IllegalState("The placeholder '" + ARCHIVER_BEAN_REFERENCE_PLACEHOLDER 
338                                        + "' is missing");
339                }
340                if (!template.contains(ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER)) {
341                        throw new IllegalState("The placeholder '" + ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER 
342                                        + "' is missing");
343                }
344                StringBuilder propertyBuilder = new StringBuilder();
345                // TODO Read template from Heritrix3Settings
346                propertyBuilder.append(propertyName + "template" + valuePrefix 
347                                + "${prefix}-${timestamp17}-${serialno}-${heritrix.hostname}"
348                                // Default value: ${prefix}-${timestamp17}-${serialno}-${heritrix.pid}~${heritrix.hostname}~${heritrix.port}
349                                + valueSuffix + propertyEnd);
350                propertyBuilder.append(propertyName + "compress" + valuePrefix + "false"  // TODO Replace false by Heritrix3Settingsvalue 
351                                + valueSuffix + propertyEnd);
352                propertyBuilder.append(propertyName + "prefix" + valuePrefix 
353                                + ARCHIVE_FILE_PREFIX_PLACEHOLDER
354                                + valueSuffix + propertyEnd);
355                propertyBuilder.append(propertyName + "writeRequests" + valuePrefix 
356                                + Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_REQUESTS)
357                                + valueSuffix + propertyEnd);
358                propertyBuilder.append(propertyName + "writeMetadata" + valuePrefix 
359                                + Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_METADATA)
360                                + valueSuffix + propertyEnd);
361 /*
362                propertyBuilder.append(propertyName + "writeRevisitForIdenticalDigests" + valuePrefix 
363                                + Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS)
364                                + valueSuffix + propertyEnd);
365                propertyBuilder.append(propertyName + "writeRevisitForNotModified" + valuePrefix 
366                                + Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_REVISIT_FOR_NOT_MODIFIED)
367                                + valueSuffix + propertyEnd);
368  */
369                propertyBuilder.append(propertyName + "skipIdenticalDigests" + valuePrefix 
370                                + Settings.get(HarvesterSettings.HERITRIX_WARC_SKIP_IDENTICAL_DIGESTS)
371                                + valueSuffix + propertyEnd);
372                propertyBuilder.append(         
373                          propertyName + "startNewFilesOnCheckpoint" + valuePrefix 
374                                + Settings.get(HarvesterSettings.HERITRIX_WARC_START_NEW_FILES_ON_CHECKPOINT)
375                                + valueSuffix + propertyEnd);
376                
377                warcWriterProcessorBean += propertyBuilder.toString();
378                warcWriterProcessorBean += "\n\n%{METADATA_ITEMS_PLACEHOLDER}\n</bean>";
379                String templateNew = template.replace(
380                                ARCHIVER_BEAN_REFERENCE_PLACEHOLDER, warcWriterbeanReference);
381                this.template = templateNew.replace(ARCHIVER_PROCESSOR_BEAN_PLACEHOLDER,
382                                warcWriterProcessorBean);
383        }
384
385        @Override
386        /**
387         * With H3 template, we insert the crawlertraps into the template at once.
388         * They are inserted to be part of a org.archive.modules.deciderules.MatchesListRegexDecideRule
389         * bean.
390         * 
391         * @param elementName The elementName is currently not used with H3
392         * @param crawlertraps A list of crawlertraps to be inserted
393         */
394        public void insertCrawlerTraps(String elementName, List<String> crawlertraps) {
395//      <bean class="org.archive.modules.deciderules.MatchesListRegexDecideRule">
396//      <!-- <property name="listLogicalOr" value="true" /> -->
397//      <!-- <property name="regexList">
398//            <list>
399//            CRAWLERTRAPS_PLACEHOLDER 
400//            </list>
401//           </property> -->
402//     </bean>
403        if (crawlertraps.isEmpty()) {
404                log.debug("No crawlertraps yet. No insertion is done");
405                return;
406        } else if (!template.contains(CRAWLERTRAPS_PLACEHOLDER)) {      
407                log.warn("The placeholder '" + CRAWLERTRAPS_PLACEHOLDER 
408                                + "' is absent from the template. No insertion is done at all. {} traps were ignored", 
409                                crawlertraps);
410                return;
411        } else {
412                log.info("Inserting {} crawlertraps into the template", crawlertraps.size());
413                StringBuilder sb = new StringBuilder();
414                for (String trap: crawlertraps) {
415                        sb.append("<value>" + trap + "</value>\n");
416                }
417                // Adding the placeholder again to be able to insert crawlertraps multiple times.
418                sb.append(CRAWLERTRAPS_PLACEHOLDER + "\n"); 
419                String templateNew = template.replace(CRAWLERTRAPS_PLACEHOLDER, sb.toString());
420                this.template = templateNew;
421        }
422        }
423
424        @Override
425        public void writeTemplate(OutputStream os) throws IOFailure {
426                try {
427                        os.write(template.getBytes(Charset.forName("UTF-8")));
428                } catch (IOException e) {
429                        throw new IOFailure("Unable to write template to outputstream", e);
430                }
431                
432        }
433
434        @Override
435        public boolean hasContent() {
436                throw new NotImplementedException("The hasContent method hasn't been implemented yet");
437        }
438
439        @Override
440        public void writeToFile(File orderXmlFile) {
441                BufferedWriter writer = null;
442                try {
443                        writer = new BufferedWriter( new FileWriter(orderXmlFile));
444                        writer.write(template);
445                } catch(IOException e) {
446                        throw new IOFailure("Unable to write template to file '" + orderXmlFile.getAbsolutePath() + "'.", e);
447                } finally {
448                        IOUtils.closeQuietly(writer);
449                }
450        }
451
452        @Override
453        public void setRecoverlogNode(File recoverlogGzFile) {
454                throw new NotImplementedException("This method has not yet been implemented");
455                
456        }
457
458        @Override
459        public void setDeduplicationIndexLocation(String absolutePath) {
460                if (!template.contains(DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER)) {
461                        throw new IllegalState("The placeholder for the deduplication index location property '" +  DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER 
462                                        + "' was not found. Maybe the placeholder has already been replaced with the correct value: " 
463                                        + template);
464                }
465        String templateNew = template.replace(DEDUPLICATION_INDEX_LOCATION_PLACEHOLDER, absolutePath); 
466        this.template = templateNew;
467        }
468
469        @Override
470        public void setSeedsFilePath(String absolutePath) {
471         log.debug("Note: SeedsFilePath is not set in h3");
472        }
473
474        @Override
475        public void setArchiveFilePrefix(String archiveFilePrefix) {
476                if (!template.contains(ARCHIVE_FILE_PREFIX_PLACEHOLDER)) {
477                        throw new IllegalState("The placeholder for the archive file prefix property '" 
478                                        + ARCHIVE_FILE_PREFIX_PLACEHOLDER 
479                                        + "' was not found. Maybe the placeholder has already been replaced with the correct value. The template looks like this: " 
480                                        + template);
481                }
482                String templateNew = template.replace(ARCHIVE_FILE_PREFIX_PLACEHOLDER, archiveFilePrefix);              
483        this.template = templateNew;
484                
485        }
486
487        @Override
488        public void setDiskPath(String absolutePath) {
489                // NOP
490                log.warn("The DiskPath is not settable in the H3 template");
491        }
492
493        @Override
494        public void removeDeduplicatorIfPresent() {
495                //NOP
496                log.warn("Removing the Deduplicator is not possible with the H3 templates and should not be required with the H3 template.");
497        }
498        
499//<property name="metadataItems">
500//  <map>
501//        <entry key="harvestInfo.version" value="1.03"/> <!-- TODO maybe not add this one -->
502//        <entry key="harvestInfo.jobId" value="1"/>
503//        <entry key="harvestInfo.channel" value="HIGH"/>
504//        <entry key="harvestInfo.harvestNum" value="1"/>
505//        <entry key="harvestInfo.origHarvestDefinitionID" value="1"/>
506//        <entry key="harvestInfo.maxBytesPerDomain" value="100000"/>
507//        <entry key="harvestInfo.maxObjectsPerDomain" value="-1"/>
508//        <entry key="harvestInfo.orderXMLName" value="defaultOrderXml"/>
509//        <entry key="harvestInfo.origHarvestDefinitionName" value="ddddddddd"/>
510//        <entry key="harvestInfo.scheduleName" value="EveryHour"/> <!-- Optional. only relevant for Selective Harvests -->
511//        <entry key="harvestInfo.harvestFilenamePrefix" value="netarkivet-1-1"/>
512//        <entry key="harvestInfo.jobSubmitDate" value="22. 10. 2014"/>
513//        <entry key="harvestInfo.performer" value="performer"/> <!-- Optional. -->
514//        <entry key="harvestInfo.audience" value="audience"/> <!-- Optional. -->
515//  </map>
516//  </property>
517
518        public void insertWarcInfoMetadata(Job ajob, String origHarvestdefinitionName, 
519                        String scheduleName, String performer) {
520                if (!template.contains(METADATA_ITEMS_PLACEHOLDER)) {
521                        throw new IllegalState("The placeholder for the property '" + METADATA_ITEMS_PLACEHOLDER  
522                                        + "' was not found. Maybe the placeholder has already been replaced with the correct value. The template looks like this: " 
523                                        + template); 
524                }
525                String startMetadataEntry = "\n<entry key=\"";
526                String endMetadataEntry = "\"/>";
527                String valuePart = "\" value=\"";
528                StringBuilder sb = new StringBuilder();
529                sb.append("<property name=\"metadataItems\">\n<map>\n");
530                
531                // <entry key="harvestInfo.version" value="1.03"/>
532                
533                sb.append(startMetadataEntry);
534                sb.append(HARVESTINFO_VERSION + valuePart + HARVESTINFO_VERSION_NUMBER + endMetadataEntry); 
535                sb.append(startMetadataEntry);
536                sb.append(HARVESTINFO_JOBID + valuePart + ajob.getJobID() + endMetadataEntry);
537
538                sb.append(startMetadataEntry);
539                sb.append(HARVESTINFO_CHANNEL + valuePart + ajob.getChannel() + endMetadataEntry);
540                sb.append(startMetadataEntry);
541                sb.append(HARVESTINFO_HARVESTNUM + valuePart + ajob.getHarvestNum() + endMetadataEntry);
542                sb.append(startMetadataEntry);
543                sb.append(HARVESTINFO_ORIGHARVESTDEFINITIONID + valuePart + ajob.getOrigHarvestDefinitionID() + endMetadataEntry);
544                sb.append(startMetadataEntry);
545                sb.append(HARVESTINFO_MAXBYTESPERDOMAIN + valuePart + ajob.getMaxBytesPerDomain() + endMetadataEntry);
546                sb.append(startMetadataEntry);
547                sb.append(HARVESTINFO_MAXOBJECTSPERDOMAIN + valuePart + ajob.getMaxObjectsPerDomain() + endMetadataEntry);
548                sb.append(startMetadataEntry);
549                sb.append(HARVESTINFO_ORDERXMLNAME + valuePart + ajob.getOrderXMLName() + endMetadataEntry);
550                sb.append(startMetadataEntry);
551                sb.append(HARVESTINFO_ORIGHARVESTDEFINITIONNAME + valuePart + 
552                                origHarvestdefinitionName + endMetadataEntry);
553                
554                /* optional schedule-name. */
555                if (scheduleName != null) {
556                        sb.append(startMetadataEntry);
557                        sb.append(HARVESTINFO_SCHEDULENAME + valuePart + scheduleName + endMetadataEntry);
558                }
559                sb.append(startMetadataEntry);
560                sb.append(HARVESTINFO_HARVESTFILENAMEPREFIX + valuePart + ajob.getHarvestFilenamePrefix() + endMetadataEntry);
561                sb.append(startMetadataEntry);
562                sb.append(HARVESTINFO_JOBSUBMITDATE + valuePart + ajob.getSubmittedDate() + endMetadataEntry);
563                
564                /* optional HARVESTINFO_PERFORMER */
565                if (performer != null){
566                        sb.append(startMetadataEntry);
567                        sb.append(HARVESTINFO_PERFORMER + valuePart + performer  + endMetadataEntry);
568                }
569                
570                /* optional HARVESTINFO_PERFORMER */
571                if (ajob.getHarvestAudience() != null) {
572                        sb.append(startMetadataEntry);
573                        sb.append(HARVESTINFO_AUDIENCE + valuePart + ajob.getHarvestAudience() + endMetadataEntry);
574                }
575                sb.append("\n</map>\n</property>\n");
576                
577                // Replace command
578                String templateNew = template.replace(METADATA_ITEMS_PLACEHOLDER, sb.toString());
579                this.template = templateNew;
580        }
581        
582        @Override
583        public void writeTemplate(JspWriter out) throws IOFailure {
584                try {
585                        out.write(template);
586                } catch (IOException e) {
587                        throw new IOFailure("Unable to write to JspWriter", e);
588                }
589        }
590        
591        /**
592         *  Hack to remove existing placeholders, that is still present after template 
593         *  manipulation is completed.
594         */
595        public void removePlaceholders() {
596                template = template.replace(METADATA_ITEMS_PLACEHOLDER, "");
597                template = template.replace(CRAWLERTRAPS_PLACEHOLDER, "");
598                
599                if (template.contains(METADATA_ITEMS_PLACEHOLDER)) {
600                        throw new IllegalState("The placeholder for the property '" + METADATA_ITEMS_PLACEHOLDER  
601                                        + "' should have been deleted now."); 
602                }
603                if (template.contains(CRAWLERTRAPS_PLACEHOLDER)) {
604                        throw new IllegalState("The placeholder for the property '" + CRAWLERTRAPS_PLACEHOLDER  
605                                        + "' should have been deleted now."); 
606                }               
607        }
608}