package dk.netarkivet.harvester.datamodel;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.XmlUtils;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.eav.EAV;
import dk.netarkivet.harvester.harvesting.report.Heritrix1Constants;
import dk.netarkivet.harvester.webinterface.ExtendedFieldConstants;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import javax.servlet.jsp.JspWriter;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.XMLWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/harvester/datamodel/H1HeritrixTemplate.class */
public class H1HeritrixTemplate extends HeritrixTemplate implements Serializable {
    /** The order.xml document wrapped by this template. */
    private Document template;

    /** True once the verifying constructor has accepted the document; false otherwise. */
    private boolean verified;

    // --- XPaths into the QuotaEnforcer pre-fetch processor and the frontier ---
    public static final String QUOTA_ENFORCER_ENABLED_XPATH = "/crawl-order/controller/map[@name='pre-fetch-processors']/newObject[@name='QuotaEnforcer']/boolean[@name='enabled']";
    public static final String GROUP_MAX_ALL_KB_XPATH = "/crawl-order/controller/map[@name='pre-fetch-processors']/newObject[@name='QuotaEnforcer']/long[@name='group-max-all-kb']";
    public static final String GROUP_MAX_FETCH_SUCCESS_XPATH = "/crawl-order/controller/map[@name='pre-fetch-processors']/newObject[@name='QuotaEnforcer']/long[@name='group-max-fetch-successes']";
    public static final String QUEUE_TOTAL_BUDGET_XPATH = "/crawl-order/controller/newObject[@name='frontier']/long[@name='queue-total-budget']";

    // --- XPaths into the decide-rules, HTTP headers and scope ---
    public static final String DECIDERULES_MAP_XPATH = "/crawl-order/controller/newObject/newObject[@name='decide-rules']/map[@name='rules']";
    public static final String DECIDERULES_ACCEPT_IF_PREREQUISITE_XPATH = "/crawl-order/controller/newObject/newObject[@name='decide-rules']/map[@name='rules']/newObject[@class='org.archive.crawler.deciderules.PrerequisiteAcceptDecideRule']";
    public static final String HERITRIX_USER_AGENT_XPATH = "/crawl-order/controller/map[@name='http-headers']/string[@name='user-agent']";
    public static final String HERITRIX_FROM_XPATH = "/crawl-order/controller/map[@name='http-headers']/string[@name='from']";
    public static final String DECIDINGSCOPE_XPATH = "/crawl-order/controller/newObject[@name='scope'][@class='org.archive.crawler.deciderules.DecidingScope']";

    // --- XPaths into the write-processors (absolute form, starting with a single '/') ---
    public static final String DEDUPLICATOR_XPATH = "/crawl-order/controller/map[@name='write-processors']/newObject[@name='DeDuplicator']";
    public static final String ARC_ARCHIVER_PATH_XPATH = "/crawl-order/controller/map[@name='write-processors']/newObject[@name='Archiver']/stringList[@name='path']/string";
    public static final String WARC_ARCHIVER_PATH_XPATH = "/crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/stringList[@name='path']/string";
    public static final String DEDUPLICATOR_INDEX_LOCATION_XPATH = "/crawl-order/controller/map[@name='write-processors']/newObject[@name='DeDuplicator']/string[@name='index-location']";
    public static final String DEDUPLICATOR_ENABLED = "/crawl-order/controller/map[@name='write-processors']/newObject[@name='DeDuplicator']/boolean[@name='enabled']";

    // --- XPaths written with a leading '//' (descendant axis); values kept exactly as they are ---
    public static final String DISK_PATH_XPATH = "//crawl-order/controller/string[@name='disk-path']";
    public static final String ARCHIVEFILE_PREFIX_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject/string[@name='prefix']";
    public static final String ARCSDIR_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='Archiver']/stringList[@name='path']/string";
    public static final String WARCWRITERPROCESSOR_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']";
    public static final String ARCWRITERPROCESSOR_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='Archiver']";
    public static final String WARCSDIR_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/stringList[@name='path']/string";
    public static final String SEEDS_FILE_XPATH = "//crawl-order/controller/newObject[@name='scope']/string[@name='seedsfile']";
    public static final String ARCS_ENABLED_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='Archiver']/boolean[@name='enabled']";
    public static final String WARCS_ENABLED_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/boolean[@name='enabled']";
    public static final String WARCS_WRITE_REQUESTS_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/boolean[@name='write-requests']";
    public static final String WARCS_WRITE_METADATA_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/boolean[@name='write-metadata']";
    public static final String WARCS_WRITE_METADATA_OUTLINKS_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/boolean[@name='write-metadata-outlinks']";
    public static final String WARCS_SKIP_IDENTICAL_DIGESTS_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/boolean[@name='skip-identical-digests']";
    public static final String WARCS_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/boolean[@name='write-revisit-for-identical-digests']";
    public static final String WARCS_WRITE_REVISIT_FOR_NOT_MODIFIED_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/boolean[@name='write-revisit-for-not-modified']";
    public static final String METADATA_ITEMS_XPATH = "//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/map[@name='metadata-items']";

    // --- Regular expressions used to validate the text of required nodes ---
    /** Optional whitespace, optional minus sign, one or more digits, optional whitespace. */
    private static final String WHOLE_NUMBER_REGEXP = "\\s*-?[0-9]+\\s*";
    /** Matches any text. */
    private static final String EVERYTHING_REGEXP = ".*";
    /** Requires a parenthesised part containing "+http://" or "+https://" followed by a dotted name. */
    private static final String USER_AGENT_REGEXP = "\\S+.*\\(.*\\+http(s)?://\\S+\\.\\S+.*\\).*";
    /** Requires the shape "something@something.something" without whitespace. */
    private static final String FROM_REGEXP = "\\S+@\\S+\\.\\S+";

    public static final String MAXTIMESEC_PATH_XPATH = "/crawl-order/controller/long[@name='max-time-sec']";

    private static final Logger log = LoggerFactory.getLogger(H1HeritrixTemplate.class);

    /**
     * XPaths that must be present in a verified template, mapped to the pattern their
     * trimmed text must match. Filled in by the static initializer of this class.
     */
    private static final Map<String, Pattern> requiredXpaths = new HashMap<String, Pattern>();

    /**
     * Creates a template from an order.xml document, optionally verifying it first.
     *
     * <p>Verification requires every XPath in {@code requiredXpaths} to exist with text
     * matching its pattern, and at least one of the ARC/WARC archiver path nodes to be
     * present. An archiver path node that is present must contain exactly "arcs"
     * (respectively "warcs") after trimming.
     *
     * <p>The template stores a clone of {@code document}, not the instance passed in.
     *
     * @param document the order.xml document; must not be null
     * @param z whether to verify the document before accepting it
     * @throws ArgumentNotValid if {@code document} is null or verification fails
     */
    public H1HeritrixTemplate(Document document, boolean z) {
        ArgumentNotValid.checkNotNull(document, "Document doc");
        if (z) {
            for (Map.Entry<String, Pattern> required : requiredXpaths.entrySet()) {
                String xpath = required.getKey();
                Pattern expected = required.getValue();
                Node node = document.selectSingleNode(xpath);
                // Messages embed the whole serialized document, so build them only on failure.
                if (node == null) {
                    throw new ArgumentNotValid("Template error: Missing node: " + xpath + ". The template looks like this: " + document.asXML());
                }
                if (!expected.matcher(node.getText().trim()).matches()) {
                    throw new ArgumentNotValid("Template error: Value '" + node.getText() + "' of node '" + xpath + "' does not match required regexp '" + expected + "'. The template looks like this: " + document.asXML());
                }
            }
            boolean arcWriterFound = hasValidArchiverPath(document, ARC_ARCHIVER_PATH_XPATH, "arcs");
            boolean warcWriterFound = hasValidArchiverPath(document, WARC_ARCHIVER_PATH_XPATH, "warcs");
            if (!arcWriterFound && !warcWriterFound) {
                throw new ArgumentNotValid("Template error: An ARC or WARC writer processor seems to be missing");
            }
            this.verified = true;
        }
        this.template = (Document) document.clone();
    }

    /**
     * Checks an archiver path node, if the document has one.
     *
     * @param document the document being verified
     * @param xpath XPath of the archiver path node
     * @param expectedDir the exact directory name the trimmed node text must equal
     * @return true if the node exists (and is valid), false if it is absent
     * @throws ArgumentNotValid if the node exists but its trimmed text differs from {@code expectedDir}
     */
    private static boolean hasValidArchiverPath(Document document, String xpath, String expectedDir) {
        Node pathNode = document.selectSingleNode(xpath);
        if (pathNode == null) {
            return false;
        }
        if (!expectedDir.equals(pathNode.getText().trim())) {
            throw new ArgumentNotValid("Template error: Value '" + pathNode.getText() + "' of node '" + xpath + "' does not match required regexp '" + expectedDir + "'");
        }
        return true;
    }

    /**
     * Creates a verified template from an order.xml document.
     * Equivalent to calling the two-argument constructor with verification enabled.
     *
     * @param document the order.xml document
     */
    public H1HeritrixTemplate(Document document) {
        this(document, true);
    }

    public H1HeritrixTemplate(long j, String str) throws DocumentException {
        ArgumentNotValid.checkNotNull(str, "String template");
        this.template_id = j;
        this.template = XmlUtils.documentFromString(str);
    }

    /**
     * Returns a copy of the wrapped document.
     *
     * @return a clone of the template document; changes to it do not affect this template
     */
    public Document getTemplate() {
        Object copy = template.clone();
        return (Document) copy;
    }

    /**
     * Tells whether this template passed verification at construction time.
     *
     * @return true if the verifying constructor accepted the document
     */
    public boolean isVerified() {
        return verified;
    }

    /**
     * Serializes the template document.
     *
     * @return the document as an XML string
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public String getXML() {
        final String xml = template.asXML();
        return xml;
    }

    /**
     * Adds a REJECT decide-rule containing the given crawler-trap regular expressions
     * to the rules map of an order.xml document.
     *
     * <p>The new rule is inserted immediately before the PrerequisiteAcceptDecideRule
     * when that rule is a direct child of the rules map; otherwise it stays at the end
     * of the map. Nothing is changed when {@code list} is empty.
     *
     * @param document the order.xml document to modify in place
     * @param str value of the "name" attribute of the new rule element
     * @param list the crawler-trap regular expressions, one list entry per expression
     * @throws IllegalState if the document has no decide-rules map element
     */
    public static void editOrderXMLAddCrawlerTraps(Document document, String str, List<String> list) {
        if (list.isEmpty()) {
            return;
        }
        // selectSingleNode returns a Node; instanceof also rejects null.
        Node rulesNode = document.selectSingleNode(DECIDERULES_MAP_XPATH);
        if (!(rulesNode instanceof Element)) {
            throw new IllegalState("Unable to update order.xml document. It does not have the right form to addcrawler trap deciderules.");
        }
        Element rulesMap = (Element) rulesNode;
        Element trapRule = rulesMap.addElement("newObject");

        Node prerequisiteRule = document.selectSingleNode(DECIDERULES_ACCEPT_IF_PREREQUISITE_XPATH);
        if (prerequisiteRule != null) {
            @SuppressWarnings("unchecked") // elements() is untyped in older dom4j releases
            List<Element> rules = rulesMap.elements();
            int insertAt = rules.indexOf(prerequisiteRule);
            // Only move the rule when the prerequisite rule really lives in this map;
            // add(-1, ...) would throw after the rule had already been detached.
            if (insertAt >= 0) {
                trapRule.detach();
                rules.add(insertAt, trapRule);
            }
        }

        trapRule.addAttribute("name", str);
        trapRule.addAttribute("class", Heritrix1Constants.MATCHESLISTREGEXPDECIDERULE_CLASSNAME);

        Element decision = trapRule.addElement("string");
        decision.addAttribute("name", "decision");
        decision.addText("REJECT");

        Element listLogic = trapRule.addElement("string");
        listLogic.addAttribute("name", "list-logic");
        listLogic.addText("OR");

        Element regexpList = trapRule.addElement("stringList");
        regexpList.addAttribute("name", "regexp-list");
        for (String trap : list) {
            regexpList.addElement("string").addText(trap);
        }
    }

    /**
     * Sets the value at an XPath when the document contains it, and logs a warning otherwise.
     *
     * @param document the document to modify
     * @param str the XPath of the node to set
     * @param str2 the setting name, used only in the warning message
     * @param str3 the value to write
     */
    private static void setIfFound(Document document, String str, String str2, String str3) {
        boolean missing = document.selectSingleNode(str) == null;
        if (missing) {
            log.warn("Could not replace setting value of '" + str2 + "' in template. Xpath not found: " + str);
            return;
        }
        XmlUtils.setNode(document, str, str3);
    }

    /**
     * Writes an object limit into an order.xml document.
     *
     * @param document the order.xml document to modify in place
     * @param j the limit to write
     * @param z if true the value goes to the QuotaEnforcer group-max-fetch-successes node,
     *          otherwise to the frontier queue-total-budget node
     * @throws IOFailure if the chosen node is not present in the document
     */
    public static void editOrderXML_maxObjectsPerDomain(Document document, long j, boolean z) {
        final String xpath;
        if (z) {
            xpath = GROUP_MAX_FETCH_SUCCESS_XPATH;
        } else {
            xpath = QUEUE_TOTAL_BUDGET_XPATH;
        }
        Node limitNode = document.selectSingleNode(xpath);
        if (limitNode != null) {
            limitNode.setText(Long.toString(j));
            return;
        }
        throw new IOFailure("Unable to locate " + xpath + " element in order.xml: " + document.asXML());
    }

    /**
     * Enables or disables the QuotaEnforcer in an order.xml document.
     *
     * <p>The enforcer is enabled when {@code j} differs from -1, or when {@code z} is
     * true and {@code j2} differs from -1; otherwise it is disabled.
     *
     * @param document the order.xml document to modify in place
     * @param z whether {@code j2} takes part in the decision
     * @param j first limit; -1 is treated as "not set"
     * @param j2 second limit; -1 is treated as "not set", and it is ignored when {@code z} is false
     * @throws IOFailure if the QuotaEnforcer "enabled" node is missing
     */
    public static void editOrderXML_configureQuotaEnforcer(Document document, boolean z, long j, long j2) {
        final boolean firstLimitSet = j != -1;
        final boolean secondLimitSet = z && j2 != -1;
        final boolean enable = firstLimitSet || secondLimitSet;

        Node enabledNode = document.selectSingleNode(QUOTA_ENFORCER_ENABLED_XPATH);
        if (enabledNode != null) {
            enabledNode.setText(Boolean.toString(enable));
            return;
        }
        throw new IOFailure("Unable to locate " + QUOTA_ENFORCER_ENABLED_XPATH + " element in order.xml: " + document.asXML());
    }

    /**
     * Reports this template as valid.
     *
     * @return always true; no check of the document is performed here
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public boolean isValid() {
        return true;
    }

    /**
     * Enables or disables the QuotaEnforcer of this template.
     *
     * <p>Delegates to {@link #editOrderXML_configureQuotaEnforcer(Document, boolean, long, long)}
     * on the wrapped document instead of repeating the same logic.
     *
     * @param z whether {@code j2} takes part in the decision
     * @param j first limit; -1 is treated as "not set"
     * @param j2 second limit; -1 is treated as "not set", and it is ignored when {@code z} is false
     * @throws IOFailure if the QuotaEnforcer "enabled" node is missing from the template
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void configureQuotaEnforcer(boolean z, long j, long j2) {
        editOrderXML_configureQuotaEnforcer(this.template, z, j, j2);
    }

    /**
     * Writes the per-domain byte limit into the QuotaEnforcer group-max-all-kb node.
     *
     * <p>A value of -1 is written through as -1. Any other non-zero value is divided by
     * {@code Constants.BYTES_PER_HERITRIX_BYTELIMIT_UNIT} and incremented by one so that
     * integer division does not round the limit down.
     *
     * @param l the limit in bytes; must not be null
     * @throws ArgumentNotValid if {@code l} is null
     * @throws IOFailure if the group-max-all-kb node is missing from the template
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setMaxBytesPerDomain(Long l) {
        ArgumentNotValid.checkNotNull(l, "Long maxBytesPerDomain");
        Node quotaNode = this.template.selectSingleNode(GROUP_MAX_ALL_KB_XPATH);
        if (quotaNode == null) {
            throw new IOFailure("Unable to locate QuotaEnforcer object in order.xml: " + this.template.asXML());
        }
        final long maxBytes = l.longValue();
        if (maxBytes == 0) {
            // NOTE(review): this writes a web-interface constant into a numeric node;
            // presumably its value is "0" - confirm and consider a literal instead.
            quotaNode.setText(ExtendedFieldConstants.FALSE);
        } else if (maxBytes == -1) {
            quotaNode.setText(String.valueOf(-1L));
        } else {
            quotaNode.setText(Long.toString((maxBytes / Constants.BYTES_PER_HERITRIX_BYTELIMIT_UNIT) + 1));
        }
    }

    /**
     * Not implemented for H1 templates: nothing is read from the document.
     *
     * @return always null
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public Long getMaxBytesPerDomain() {
        return null;
    }

    /**
     * Does nothing: the argument is ignored and the template is left unchanged.
     *
     * @param l ignored
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setMaxObjectsPerDomain(Long l) {
    }

    /**
     * Not implemented for H1 templates: nothing is read from the document.
     *
     * @return always null
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public Long getMaxObjectsPerDomain() {
        return null;
    }

    /**
     * Tells whether the DeDuplicator processor is switched on in this template.
     *
     * @return true only if the DeDuplicator "enabled" node exists and its trimmed text
     *         equals the web-interface TRUE constant
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public boolean IsDeduplicationEnabled() {
        Node enabledNode = template.selectSingleNode(DEDUPLICATOR_ENABLED);
        if (enabledNode == null) {
            return false;
        }
        String enabledText = enabledNode.getText().trim();
        return enabledText.equals(dk.netarkivet.harvester.webinterface.Constants.TRUE);
    }

    /**
     * Selects ARC or WARC as the archive format written by Heritrix.
     *
     * <p>The chosen writer gets its path and "enabled" nodes set; the other writer is
     * disabled when its "enabled" node exists. For WARC, the writer parameters are
     * additionally overwritten from settings unless the override setting is present
     * with a value other than the web-interface TRUE constant.
     *
     * @param str "arc" or "warc", compared case-insensitively
     * @throws ArgumentNotValid if {@code str} is neither "arc" nor "warc" (including null)
     * @throws IllegalState if the template lacks the nodes needed for the chosen format
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setArchiveFormat(String str) {
        if ("arc".equalsIgnoreCase(str)) {
            log.debug("ARC format selected to be used by Heritrix");
            enableArcWriter(this.template);
        } else if ("warc".equalsIgnoreCase(str)) {
            log.debug("WARC format selected to be used by Heritrix");
            enableWarcWriter(this.template);
        } else {
            throw new ArgumentNotValid("Configuration of '" + HarvesterSettings.HERITRIX_ARCHIVE_FORMAT + "' is invalid! Unrecognized format '" + str + "'.");
        }
    }

    /** Enables the ARC writer and disables the WARC writer when it is present. */
    private static void enableArcWriter(Document document) {
        if (document.selectSingleNode(ARCSDIR_XPATH) == null || document.selectSingleNode(ARCS_ENABLED_XPATH) == null) {
            throw new IllegalState("Unable to choose ARC as Heritrix archive format because  one of the following xpaths are invalid in the given order.xml: " + ARCSDIR_XPATH + "," + ARCS_ENABLED_XPATH);
        }
        XmlUtils.setNode(document, ARCSDIR_XPATH, "arcs");
        XmlUtils.setNode(document, ARCS_ENABLED_XPATH, dk.netarkivet.harvester.webinterface.Constants.TRUE);
        if (document.selectSingleNode(WARCS_ENABLED_XPATH) != null) {
            XmlUtils.setNode(document, WARCS_ENABLED_XPATH, dk.netarkivet.harvester.webinterface.Constants.FALSE);
        }
    }

    /** Enables the WARC writer, disables the ARC writer when present, then applies the WARC overrides. */
    private static void enableWarcWriter(Document document) {
        if (document.selectSingleNode(WARCSDIR_XPATH) == null || document.selectSingleNode(WARCS_ENABLED_XPATH) == null) {
            throw new IllegalState("Unable to choose WARC as Heritrix archive format because  one of the following xpaths are invalid in the given order.xml: " + WARCSDIR_XPATH + "," + WARCS_ENABLED_XPATH + ". order.xml: " + document.asXML());
        }
        XmlUtils.setNode(document, WARCSDIR_XPATH, "warcs");
        XmlUtils.setNode(document, WARCS_ENABLED_XPATH, dk.netarkivet.harvester.webinterface.Constants.TRUE);
        if (document.selectSingleNode(ARCS_ENABLED_XPATH) != null) {
            XmlUtils.setNode(document, ARCS_ENABLED_XPATH, dk.netarkivet.harvester.webinterface.Constants.FALSE);
        }
        if (warcParameterOverrideRequested()) {
            applyWarcParameterOverrides(document);
        }
    }

    /**
     * Reads the WARC override setting.
     *
     * @return true when the setting is undefined or equals the web-interface TRUE constant
     */
    private static boolean warcParameterOverrideRequested() {
        String override = null;
        try {
            override = Settings.get(HarvesterSettings.HERITRIX_WARC_PARAMETERS_OVERRIDE);
        } catch (UnknownID ignored) {
            // Deliberate best effort: an undefined setting means the overrides are applied.
            log.debug("Setting '{}' is not defined; WARC parameters will be overridden from settings.", HarvesterSettings.HERITRIX_WARC_PARAMETERS_OVERRIDE);
        }
        return override == null || dk.netarkivet.harvester.webinterface.Constants.TRUE.equals(override);
    }

    /** Copies the WARC writer settings into the document, warning about any XPath that is missing. */
    private static void applyWarcParameterOverrides(Document document) {
        setIfFound(document, WARCS_SKIP_IDENTICAL_DIGESTS_XPATH, HarvesterSettings.HERITRIX_WARC_SKIP_IDENTICAL_DIGESTS, Settings.get(HarvesterSettings.HERITRIX_WARC_SKIP_IDENTICAL_DIGESTS));
        setIfFound(document, WARCS_WRITE_METADATA_XPATH, HarvesterSettings.HERITRIX_WARC_WRITE_METADATA, Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_METADATA));
        setIfFound(document, WARCS_WRITE_METADATA_OUTLINKS_XPATH, HarvesterSettings.HERITRIX_WARC_WRITE_METADATA_OUTLINKS, Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_METADATA_OUTLINKS));
        setIfFound(document, WARCS_WRITE_REQUESTS_XPATH, HarvesterSettings.HERITRIX_WARC_WRITE_REQUESTS, Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_REQUESTS));
        setIfFound(document, WARCS_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS_XPATH, HarvesterSettings.HERITRIX_WARC_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS, Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS));
        setIfFound(document, WARCS_WRITE_REVISIT_FOR_NOT_MODIFIED_XPATH, HarvesterSettings.HERITRIX_WARC_WRITE_REVISIT_FOR_NOT_MODIFIED, Settings.get(HarvesterSettings.HERITRIX_WARC_WRITE_REVISIT_FOR_NOT_MODIFIED));
    }

    /**
     * Writes the maximum job running time into the max-time-sec node of the template.
     *
     * @param l the value to write; must not be null
     * @throws ArgumentNotValid if {@code l} is null
     * @throws IOFailure if the max-time-sec node is missing from the template
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setMaxJobRunningTime(Long l) {
        ArgumentNotValid.checkNotNull(l, "Long maxJobRunningTime");
        Node maxTimeNode = this.template.selectSingleNode(MAXTIMESEC_PATH_XPATH);
        if (maxTimeNode == null) {
            throw new IOFailure("Unable to locate xpath '" + MAXTIMESEC_PATH_XPATH + "' in the order.xml: " + this.template.asXML());
        }
        String previous = maxTimeNode.getText();
        maxTimeNode.setText(Long.toString(l.longValue()));
        // Parameterized form avoids building the message when trace logging is off.
        log.trace("Value of groupMaxTimeSecNode changed from {} to {}", previous, l);
    }

    /**
     * Writes the template document to a stream using a dom4j {@code XMLWriter}.
     * The stream is not closed by this method.
     *
     * @param outputStream the stream to write to
     * @throws IOException if writing fails
     * @throws ArgumentNotValid if the writer reports an unsupported encoding
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void writeTemplate(OutputStream outputStream) throws IOException, ArgumentNotValid {
        try {
            XMLWriter xmlWriter = new XMLWriter(outputStream);
            xmlWriter.write(template);
        } catch (UnsupportedEncodingException e) {
            final String message = "The encoding of this template is unsupported by this environment";
            log.error(message, e);
            throw new ArgumentNotValid(message, e);
        }
    }

    /**
     * Returns the text of the wrapped document as reported by dom4j's {@code getText()}.
     *
     * @return the document text
     */
    public String getText() {
        final String text = template.getText();
        return text;
    }

    /**
     * Adds a crawler-trap decide-rule to this template.
     *
     * <p>Delegates to {@link #editOrderXMLAddCrawlerTraps(Document, String, List)} on the
     * wrapped document, which performs the same steps this method used to repeat.
     *
     * @param str value of the "name" attribute of the new rule element
     * @param list the crawler-trap regular expressions; nothing is added when empty
     * @throws IllegalState if the template has no decide-rules map element
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void insertCrawlerTraps(String str, List<String> list) {
        editOrderXMLAddCrawlerTraps(this.template, str, list);
    }

    /**
     * Tells whether the wrapped document has content, as reported by dom4j's {@code hasContent()}.
     *
     * @return the result of {@code hasContent()} on the template document
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public boolean hasContent() {
        final boolean nonEmpty = template.hasContent();
        return nonEmpty;
    }

    /**
     * Writes the template document to a file via {@code XmlUtils.writeXmlToFile}.
     *
     * @param file the destination file
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void writeToFile(File file) {
        XmlUtils.writeXmlToFile(template, file);
    }

    /**
     * Points the recover-path node of the template at the given file.
     *
     * @param file the recover log; its absolute path is written into the node
     * @throws IOFailure if the recover-path node is missing from the template
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setRecoverlogNode(File file) {
        final String recoverPathXpath = "/crawl-order/controller/string[@name='recover-path']";
        Node recoverPathNode = template.selectSingleNode(recoverPathXpath);
        if (recoverPathNode == null) {
            throw new IOFailure("Unable to locate the '" + recoverPathXpath + "' element in order.xml: " + template.asXML());
        }
        String absolutePath = file.getAbsolutePath();
        recoverPathNode.setText(absolutePath);
        log.debug("The Heritrix recover path now refers to '{}'.", absolutePath);
    }

    /**
     * Sets the DeDuplicator index-location node of the template.
     *
     * @param str the value to write into the index-location node
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setDeduplicationIndexLocation(String str) {
        final String xpath = DEDUPLICATOR_INDEX_LOCATION_XPATH;
        XmlUtils.setNode(template, xpath, str);
    }

    /**
     * Sets the seedsfile node in the scope section of the template.
     *
     * @param str the value to write into the seedsfile node
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setSeedsFilePath(String str) {
        final String xpath = SEEDS_FILE_XPATH;
        XmlUtils.setNode(template, xpath, str);
    }

    /**
     * Sets the "prefix" string of the write-processors via {@code XmlUtils.setNodes}.
     * The XPath does not name a specific processor, so it can match more than one node.
     *
     * @param str the prefix value to write
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setArchiveFilePrefix(String str) {
        final String xpath = ARCHIVEFILE_PREFIX_XPATH;
        XmlUtils.setNodes(template, xpath, str);
    }

    /**
     * Sets the disk-path node of the template.
     *
     * @param str the value to write into the disk-path node
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void setDiskPath(String str) {
        final String xpath = DISK_PATH_XPATH;
        XmlUtils.setNode(template, xpath, str);
    }

    /**
     * Detaches the DeDuplicator write-processor from the template.
     * Does nothing when the template has no such processor.
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void removeDeduplicatorIfPresent() {
        Node deduplicator = template.selectSingleNode(DEDUPLICATOR_XPATH);
        if (deduplicator == null) {
            return;
        }
        deduplicator.detach();
    }

    /**
     * Only logs a debug message; the template is not modified.
     *
     * @param z ignored
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void enableOrDisableDeduplication(boolean z) {
        log.debug("In H1 templates we don't enable/disable deduplication.");
    }

    /**
     * Adds a "metadata-items" map with harvest information to the WARCArchiver
     * processor of this template. Each item is a {@code string} element whose
     * "name" attribute is a {@code harvestInfo.*} key.
     *
     * <p>The schedule name, performer and audience items are written only when their
     * value is non-null; all other items are always written.
     *
     * @param job the job supplying ids, limits, channel, template name, filename prefix,
     *            submit date and audience
     * @param str written as harvestInfo.origHarvestDefinitionName
     * @param str2 written as harvestInfo.origHarvestDefinitionComments
     * @param str3 written as harvestInfo.scheduleName; skipped when null
     * @param str4 written as harvestInfo.performer; skipped when null
     * @throws IOFailure if the template has no WARCArchiver processor element
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void insertWarcInfoMetadata(Job job, String str, String str2, String str3, String str4) {
        // selectSingleNode returns a Node; instanceof also rejects null.
        Node warcWriterNode = this.template.selectSingleNode(WARCWRITERPROCESSOR_XPATH);
        if (!(warcWriterNode instanceof Element)) {
            throw new IOFailure("Unable to locate the '" + WARCWRITERPROCESSOR_XPATH + "' element in order.xml: " + this.template.asXML());
        }
        Element metadataItems = ((Element) warcWriterNode).addElement("map");
        metadataItems.addAttribute("name", "metadata-items");

        addMetadataItem(metadataItems, "harvestInfo.version", "0.6");
        addMetadataItem(metadataItems, "harvestInfo.jobId", String.valueOf(job.getJobID()));
        addMetadataItem(metadataItems, "harvestInfo.channel", job.getChannel());
        addMetadataItem(metadataItems, "harvestInfo.harvestNum", String.valueOf(job.getHarvestNum()));
        addMetadataItem(metadataItems, "harvestInfo.origHarvestDefinitionID", String.valueOf(job.getOrigHarvestDefinitionID()));
        addMetadataItem(metadataItems, "harvestInfo.maxBytesPerDomain", String.valueOf(job.getMaxBytesPerDomain()));
        addMetadataItem(metadataItems, "harvestInfo.maxObjectsPerDomain", String.valueOf(job.getMaxObjectsPerDomain()));
        addMetadataItem(metadataItems, "harvestInfo.templateName", job.getOrderXMLName());
        addMetadataItem(metadataItems, "harvestInfo.origHarvestDefinitionName", str);
        addMetadataItem(metadataItems, "harvestInfo.origHarvestDefinitionComments", str2);
        if (str3 != null) {
            addMetadataItem(metadataItems, "harvestInfo.scheduleName", str3);
        }
        addMetadataItem(metadataItems, "harvestInfo.harvestFilenamePrefix", job.getHarvestFilenamePrefix());
        addMetadataItem(metadataItems, "harvestInfo.jobSubmitDate", String.valueOf(job.getSubmittedDate()));
        if (str4 != null) {
            addMetadataItem(metadataItems, "harvestInfo.performer", str4);
        }
        String audience = job.getHarvestAudience();
        if (audience != null) {
            addMetadataItem(metadataItems, "harvestInfo.audience", audience);
        }
    }

    /** Appends {@code <string name="NAME">VALUE</string>} to the given metadata map element. */
    private static void addMetadataItem(Element metadataItems, String name, String value) {
        Element item = metadataItems.addElement("string");
        item.addAttribute("name", name);
        item.addText(value);
    }

    /**
     * Only logs a warning; no attributes are inserted into the template.
     *
     * @param list ignored
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void insertAttributes(List<EAV.AttributeAndType> list) {
        log.warn("No attribute insertion is done for H1 templates");
    }

    /**
     * Writes the template XML to a JSP writer.
     *
     * @param jspWriter the writer receiving the XML string
     * @throws IOFailure if the writer throws an {@code IOException}; the cause is preserved
     */
    @Override // dk.netarkivet.harvester.datamodel.HeritrixTemplate
    public void writeTemplate(JspWriter jspWriter) throws IOFailure {
        try {
            String xml = template.asXML();
            jspWriter.write(xml);
        } catch (IOException e) {
            throw new IOFailure("Unable to write to JspWriter", e);
        }
    }

    static {
        requiredXpaths.put(GROUP_MAX_FETCH_SUCCESS_XPATH, Pattern.compile(WHOLE_NUMBER_REGEXP));
        requiredXpaths.put(QUEUE_TOTAL_BUDGET_XPATH, Pattern.compile(WHOLE_NUMBER_REGEXP));
        requiredXpaths.put(GROUP_MAX_ALL_KB_XPATH, Pattern.compile(WHOLE_NUMBER_REGEXP));
        requiredXpaths.put(DECIDERULES_MAP_XPATH, Pattern.compile(EVERYTHING_REGEXP, 32));
        requiredXpaths.put(HERITRIX_USER_AGENT_XPATH, Pattern.compile(USER_AGENT_REGEXP, 32));
        requiredXpaths.put(HERITRIX_FROM_XPATH, Pattern.compile(FROM_REGEXP));
        requiredXpaths.put(MAXTIMESEC_PATH_XPATH, Pattern.compile(WHOLE_NUMBER_REGEXP));
    }
}
