package dk.netarkivet.harvester.harvesting;

import dk.netarkivet.common.Constants;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.lang.StringUtils;
import org.archive.crawler.Heritrix;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.deciderules.recrawl.IdenticalDigestDecideRule;
import org.archive.crawler.event.CrawlStatusListener;
import org.archive.crawler.extractor.Link;
import org.archive.crawler.framework.WriterPoolProcessor;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.Type;
import org.archive.io.ReplayInputStream;
import org.archive.io.WriterPoolMember;
import org.archive.io.WriterPoolSettings;
import org.archive.io.warc.WARCConstants;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPool;
import org.archive.uid.GeneratorFactory;
import org.archive.util.ArchiveUtils;
import org.archive.util.XmlUtils;
import org.archive.util.anvl.ANVLRecord;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:dk/netarkivet/harvester/harvesting/WARCWriterProcessor.class */
/**
 * Heritrix write-processor that stores fetched URIs as WARC records.
 *
 * <p>For every successfully fetched {@link CrawlURI} a writer is borrowed from the
 * {@link WARCWriterPool} and, depending on the URI scheme ({@code http*}, {@code dns},
 * {@code ftp}), a group of records is written: response/resource/revisit records plus
 * optional request and metadata records. Which optional records are written is controlled
 * by the {@code write-*} settings declared in the constructor.
 *
 * <p>The body of the first (warcinfo) record produced by {@link #getFirstrecordBody(File)}
 * is extended with the {@code harvestInfo.*} entries found in the {@code metadata-items}
 * map of the crawl order.
 */
public class WARCWriterProcessor extends WriterPoolProcessor implements CoreAttributeConstants, CrawlStatusListener, WriterPoolSettings, FetchStatusCodes, WARCConstants {
    private static final long serialVersionUID = -2006725968882994351L;

    /** Setting: whether 'request' records are written. */
    public static final String ATTR_WRITE_REQUESTS = "write-requests";
    /** Setting: whether 'metadata' records are written. */
    public static final String ATTR_WRITE_METADATA = "write-metadata";
    /** Setting: whether outlinks are included in 'metadata' records. */
    public static final String ATTR_WRITE_METADATA_OUTLINKS = "write-metadata-outlinks";
    /** Setting: whether 'revisit' records are written for identical content digests. */
    public static final String ATTR_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS = "write-revisit-for-identical-digests";
    /** Setting: whether 'revisit' records are written for 304 responses. */
    public static final String ATTR_WRITE_REVISIT_FOR_NOT_MODIFIED = "write-revisit-for-not-modified";
    /** Setting: name of the map holding the metadata items. */
    public static final String ATTR_METADATA_ITEMS = "metadata-items";

    /** Metadata items read from the crawl order; stays {@code null} until they have been found. */
    private Map<String, String> metadataMap;

    // Keys looked up in metadataMap when building the warcinfo body.
    private static final String HARVESTINFO_VERSION = "harvestInfo.version";
    private static final String HARVESTINFO_JOBID = "harvestInfo.jobId";
    private static final String HARVESTINFO_CHANNEL = "harvestInfo.channel";
    private static final String HARVESTINFO_HARVESTNUM = "harvestInfo.harvestNum";
    private static final String HARVESTINFO_ORIGHARVESTDEFINITIONID = "harvestInfo.origHarvestDefinitionID";
    private static final String HARVESTINFO_MAXBYTESPERDOMAIN = "harvestInfo.maxBytesPerDomain";
    private static final String HARVESTINFO_MAXOBJECTSPERDOMAIN = "harvestInfo.maxObjectsPerDomain";
    private static final String HARVESTINFO_ORDERXMLNAME = "harvestInfo.orderXMLName";
    private static final String HARVESTINFO_ORIGHARVESTDEFINITIONNAME = "harvestInfo.origHarvestDefinitionName";
    private static final String HARVESTINFO_SCHEDULENAME = "harvestInfo.scheduleName";
    private static final String HARVESTINFO_HARVESTFILENAMEPREFIX = "harvestInfo.harvestFilenamePrefix";
    private static final String HARVESTINFO_JOBSUBMITDATE = "harvestInfo.jobSubmitDate";
    private static final String HARVESTINFO_PERFORMER = "harvestInfo.performer";
    private static final String HARVESTINFO_AUDIENCE = "harvestInfo.audience";

    private static final Logger logger = Logger.getLogger(WARCWriterProcessor.class.getName());
    private static final String[] DEFAULT_PATH = {"warcs"};

    /**
     * @return the default maximum size of a single WARC file, in bytes (10^9).
     */
    public long getDefaultMaxFileSize() {
        return 1000000000L;
    }

    /**
     * @return the default output directories ({@code "warcs"}).
     */
    protected String[] getDefaultPath() {
        return DEFAULT_PATH;
    }

    /**
     * Creates the processor and declares its settings. All settings are overrideable
     * expert settings; the boolean ones default to {@code true}.
     *
     * @param str name of this processor
     */
    public WARCWriterProcessor(String str) {
        super(str, "Netarchivesuite WARCWriter processor (Version 1.0)");
        defineExpertSetting(new SimpleType(ATTR_WRITE_REQUESTS, "Whether to write 'request' type records. Default is true.", Boolean.TRUE));
        defineExpertSetting(new SimpleType(ATTR_WRITE_METADATA, "Whether to write 'metadata' type records. Default is true.", Boolean.TRUE));
        defineExpertSetting(new SimpleType(ATTR_WRITE_METADATA_OUTLINKS, "Whether to write 'metadata-outlinks' type records. Default is true.", Boolean.TRUE));
        defineExpertSetting(new SimpleType(ATTR_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS, "Whether to write 'revisit' type records when a URI's history indicates the previous fetch had an identical content digest. Default is true.", Boolean.TRUE));
        defineExpertSetting(new SimpleType(ATTR_WRITE_REVISIT_FOR_NOT_MODIFIED, "Whether to write 'revisit' type records when a 304-Not Modified response is received. Default is true.", Boolean.TRUE));
        defineExpertSetting(new MapType(ATTR_METADATA_ITEMS, "Metadata items.", String.class));
    }

    /** Registers a setting definition and marks it as an overrideable expert setting. */
    private void defineExpertSetting(Type definition) {
        Type registered = addElementToDefinition(definition);
        registered.setOverrideable(true);
        registered.setExpertSetting(true);
    }

    /**
     * Installs a {@link WARCWriterPool} configured from this processor's pool settings.
     *
     * @param atomicInteger serial number generator handed to the pool
     */
    protected void setupPool(AtomicInteger atomicInteger) {
        setPool(new WARCWriterPool(atomicInteger, this, getPoolMaximumActive(), getPoolMaximumWait()));
    }

    /**
     * Writes the records for one URI if it was fetched (positive fetch status) and has
     * content; FTP URIs are written even when their content size is zero. An
     * {@link IOException} during writing is recorded on the URI and logged, not rethrown.
     *
     * @param crawlURI the URI being processed
     */
    protected void innerProcess(CrawlURI crawlURI) {
        if (crawlURI.getFetchStatus() <= 0) {
            return;
        }
        String scheme = crawlURI.getUURI().getScheme().toLowerCase();
        boolean hasSomethingToWrite = crawlURI.getContentSize() > 0 || scheme.equals("ftp");
        if (!hasSomethingToWrite) {
            return;
        }
        try {
            if (shouldWrite(crawlURI)) {
                write(scheme, crawlURI);
            } else {
                logger.info("This writer does not write out scheme " + scheme + " content");
            }
        } catch (IOException e) {
            crawlURI.addLocalizedError(getName(), e, "WriteRecord: " + crawlURI.toString());
            logger.log(Level.SEVERE, "Failed write of Record: " + crawlURI.toString(), e);
        }
    }

    /**
     * Borrows a writer from the pool and writes all records belonging to the given URI.
     *
     * <p>On an {@link IOException} the writer is invalidated in the pool and is NOT
     * returned afterwards; returning an already invalidated writer would hand a dead
     * object back to the pool. On every other outcome the bytes written are added to the
     * running total and the writer is returned.
     *
     * @param str      lower-cased URI scheme, used to select the record layout
     * @param crawlURI the URI whose records are written
     * @throws IOException if borrowing the writer or writing a record fails
     */
    protected void write(String str, CrawlURI crawlURI) throws IOException {
        logger.info("writing warc record for " + crawlURI);
        WriterPoolMember writer = getPool().borrowFile();
        long startPosition = writer.getPosition();
        writer.checkSize();
        if (writer.getPosition() != startPosition) {
            // checkSize() moved the position; account for that difference now and
            // measure the records written below from the new position.
            setTotalBytesWritten(getTotalBytesWritten() + (writer.getPosition() - startPosition));
            startPosition = writer.getPosition();
        }
        WARCWriter warcWriter = (WARCWriter) writer;
        boolean invalidated = false;
        try {
            URI baseId = getRecordID();
            String timestamp = ArchiveUtils.getLog14Date(crawlURI.getLong("fetch-began-time"));
            if (str.startsWith("http")) {
                writeHttpRecords(warcWriter, crawlURI, baseId, timestamp);
            } else if (str.equals("dns")) {
                writeDnsRecords(warcWriter, crawlURI, baseId, timestamp);
            } else if (str.equals("ftp")) {
                writeFtpRecords(warcWriter, crawlURI, baseId, timestamp);
            } else {
                logger.warning("No handler for scheme " + str);
            }
            checkBytesWritten();
        } catch (IOException e) {
            getPool().invalidateFile(writer);
            // Only flagged once invalidation succeeded; the finally block must then
            // leave the writer alone.
            invalidated = true;
            throw e;
        } finally {
            if (!invalidated) {
                setTotalBytesWritten(getTotalBytesWritten() + (writer.getPosition() - startPosition));
                getPool().returnFile(writer);
            }
        }
    }

    /** Reads a boolean setting of this processor in the context of the given URI. */
    private boolean isEnabled(CrawlURI crawlURI, String attributeName) {
        return ((Boolean) getUncheckedAttribute(crawlURI, attributeName)).booleanValue();
    }

    /** Maps the truncation flags of a truncated fetch to the WARC-Truncated header value. */
    private static String truncationReason(CrawlURI crawlURI) {
        if (crawlURI.isTimeTruncatedFetch()) {
            return "time";
        }
        if (crawlURI.isLengthTruncatedFetch()) {
            return "length";
        }
        if (crawlURI.isHeaderTruncatedFetch()) {
            return "long-headers";
        }
        return "unspecified";
    }

    /** Formats a record id as a WARC-Concurrent-To header value. */
    private static String concurrentTo(URI recordId) {
        return '<' + recordId.toString() + '>';
    }

    /**
     * Writes the records for an FTP fetch: the control conversation (as a metadata
     * record), then either a revisit or a resource record when a recorder is present,
     * and finally an optional metadata record.
     */
    private void writeFtpRecords(WARCWriter writer, CrawlURI crawlURI, URI baseId, String timestamp) throws IOException {
        ANVLRecord headers = new ANVLRecord(3);
        headers.addLabelValue("WARC-IP-Address", getHostAddress(crawlURI));
        URI lastRecordId = writeFtpControlConversation(writer, timestamp, baseId, crawlURI, headers, crawlURI.getString("ftp-control-conversation"));
        if (crawlURI.getContentDigest() != null) {
            headers.addLabelValue("WARC-Payload-Digest", crawlURI.getContentDigestSchemeString());
        }
        if (crawlURI.getHttpRecorder() != null) {
            if (IdenticalDigestDecideRule.hasIdenticalDigest(crawlURI) && isEnabled(crawlURI, ATTR_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS)) {
                lastRecordId = writeRevisitDigest(writer, timestamp, null, baseId, crawlURI, headers);
            } else {
                ANVLRecord resourceHeaders = new ANVLRecord(3);
                if (crawlURI.isTruncatedFetch()) {
                    resourceHeaders.addLabelValue("WARC-Truncated", truncationReason(crawlURI));
                }
                if (crawlURI.getContentDigest() != null) {
                    resourceHeaders.addLabelValue("WARC-Payload-Digest", crawlURI.getContentDigestSchemeString());
                }
                // The resource record points back at the control conversation record.
                resourceHeaders.addLabelValue("WARC-Concurrent-To", concurrentTo(lastRecordId));
                lastRecordId = writeResource(writer, timestamp, crawlURI.getContentType(), baseId, crawlURI, resourceHeaders);
            }
        }
        if (isEnabled(crawlURI, ATTR_WRITE_METADATA)) {
            ANVLRecord metadataHeaders = new ANVLRecord(1);
            metadataHeaders.addLabelValue("WARC-Concurrent-To", concurrentTo(lastRecordId));
            writeMetadata(writer, timestamp, baseId, crawlURI, metadataHeaders, isEnabled(crawlURI, ATTR_WRITE_METADATA_OUTLINKS));
        }
    }

    /**
     * Writes a single response record for a DNS lookup, tagged with the DNS server
     * address when the URI carries a non-empty one.
     */
    private void writeDnsRecords(WARCWriter writer, CrawlURI crawlURI, URI baseId, String timestamp) throws IOException {
        ANVLRecord headers = null;
        String dnsServerIp = crawlURI.getString("dns-server-ip");
        if (dnsServerIp != null && dnsServerIp.length() > 0) {
            headers = new ANVLRecord(1);
            headers.addLabelValue("WARC-IP-Address", dnsServerIp);
        }
        writeResponse(writer, timestamp, crawlURI.getContentType(), baseId, crawlURI, headers);
    }

    /**
     * Writes the records for an HTTP fetch: a revisit record (identical digest or 304)
     * or a response record, followed by optional request and metadata records that refer
     * to it through WARC-Concurrent-To.
     */
    private void writeHttpRecords(WARCWriter writer, CrawlURI crawlURI, URI baseId, String timestamp) throws IOException {
        ANVLRecord headers = new ANVLRecord(5);
        if (crawlURI.getContentDigest() != null) {
            headers.addLabelValue("WARC-Payload-Digest", crawlURI.getContentDigestSchemeString());
        }
        headers.addLabelValue("WARC-IP-Address", getHostAddress(crawlURI));

        URI mainRecordId;
        if (IdenticalDigestDecideRule.hasIdenticalDigest(crawlURI) && isEnabled(crawlURI, ATTR_WRITE_REVISIT_FOR_IDENTICAL_DIGESTS)) {
            mainRecordId = writeRevisitDigest(writer, timestamp, "application/http; msgtype=response", baseId, crawlURI, headers);
        } else if (crawlURI.getFetchStatus() == 304 && isEnabled(crawlURI, ATTR_WRITE_REVISIT_FOR_NOT_MODIFIED)) {
            mainRecordId = writeRevisitNotModified(writer, timestamp, baseId, crawlURI, headers);
        } else {
            if (crawlURI.isTruncatedFetch()) {
                headers.addLabelValue("WARC-Truncated", truncationReason(crawlURI));
            }
            mainRecordId = writeResponse(writer, timestamp, "application/http; msgtype=response", baseId, crawlURI, headers);
        }

        ANVLRecord relatedHeaders = new ANVLRecord(1);
        relatedHeaders.addLabelValue("WARC-Concurrent-To", concurrentTo(mainRecordId));
        if (isEnabled(crawlURI, ATTR_WRITE_REQUESTS)) {
            writeRequest(writer, timestamp, "application/http; msgtype=request", baseId, crawlURI, relatedHeaders);
        }
        if (isEnabled(crawlURI, ATTR_WRITE_METADATA)) {
            writeMetadata(writer, timestamp, baseId, crawlURI, relatedHeaders, isEnabled(crawlURI, ATTR_WRITE_METADATA_OUTLINKS));
        }
    }

    /**
     * Writes the FTP control conversation as a metadata record with content type
     * {@code text/x-ftp-control-conversation}.
     *
     * @param wARCWriter writer to write to
     * @param str        record timestamp
     * @param uri        base record id; it is qualified with {@code type=metadata}
     * @param crawlURI   the URI the conversation belongs to
     * @param aNVLRecord extra named header fields for the record
     * @param str2       the control conversation text; written UTF-8 encoded
     * @return the qualified id of the written record
     * @throws IOException if the record cannot be written
     */
    protected URI writeFtpControlConversation(WARCWriter wARCWriter, String str, URI uri, CrawlURI crawlURI, ANVLRecord aNVLRecord, String str2) throws IOException {
        URI recordId = qualifyRecordID(uri, "type", "metadata");
        byte[] payload = str2.getBytes("UTF-8");
        wARCWriter.writeMetadataRecord(crawlURI.toString(), str, "text/x-ftp-control-conversation", recordId, aNVLRecord, new ByteArrayInputStream(payload), payload.length);
        return recordId;
    }

    /**
     * Writes a request record from the recorded output of the URI's recorder.
     *
     * @param wARCWriter writer to write to
     * @param str        record timestamp
     * @param str2       content type of the record
     * @param uri        base record id; it is qualified with {@code type=request}
     * @param crawlURI   the URI whose request is written
     * @param aNVLRecord extra named header fields for the record
     * @return the qualified id of the written record
     * @throws IOException if the record cannot be written or the replay stream cannot be closed
     */
    protected URI writeRequest(WARCWriter wARCWriter, String str, String str2, URI uri, CrawlURI crawlURI, ANVLRecord aNVLRecord) throws IOException {
        URI recordId = qualifyRecordID(uri, "type", "request");
        ReplayInputStream replay = crawlURI.getHttpRecorder().getRecordedOutput().getReplayInputStream();
        try {
            wARCWriter.writeRequestRecord(crawlURI.toString(), str, str2, recordId, aNVLRecord, replay, crawlURI.getHttpRecorder().getRecordedOutput().getSize());
        } finally {
            if (replay != null) {
                replay.close();
            }
        }
        return recordId;
    }

    /**
     * Writes a response record from the recorded input of the URI's recorder.
     *
     * @param wARCWriter writer to write to
     * @param str        record timestamp
     * @param str2       content type of the record
     * @param uri        id of the record
     * @param crawlURI   the URI whose response is written
     * @param aNVLRecord extra named header fields for the record; may be {@code null}
     * @return {@code uri}
     * @throws IOException if the record cannot be written or the replay stream cannot be closed
     */
    protected URI writeResponse(WARCWriter wARCWriter, String str, String str2, URI uri, CrawlURI crawlURI, ANVLRecord aNVLRecord) throws IOException {
        ReplayInputStream replay = crawlURI.getHttpRecorder().getRecordedInput().getReplayInputStream();
        try {
            wARCWriter.writeResponseRecord(crawlURI.toString(), str, str2, uri, aNVLRecord, replay, crawlURI.getHttpRecorder().getRecordedInput().getSize());
        } finally {
            if (replay != null) {
                replay.close();
            }
        }
        return uri;
    }

    /**
     * Writes a resource record from the recorded input of the URI's recorder.
     *
     * @param wARCWriter writer to write to
     * @param str        record timestamp
     * @param str2       content type of the record
     * @param uri        id of the record
     * @param crawlURI   the URI whose content is written
     * @param aNVLRecord extra named header fields for the record
     * @return {@code uri}
     * @throws IOException if the record cannot be written or the replay stream cannot be closed
     */
    protected URI writeResource(WARCWriter wARCWriter, String str, String str2, URI uri, CrawlURI crawlURI, ANVLRecord aNVLRecord) throws IOException {
        ReplayInputStream replay = crawlURI.getHttpRecorder().getRecordedInput().getReplayInputStream();
        try {
            wARCWriter.writeResourceRecord(crawlURI.toString(), str, str2, uri, aNVLRecord, replay, crawlURI.getHttpRecorder().getRecordedInput().getSize());
        } finally {
            if (replay != null) {
                replay.close();
            }
        }
        return uri;
    }

    /**
     * Writes a revisit record with the identical-payload-digest profile and annotates
     * the URI with {@code warcRevisit:digest}.
     *
     * <p>When {@code str2} is non-null, the recorded input is written up to the content
     * begin offset if that is positive, otherwise in full. When {@code str2} is
     * {@code null}, no stream is passed and the length is zero.
     *
     * @param wARCWriter writer to write to
     * @param str        record timestamp
     * @param str2       content type of the record, or {@code null} to write no content
     * @param uri        id of the record
     * @param crawlURI   the revisited URI
     * @param aNVLRecord named header fields; WARC-Profile and WARC-Truncated are added to it
     * @return {@code uri}
     * @throws IOException if the record cannot be written or the replay stream cannot be closed
     */
    protected URI writeRevisitDigest(WARCWriter wARCWriter, String str, String str2, URI uri, CrawlURI crawlURI, ANVLRecord aNVLRecord) throws IOException {
        aNVLRecord.addLabelValue("WARC-Profile", "http://netpreserve.org/warc/1.0/revisit/identical-payload-digest");
        aNVLRecord.addLabelValue("WARC-Truncated", "length");
        ReplayInputStream replay = null;
        long length = 0;
        if (str2 != null) {
            replay = crawlURI.getHttpRecorder().getRecordedInput().getReplayInputStream();
            long contentBegin = crawlURI.getHttpRecorder().getRecordedInput().getContentBegin();
            length = contentBegin > 0 ? contentBegin : crawlURI.getHttpRecorder().getRecordedInput().getSize();
        }
        try {
            wARCWriter.writeRevisitRecord(crawlURI.toString(), str, str2, uri, aNVLRecord, replay, length);
        } finally {
            if (replay != null) {
                replay.close();
            }
        }
        crawlURI.addAnnotation("warcRevisit:digest");
        return uri;
    }

    /**
     * Writes a zero-length revisit record with the server-not-modified profile and
     * annotates the URI with {@code warcRevisit:notModified}. The ETag and Last-Modified
     * response headers are copied into the record headers when the URI carries an
     * {@code http-transaction} object.
     *
     * @param wARCWriter writer to write to
     * @param str        record timestamp
     * @param uri        id of the record
     * @param crawlURI   the revisited URI
     * @param aNVLRecord named header fields; profile, header copies and WARC-Truncated are added to it
     * @return {@code uri}
     * @throws IOException if the record cannot be written or the replay stream cannot be closed
     */
    protected URI writeRevisitNotModified(WARCWriter wARCWriter, String str, URI uri, CrawlURI crawlURI, ANVLRecord aNVLRecord) throws IOException {
        aNVLRecord.addLabelValue("WARC-Profile", "http://netpreserve.org/warc/1.0/revisit/server-not-modified");
        if (crawlURI.containsKey("http-transaction")) {
            HttpMethodBase method = (HttpMethodBase) crawlURI.getObject("http-transaction");
            saveHeader("etag", method, aNVLRecord, "WARC-Etag");
            saveHeader("last-modified", method, aNVLRecord, "WARC-Last-Modified");
        }
        aNVLRecord.addLabelValue("WARC-Truncated", "length");
        ReplayInputStream replay = crawlURI.getHttpRecorder().getRecordedInput().getReplayInputStream();
        try {
            wARCWriter.writeRevisitRecord(crawlURI.toString(), str, (String) null, uri, aNVLRecord, replay, 0L);
        } finally {
            if (replay != null) {
                replay.close();
            }
        }
        crawlURI.addAnnotation("warcRevisit:notModified");
        return uri;
    }

    /**
     * Copies one HTTP response header into a record header, if the response has it.
     *
     * @param str            name of the response header to read
     * @param httpMethodBase the HTTP method holding the response
     * @param aNVLRecord     record headers to add to
     * @param str2           label under which the value is stored
     */
    protected void saveHeader(String str, HttpMethodBase httpMethodBase, ANVLRecord aNVLRecord, String str2) {
        Header header = httpMethodBase.getResponseHeader(str);
        if (header != null) {
            aNVLRecord.addLabelValue(str2, header.getValue());
        }
    }

    /**
     * Writes a metadata record ({@code application/warc-fields}) describing how the URI
     * was reached and fetched: seed flag or via/hops/source information, fetch duration,
     * FTP fetch status and, optionally, its outlinks.
     *
     * @param wARCWriter writer to write to
     * @param str        record timestamp
     * @param uri        base record id; it is qualified with {@code type=metadata}
     * @param crawlURI   the URI being described
     * @param aNVLRecord extra named header fields for the record
     * @param z          whether outlinks are included in the record body
     * @return the qualified id of the written record
     * @throws IOException if the record cannot be written
     */
    protected URI writeMetadata(WARCWriter wARCWriter, String str, URI uri, CrawlURI crawlURI, ANVLRecord aNVLRecord, boolean z) throws IOException {
        URI recordId = qualifyRecordID(uri, "type", "metadata");
        ANVLRecord body = new ANVLRecord();
        if (crawlURI.isSeed()) {
            body.addLabel("seed");
        } else {
            if (crawlURI.forceFetch()) {
                body.addLabel("force-fetch");
            }
            body.addLabelValue("via", crawlURI.flattenVia());
            body.addLabelValue("hopsFromSeed", crawlURI.getPathFromSeed());
            if (crawlURI.containsKey("source")) {
                body.addLabelValue("sourceTag", crawlURI.getString("source"));
            }
        }
        long fetchDuration = crawlURI.getFetchDuration();
        if (fetchDuration > -1) {
            body.addLabelValue("fetchTimeMs", Long.toString(fetchDuration));
        }
        if (crawlURI.containsKey("ftp-fetch-status")) {
            body.addLabelValue("ftpFetchStatus", crawlURI.getString("ftp-fetch-status"));
        }
        if (z) {
            Collection<?> outLinks = crawlURI.getOutLinks();
            if (outLinks != null) {
                for (Object outLink : outLinks) {
                    body.addLabelValue("outlink", ((Link) outLink).toString());
                }
            }
        }
        byte[] payload = body.getUTF8Bytes();
        wARCWriter.writeMetadataRecord(crawlURI.toString(), str, "application/warc-fields", recordId, aNVLRecord, new ByteArrayInputStream(payload), payload.length);
        return recordId;
    }

    /**
     * Obtains a fresh record id from the {@link GeneratorFactory}.
     *
     * @return a new record id
     * @throws IOException if the generated id is not a valid URI; the
     *                     {@link URISyntaxException} is attached as cause
     */
    protected URI getRecordID() throws IOException {
        try {
            return GeneratorFactory.getFactory().getRecordID();
        } catch (URISyntaxException e) {
            throw asIOException(e);
        }
    }

    /**
     * Derives a record id from {@code uri} qualified with a single key/value pair.
     *
     * @param uri  the base record id
     * @param str  qualifier key
     * @param str2 qualifier value
     * @return the qualified record id
     * @throws IOException if the qualified id is not a valid URI; the
     *                     {@link URISyntaxException} is attached as cause
     */
    protected URI qualifyRecordID(URI uri, String str, String str2) throws IOException {
        Map<String, String> qualifiers = new HashMap<String, String>(1);
        qualifiers.put(str, str2);
        try {
            return GeneratorFactory.getFactory().qualifyRecordID(uri, qualifiers);
        } catch (URISyntaxException e) {
            throw asIOException(e);
        }
    }

    /** Wraps a URI syntax problem in an IOException, keeping the original as cause. */
    private static IOException asIOException(URISyntaxException e) {
        IOException wrapped = new IOException(e.toString());
        wrapped.initCause(e);
        return wrapped;
    }

    /**
     * @return the resource path of the stylesheet for the first record
     */
    protected String getFirstrecordStylesheet() {
        return "/warcinfobody.xsl";
    }

    /**
     * Builds the body of the first record of a WARC file.
     *
     * <p>The body consists of a block describing the crawl software, host and selected
     * values read from the given XML file, then a {@code #added by NetarchiveSuite} line,
     * then the {@code harvestInfo.*} metadata items. Problems reading the XML file are
     * logged as warnings and the affected entries are left out; missing metadata items
     * are logged as severe.
     *
     * @param file XML file the crawl meta information is read from (the XPath
     *             expressions used suggest the Heritrix crawl order file)
     * @return the text of the record body
     */
    protected String getFirstrecordBody(File file) {
        ANVLRecord crawlInfo = new ANVLRecord(7);
        crawlInfo.addLabelValue("software", "Heritrix/" + Heritrix.getVersion() + " http://crawler.archive.org");
        try {
            InetAddress localHost = InetAddress.getLocalHost();
            crawlInfo.addLabelValue("ip", localHost.getHostAddress());
            crawlInfo.addLabelValue("hostname", localHost.getCanonicalHostName());
        } catch (UnknownHostException e) {
            logger.log(Level.WARNING, "unable to obtain local crawl engine host", e);
        }
        crawlInfo.addLabelValue("format", "WARC File Format 1.0");
        crawlInfo.addLabelValue("conformsTo", "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf");
        try {
            Document document = XmlUtils.getDocument(file);
            addIfNotBlank(crawlInfo, "operator", XmlUtils.xpathOrNull(document, "//meta/operator"));
            addIfNotBlank(crawlInfo, "publisher", XmlUtils.xpathOrNull(document, "//meta/organization"));
            addIfNotBlank(crawlInfo, "audience", XmlUtils.xpathOrNull(document, "//meta/audience"));
            addIfNotBlank(crawlInfo, "isPartOf", XmlUtils.xpathOrNull(document, "//meta/name"));
            addIfNotBlank(crawlInfo, "description", XmlUtils.xpathOrNull(document, "//meta/description"));
            addIfNotBlank(crawlInfo, "robots", XmlUtils.xpathOrNull(document, "//newObject[@name='robots-honoring-policy']/string[@name='type']"));
            addIfNotBlank(crawlInfo, "http-header-user-agent", XmlUtils.xpathOrNull(document, "//map[@name='http-headers']/string[@name='user-agent']"));
            addIfNotBlank(crawlInfo, "http-header-from", XmlUtils.xpathOrNull(document, "//map[@name='http-headers']/string[@name='from']"));
            // The metadata items are only looked up until they have been found once.
            if (this.metadataMap == null) {
                loadMetadataItems(document);
            }
        } catch (IOException e) {
            logger.log(Level.WARNING, "Error obtaining warcinfo", e);
        } catch (XPathExpressionException e) {
            logger.log(Level.WARNING, "Error obtaining metadata items", e);
        }

        String separator = "#added by NetarchiveSuite " + Constants.getVersionString();
        ANVLRecord harvestInfo = new ANVLRecord(7);
        if (this.metadataMap != null) {
            addHarvestInfo(harvestInfo, HARVESTINFO_VERSION);
            addHarvestInfo(harvestInfo, HARVESTINFO_JOBID);
            addHarvestInfo(harvestInfo, HARVESTINFO_CHANNEL);
            addHarvestInfo(harvestInfo, HARVESTINFO_HARVESTNUM);
            addHarvestInfo(harvestInfo, HARVESTINFO_ORIGHARVESTDEFINITIONID);
            addHarvestInfo(harvestInfo, HARVESTINFO_MAXBYTESPERDOMAIN);
            addHarvestInfo(harvestInfo, HARVESTINFO_MAXOBJECTSPERDOMAIN);
            addHarvestInfo(harvestInfo, HARVESTINFO_ORDERXMLNAME);
            addHarvestInfo(harvestInfo, HARVESTINFO_ORIGHARVESTDEFINITIONNAME);
            addHarvestInfoIfPresent(harvestInfo, HARVESTINFO_SCHEDULENAME);
            addHarvestInfo(harvestInfo, HARVESTINFO_HARVESTFILENAMEPREFIX);
            addHarvestInfo(harvestInfo, HARVESTINFO_JOBSUBMITDATE);
            addHarvestInfoIfPresent(harvestInfo, HARVESTINFO_PERFORMER);
            addHarvestInfoIfPresent(harvestInfo, HARVESTINFO_AUDIENCE);
        } else {
            logger.log(Level.SEVERE, "Error missing metadata");
        }
        return crawlInfo.toString() + separator + "\n" + harvestInfo.toString();
    }

    /**
     * Fills {@link #metadataMap} from the {@code string} child elements of the
     * WARCArchiver {@code metadata-items} map in the given document. The map is left
     * untouched when the document has no such node.
     */
    private void loadMetadataItems(Document document) throws XPathExpressionException {
        Node itemsNode = (Node) XPathFactory.newInstance().newXPath().compile("//crawl-order/controller/map[@name='write-processors']/newObject[@name='WARCArchiver']/map[@name='metadata-items']").evaluate(document, XPathConstants.NODE);
        if (itemsNode == null) {
            return;
        }
        NodeList children = itemsNode.getChildNodes();
        if (children == null) {
            return;
        }
        this.metadataMap = new HashMap<String, String>();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE || !"string".equals(child.getNodeName())) {
                continue;
            }
            Node nameAttribute = child.getAttributes().getNamedItem("name");
            if (nameAttribute == null || nameAttribute.getNodeType() != Node.ATTRIBUTE_NODE) {
                continue;
            }
            String key = nameAttribute.getNodeValue();
            if (key != null && key.length() > 0) {
                this.metadataMap.put(key, child.getTextContent());
            }
        }
    }

    /** Adds the metadata item stored under {@code key} to the record, labelled with the key. */
    private void addHarvestInfo(ANVLRecord record, String key) {
        record.addLabelValue(key, this.metadataMap.get(key));
    }

    /** Like {@link #addHarvestInfo}, but does nothing when the key is not in the map. */
    private void addHarvestInfoIfPresent(ANVLRecord record, String key) {
        if (this.metadataMap.containsKey(key)) {
            addHarvestInfo(record, key);
        }
    }

    /**
     * Adds a label/value pair to the record unless the value is blank.
     *
     * @param aNVLRecord record to add to
     * @param str        label
     * @param str2       value; skipped when null, empty or whitespace only
     */
    protected void addIfNotBlank(ANVLRecord aNVLRecord, String str, String str2) {
        if (StringUtils.isNotBlank(str2)) {
            aNVLRecord.addLabelValue(str, str2);
        }
    }
}
