package dk.netarkivet.common.utils.cdx;

import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.ChecksumCalculator;
import dk.netarkivet.common.utils.archive.ArchiveHeaderBase;
import dk.netarkivet.common.utils.archive.HeritrixArchiveRecordWrapper;
import dk.netarkivet.common.utils.batch.WARCBatchFilter;
import dk.netarkivet.common.utils.warc.WARCBatchJob;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.archive.io.warc.WARCRecord;
import org.jwat.common.ByteCountingPushBackInputStream;
import org.jwat.common.ContentType;
import org.jwat.common.HttpHeader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/common/utils/cdx/WARCExtractCDXJob.class */
public class WARCExtractCDXJob extends WARCBatchJob {
    private static final Logger log = LoggerFactory.getLogger(WARCExtractCDXJob.class);
    private static final String[] STD_FIELDS_EXCL_CHECKSUM = {"A", "e", "b", "m", "n", "g", "v"};
    private static final String[] STD_FIELDS_INCL_CHECKSUM = {"A", "e", "b", "m", "n", "g", "v", "c"};
    private String[] fields;
    private boolean includeChecksum;

    public WARCExtractCDXJob(boolean z) {
        this.fields = z ? STD_FIELDS_INCL_CHECKSUM : STD_FIELDS_EXCL_CHECKSUM;
        this.includeChecksum = z;
        this.batchJobTimeout = 604800000L;
    }

    public WARCExtractCDXJob() {
        this(true);
    }

    @Override // dk.netarkivet.common.utils.warc.WARCBatchJob
    public WARCBatchFilter getFilter() {
        return WARCBatchFilter.EXCLUDE_NON_RESPONSE_RECORDS;
    }

    @Override // dk.netarkivet.common.utils.warc.WARCBatchJob, dk.netarkivet.common.utils.batch.FileBatchJob
    public void initialize(OutputStream outputStream) {
    }

    @Override // dk.netarkivet.common.utils.warc.WARCBatchJob
    public void processRecord(WARCRecord wARCRecord, OutputStream outputStream) {
        ContentType parseContentType;
        log.trace("Processing WARCRecord with offset: {}", Long.valueOf(wARCRecord.getHeader().getOffset()));
        ArchiveHeaderBase header = new HeritrixArchiveRecordWrapper(wARCRecord).getHeader();
        HashMap hashMap = new HashMap();
        hashMap.put("A", header.getUrl());
        hashMap.put("e", header.getIp());
        hashMap.put("b", header.getArcDateStr());
        hashMap.put("n", Long.toString(header.getLength()));
        hashMap.put("v", Long.toString(wARCRecord.getHeader().getOffset()));
        hashMap.put("g", wARCRecord.getHeader().getReaderIdentifier());
        String mimetype = header.getMimetype();
        ContentType parseContentType2 = ContentType.parseContentType(mimetype);
        boolean z = false;
        if (parseContentType2 != null) {
            if ("application".equals(parseContentType2.contentType) && "http".equals(parseContentType2.mediaType)) {
                String parameter = parseContentType2.getParameter("msgtype");
                if ("response".equals(parameter)) {
                    z = true;
                } else if ("request".equals(parameter)) {
                }
            }
            mimetype = parseContentType2.toStringShort();
        }
        ByteCountingPushBackInputStream byteCountingPushBackInputStream = new ByteCountingPushBackInputStream(wARCRecord, 8192);
        HttpHeader httpHeader = null;
        if (z) {
            try {
                httpHeader = HttpHeader.processPayload(1, byteCountingPushBackInputStream, header.getLength(), (String) null);
                if (httpHeader != null && httpHeader.contentType != null && (parseContentType = ContentType.parseContentType(httpHeader.contentType)) != null) {
                    mimetype = parseContentType.toStringShort();
                }
            } catch (IOException e) {
                throw new IOFailure("Error reading WARC httpresponse header", e);
            }
        }
        hashMap.put("m", mimetype);
        if (this.includeChecksum) {
            hashMap.put("c", ChecksumCalculator.calculateMd5((InputStream) byteCountingPushBackInputStream));
        }
        if (httpHeader != null) {
            try {
                httpHeader.close();
            } catch (IOException e2) {
                throw new IOFailure("Error closing WARC httpresponse header", e2);
            }
        }
        printFields(hashMap, outputStream);
    }

    @Override // dk.netarkivet.common.utils.warc.WARCBatchJob, dk.netarkivet.common.utils.batch.FileBatchJob
    public void finish(OutputStream outputStream) {
    }

    private void printFields(Map<String, String> map, OutputStream outputStream) {
        StringBuffer stringBuffer = new StringBuffer();
        int i = 0;
        while (i < this.fields.length) {
            String str = map.get(this.fields[i]);
            stringBuffer.append(i > 0 ? " " : "");
            stringBuffer.append(str == null ? "-" : str.toString());
            i++;
        }
        stringBuffer.append("\n");
        try {
            outputStream.write(stringBuffer.toString().getBytes("UTF-8"));
        } catch (IOException e) {
            throw new IOFailure("Error writing CDX line '" + ((Object) stringBuffer) + "' to batch outstream", e);
        }
    }

    public String toString() {
        return getClass().getName() + ", with Filter: " + getFilter() + ", include checksum = " + this.includeChecksum;
    }
}
