package dk.netarkivet.common.utils.cdx;

import ch.qos.logback.core.pattern.color.ANSIConstants;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.ChecksumCalculator;
import dk.netarkivet.common.utils.archive.ArchiveBatchJob;
import dk.netarkivet.common.utils.archive.ArchiveHeaderBase;
import dk.netarkivet.common.utils.archive.ArchiveRecordBase;
import dk.netarkivet.common.utils.batch.ArchiveBatchFilter;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.archive.wayback.core.CaptureSearchResult;
import org.bitrepository.commandline.Constants;
import org.jwat.common.ByteCountingPushBackInputStream;
import org.jwat.common.ContentType;
import org.jwat.common.HttpHeader;
import org.jwat.warc.WarcConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/common/utils/cdx/ArchiveExtractCDXJob.class */
public class ArchiveExtractCDXJob extends ArchiveBatchJob {
    private static final Logger log = LoggerFactory.getLogger((Class<?>) ArchiveExtractCDXJob.class);
    private static final String[] STD_FIELDS_EXCL_CHECKSUM = {CaptureSearchResult.CAPTURE_ROBOT_NOARCHIVE, "e", "b", ANSIConstants.ESC_END, "n", "g", Constants.VERBOSITY_ARG};
    private static final String[] STD_FIELDS_INCL_CHECKSUM = {CaptureSearchResult.CAPTURE_ROBOT_NOARCHIVE, "e", "b", ANSIConstants.ESC_END, "n", "g", Constants.VERBOSITY_ARG, Constants.COLLECTION_ID_ARG};
    private int HTTP_HEADER_BUFFER_SIZE;
    private String[] fields;
    private boolean includeChecksum;

    public ArchiveExtractCDXJob(boolean z) {
        this.HTTP_HEADER_BUFFER_SIZE = 1048576;
        this.fields = z ? STD_FIELDS_INCL_CHECKSUM : STD_FIELDS_EXCL_CHECKSUM;
        this.includeChecksum = z;
        this.batchJobTimeout = 604800000L;
    }

    public ArchiveExtractCDXJob() {
        this(true);
    }

    @Override // dk.netarkivet.common.utils.archive.ArchiveBatchJob
    public ArchiveBatchFilter getFilter() {
        return ArchiveBatchFilter.EXCLUDE_NON_RESPONSE_RECORDS;
    }

    @Override // dk.netarkivet.common.utils.archive.ArchiveBatchJobBase, dk.netarkivet.common.utils.batch.FileBatchJob
    public void initialize(OutputStream outputStream) {
    }

    @Override // dk.netarkivet.common.utils.archive.ArchiveBatchJob
    public void processRecord(ArchiveRecordBase archiveRecordBase, OutputStream outputStream) {
        ContentType parseContentType;
        log.trace("Processing Archive Record with offset: {}", Long.valueOf(archiveRecordBase.getHeader().getOffset()));
        ArchiveHeaderBase header = archiveRecordBase.getHeader();
        HashMap hashMap = new HashMap();
        hashMap.put(CaptureSearchResult.CAPTURE_ROBOT_NOARCHIVE, header.getUrl());
        hashMap.put("e", header.getIp());
        hashMap.put("b", header.getArcDateStr());
        hashMap.put("n", Long.toString(header.getLength()));
        hashMap.put("g", archiveRecordBase.getHeader().getArchiveFile().getName());
        hashMap.put(Constants.VERBOSITY_ARG, Long.toString(archiveRecordBase.getHeader().getOffset()));
        String mimetype = header.getMimetype();
        ContentType parseContentType2 = ContentType.parseContentType(mimetype);
        boolean z = false;
        if (parseContentType2 != null) {
            if (WarcConstants.CONTENT_TYPE_METADATA.equals(parseContentType2.contentType) && "http".equals(parseContentType2.mediaType)) {
                String parameter = parseContentType2.getParameter("msgtype");
                if (WarcConstants.RT_RESPONSE.equals(parameter)) {
                    z = true;
                } else if (WarcConstants.RT_REQUEST.equals(parameter)) {
                }
            }
            mimetype = parseContentType2.toStringShort();
        }
        ByteCountingPushBackInputStream byteCountingPushBackInputStream = new ByteCountingPushBackInputStream(archiveRecordBase.getInputStream(), this.HTTP_HEADER_BUFFER_SIZE);
        HttpHeader httpHeader = null;
        if (z) {
            try {
                httpHeader = HttpHeader.processPayload(1, byteCountingPushBackInputStream, header.getLength(), null);
                if (httpHeader != null && httpHeader.contentType != null && (parseContentType = ContentType.parseContentType(httpHeader.contentType)) != null) {
                    mimetype = parseContentType.toStringShort();
                }
            } catch (IOException e) {
                throw new IOFailure("Error reading httpresponse header", e);
            }
        }
        hashMap.put(ANSIConstants.ESC_END, mimetype);
        if (this.includeChecksum) {
            hashMap.put(Constants.COLLECTION_ID_ARG, ChecksumCalculator.calculateMd5(byteCountingPushBackInputStream));
        }
        if (httpHeader != null) {
            try {
                httpHeader.close();
            } catch (IOException e2) {
                throw new IOFailure("Error closing httpresponse header", e2);
            }
        }
        printFields(hashMap, outputStream);
    }

    @Override // dk.netarkivet.common.utils.archive.ArchiveBatchJobBase, dk.netarkivet.common.utils.batch.FileBatchJob
    public void finish(OutputStream outputStream) {
    }

    private void printFields(Map<String, String> map, OutputStream outputStream) {
        StringBuffer stringBuffer = new StringBuffer();
        int i = 0;
        while (i < this.fields.length) {
            String str = map.get(this.fields[i]);
            stringBuffer.append(i > 0 ? " " : "");
            stringBuffer.append(str == null ? "-" : str.toString());
            i++;
        }
        stringBuffer.append("\n");
        try {
            outputStream.write(stringBuffer.toString().getBytes("UTF-8"));
        } catch (IOException e) {
            throw new IOFailure("Error writing CDX line '" + ((Object) stringBuffer) + "' to batch outstream", e);
        }
    }

    public String toString() {
        return getClass().getName() + ", with Filter: " + getFilter() + ", include checksum = " + this.includeChecksum;
    }
}
