package dk.netarkivet.common.utils.warc;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.utils.archive.ArchiveDateConverter;
import dk.netarkivet.common.utils.archive.HeritrixArchiveHeaderWrapper;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.activemq.filter.DestinationFilter;
import org.archive.format.ArchiveFileConstants;
import org.archive.format.warc.WARCConstants;
import org.archive.io.ArchiveRecord;
import org.archive.io.warc.WARCReader;
import org.archive.io.warc.WARCReaderFactory;
import org.archive.io.warc.WARCRecord;
import org.archive.io.warc.WARCRecordInfo;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPoolSettingsData;
import org.archive.uid.UUIDGenerator;
import org.jwat.warc.WarcConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/common/utils/warc/WARCUtils.class */
/**
 * Static helper methods for creating WARC writers, copying WARC records between
 * files and reading the content of a single WARC record into memory.
 */
public class WARCUtils {
    protected static final Logger log = LoggerFactory.getLogger(WARCUtils.class);

    /** Maximum file size handed to the writer settings (1 GiB). */
    private static final long MAX_WARC_FILE_SIZE = 1073741824L;

    /** Header names (compared as-is against the wrapper's keys) that are not copied as extra headers. */
    private static final Set<String> ignoreHeadersMap = new HashSet<>();

    /** Maps a lower-cased header name to its canonically cased spelling. */
    private static final Map<String, String> headerNamesCaseMap = new HashMap<>();

    static {
        // These headers are set explicitly on the outgoing record (type, url, mimetype,
        // record id, content length) or are reader bookkeeping, so they are skipped
        // when the remaining headers are copied over as "extra" headers.
        ignoreHeadersMap.add(ArchiveFileConstants.MIMETYPE_FIELD_KEY);
        ignoreHeadersMap.add(ArchiveFileConstants.READER_IDENTIFIER_FIELD_KEY);
        ignoreHeadersMap.add(ArchiveFileConstants.ABSOLUTE_OFFSET_KEY);
        ignoreHeadersMap.add("content-length");
        ignoreHeadersMap.add("warc-record-id");
        ignoreHeadersMap.add("warc-type");
        ignoreHeadersMap.add("warc-target-uri");

        final String[] canonicalNames = {
            "WARC-Type", "WARC-Record-ID", "WARC-Date", "Content-Length", "Content-Type",
            "WARC-Concurrent-To", "WARC-Block-Digest", "WARC-Payload-Digest", "WARC-IP-Address",
            "WARC-Refers-To", "WARC-Target-URI", "WARC-Truncated", WarcConstants.FN_WARC_WARCINFO_ID,
            "WARC-Filename", "WARC-Profile", WarcConstants.FN_WARC_IDENTIFIED_PAYLOAD_TYPE,
            WarcConstants.FN_WARC_SEGMENT_ORIGIN_ID, WarcConstants.FN_WARC_SEGMENT_NUMBER,
            WarcConstants.FN_WARC_SEGMENT_TOTAL_LENGTH
        };
        // NOTE(review): the default-locale toLowerCase() is kept on purpose; the header keys
        // looked up in this map are produced elsewhere and may be lower-cased the same way.
        for (String canonicalName : canonicalNames) {
            headerNamesCaseMap.put(canonicalName.toLowerCase(), canonicalName);
        }
    }

    /**
     * Creates a {@link WARCWriter} that writes to the given file.
     *
     * @param file the file the writer should write to
     * @return a new WARCWriter for the file
     * @throws IOFailure if the output stream or the writer could not be created
     */
    public static WARCWriter createWARCWriter(File file) {
        PrintStream out = null;
        try {
            out = new PrintStream(new FileOutputStream(file));
            WARCWriterPoolSettingsData settings = new WARCWriterPoolSettingsData(
                    WARCConstants.WARC_FILE_EXTENSION, null, MAX_WARC_FILE_SIZE, false, null, null,
                    new UUIDGenerator());
            return new WARCWriter(new AtomicInteger(), out, file, settings);
        } catch (IOException e) {
            // The stream may already be open when writer construction fails; do not leak it.
            if (out != null) {
                out.close();
            }
            String message = "Could not create WARCWriter to file '" + file + "'.\n";
            log.warn(message, e);
            throw new IOFailure(message, e);
        }
    }

    /**
     * Copies every record of the given WARC file to the given writer.
     * The reader opened on the file is closed before this method returns.
     *
     * @param file the WARC file to read records from
     * @param wARCWriter the writer the records are written to
     * @throws ArgumentNotValid if either argument is null
     * @throws IOFailure if the file cannot be opened or closed, or a record cannot be copied
     */
    public static void insertWARCFile(File file, WARCWriter wARCWriter) {
        ArgumentNotValid.checkNotNull(wARCWriter, "WARCWriter aw");
        ArgumentNotValid.checkNotNull(file, "File warcFile");
        try (WARCReader reader = WARCReaderFactory.get(file)) {
            for (ArchiveRecord record : reader) {
                copySingleRecord(wARCWriter, (WARCRecord) record);
            }
        } catch (IOException e) {
            String message = "Error while copying WARC records from " + file;
            log.warn(message, e);
            throw new IOFailure(message, e);
        }
    }

    /**
     * Writes a copy of a single record to the writer. Type, URL, mimetype, record id and
     * content length are set explicitly; all other headers not listed in
     * {@code ignoreHeadersMap} are copied as extra headers, using the canonical casing
     * from {@code headerNamesCaseMap} when one is known.
     *
     * @param wARCWriter the writer to write the record to
     * @param wARCRecord the record to copy
     * @throws IOFailure if anything goes wrong while reading or writing the record
     */
    private static void copySingleRecord(WARCWriter wARCWriter, WARCRecord wARCRecord) {
        try {
            HeritrixArchiveHeaderWrapper header = HeritrixArchiveHeaderWrapper.wrapArchiveHeader(null, wARCRecord);
            String warcType = header.getHeaderStringValue("WARC-Type");
            String url = header.getUrl();
            // The formatted date is not used; the call is kept so that a record whose date
            // cannot be obtained/formatted still fails here exactly as before.
            ArchiveDateConverter.getWarcDateFormat().format(header.getDate());
            String mimetype = header.getMimetype();

            URI recordId;
            try {
                recordId = new URI(stripAngleBrackets(header.getHeaderStringValue("warc-record-id")));
            } catch (URISyntaxException e) {
                throw new IllegalState("Epic fail creating URI from UUID!", e);
            }

            long contentLength = header.getLength() - wARCRecord.getHeader().getContentBegin();
            WARCConstants.WARCRecordType recordType = WARCConstants.WARCRecordType.valueOf(warcType);

            WARCRecordInfo recordInfo = new WARCRecordInfo();
            for (Map.Entry<String, Object> field : header.getHeaderFields().entrySet()) {
                String name = field.getKey();
                if (ignoreHeadersMap.contains(name)) {
                    continue;
                }
                String canonicalName = headerNamesCaseMap.get(name);
                recordInfo.addExtraHeader(canonicalName != null ? canonicalName : name,
                        field.getValue().toString());
            }
            recordInfo.setType(recordType);
            recordInfo.setUrl(url);
            recordInfo.setMimetype(mimetype);
            recordInfo.setRecordId(recordId);
            recordInfo.setContentStream(wARCRecord);
            recordInfo.setContentLength(contentLength);
            wARCWriter.writeRecord(recordInfo);
        } catch (Exception e) {
            throw new IOFailure("Error occurred while writing an WARC record" + wARCRecord, e);
        }
    }

    /** Returns the value without a surrounding {@code <...>} pair, or unchanged if not so wrapped. */
    private static String stripAngleBrackets(String value) {
        if (value.startsWith("<") && value.endsWith(">")) {
            return value.substring(1, value.length() - 1);
        }
        return value;
    }

    /**
     * Reads the content of a WARC record (the bytes after the record header) into a byte array.
     * If the stream ends before the expected number of bytes has been read, only the bytes
     * actually read are returned.
     *
     * @param wARCRecord the record to read from
     * @return the content bytes read from the record
     * @throws ArgumentNotValid if the record is null
     * @throws IOFailure if the record is longer than {@link Integer#MAX_VALUE} or reading fails
     */
    public static byte[] readWARCRecord(WARCRecord wARCRecord) throws IOFailure {
        ArgumentNotValid.checkNotNull(wARCRecord, "WARCRecord record");
        final long recordLength = wARCRecord.getHeader().getLength();
        if (recordLength > Integer.MAX_VALUE) {
            throw new IOFailure("WARC Record too long to fit in array: " + recordLength + " > " + Integer.MAX_VALUE);
        }
        final int expected = (int) (recordLength - wARCRecord.getHeader().getContentBegin());
        final byte[] content = new byte[expected];
        int total = 0;
        try {
            // Read straight into the result, never asking for more than the space that is
            // left, so a stream delivering extra bytes cannot overflow the array.
            while (total < expected) {
                int count = wARCRecord.read(content, total, expected - total);
                if (count == -1) {
                    break;
                }
                total += count;
            }
        } catch (IOException e) {
            throw new IOFailure("Failure when reading the WARC-record", e);
        }
        if (total == expected) {
            return content;
        }
        // Fewer bytes than announced: return only what was actually read.
        byte[] truncated = new byte[total];
        System.arraycopy(content, 0, truncated, 0, total);
        log.debug("Storing {} bytes. Expected to store: {}", total, expected);
        return truncated;
    }

    /**
     * Returns the value of the {@code WARC-Type} header of the given record.
     *
     * @param wARCRecord the record to inspect
     * @return the WARC-Type header value as found in the record header
     * @throws ArgumentNotValid if the record is null
     */
    public static String getRecordType(WARCRecord wARCRecord) {
        ArgumentNotValid.checkNotNull(wARCRecord, "record");
        return (String) wARCRecord.getHeader().getHeaderValue("WARC-Type");
    }

    /**
     * Checks whether a filename has a WARC extension ({@code .warc} or {@code .warc.gz}),
     * ignoring case.
     *
     * @param str the filename to check
     * @return true if the lower-cased name ends with ".warc" or ".warc.gz"
     * @throws ArgumentNotValid if the filename is null or empty
     */
    public static boolean isWarc(String str) {
        ArgumentNotValid.checkNotNullOrEmpty(str, "filename");
        String lowerCaseName = str.toLowerCase();
        return lowerCaseName.endsWith(".warc") || lowerCaseName.endsWith(".warc.gz");
    }
}
