package org.archive.modules.writer;

import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import org.archive.format.warc.WARCConstants;
import org.archive.io.warc.WARCRecordInfo;
import org.archive.io.warc.WARCWriter;
import org.archive.io.warc.WARCWriterPool;
import org.archive.io.warc.WARCWriterPoolSettings;
import org.archive.modules.CoreAttributeConstants;
import org.archive.modules.CrawlMetadata;
import org.archive.modules.CrawlURI;
import org.archive.modules.fetcher.FetchStats;
import org.archive.modules.recrawl.RecrawlAttributeConstants;
import org.archive.modules.revisit.IdenticalPayloadDigestRevisit;
import org.archive.spring.ConfigPath;
import org.archive.uid.RecordIDGenerator;
import org.archive.uid.UUIDGenerator;
import org.archive.util.ArchiveUtils;
import org.archive.util.anvl.ANVLRecord;

/* loaded from: input_file:org/archive/modules/writer/BaseWARCWriterProcessor.class */
/**
 * Base class for writer processors that emit WARC files through a
 * {@link WARCWriterPool}.
 *
 * <p>It supplies the warcinfo metadata block, default pool settings, record-ID
 * generation and the bookkeeping that is performed after records for a
 * {@link CrawlURI} have been written (cumulative statistics, extra-info
 * annotations and content-digest history updates).
 */
public abstract class BaseWARCWriterProcessor extends WriterPoolProcessor implements WARCWriterPoolSettings {
    private static final Logger logger = Logger.getLogger(BaseWARCWriterProcessor.class.getName());

    /** Number of CrawlURIs for which at least one WARC record was written. */
    protected AtomicLong urlsWritten = new AtomicLong();

    /**
     * Cumulative counters, keyed first by stats group (a WARC record type name
     * or {@code "totals"}) and then by counter name (e.g. {@code "numRecords"}).
     */
    protected ConcurrentMap<String, ConcurrentMap<String, AtomicLong>> stats =
            new ConcurrentHashMap<String, ConcurrentMap<String, AtomicLong>>();

    /** Source of record IDs; replaceable through {@link #setRecordIDGenerator}. */
    protected RecordIDGenerator generator = new UUIDGenerator();

    /** Lazily built result of {@link #getMetadata()}; {@code null} until first successfully built. */
    private transient List<String> cachedMetadata;

    /**
     * Returns the live cumulative statistics map (not a copy).
     *
     * @return the counters accumulated by {@link #addStats(Map)}
     */
    public ConcurrentMap<String, ConcurrentMap<String, AtomicLong>> getStats() {
        return this.stats;
    }

    /**
     * @return the generator currently used to mint record IDs
     */
    public RecordIDGenerator getRecordIDGenerator() {
        return this.generator;
    }

    /**
     * Replaces the generator used to mint record IDs.
     *
     * @param recordIDGenerator the generator to use from now on
     */
    public void setRecordIDGenerator(RecordIDGenerator recordIDGenerator) {
        this.generator = recordIDGenerator;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Obtains a new record ID from the configured generator.
     *
     * @return the ID returned by the generator
     * @throws IOException declared for callers; nothing in this method throws it directly
     */
    public URI getRecordID() throws IOException {
        return this.generator.getRecordID();
    }

    /**
     * @return the default maximum WARC file size, 1,000,000,000 bytes
     */
    @Override // org.archive.modules.writer.WriterPoolProcessor
    public long getDefaultMaxFileSize() {
        return 1000000000L;
    }

    /**
     * @return a new mutable list holding the single default store path {@code "warcs"}
     */
    @Override // org.archive.modules.writer.WriterPoolProcessor
    public List<ConfigPath> getDefaultStorePaths() {
        List<ConfigPath> paths = new ArrayList<ConfigPath>();
        paths.add(new ConfigPath("warcs default store path", "warcs"));
        return paths;
    }

    /**
     * Installs a {@link WARCWriterPool} configured from this processor's settings.
     *
     * @param atomicInteger serial-number counter handed to the pool
     */
    @Override // org.archive.modules.writer.WriterPoolProcessor
    protected void setupPool(AtomicInteger atomicInteger) {
        setPool(new WARCWriterPool(atomicInteger, this, getPoolMaxActive(), getMaxWaitForIdleMs()));
    }

    /**
     * Builds the warcinfo metadata as a single ANVL-formatted string wrapped in
     * a one-element list.
     *
     * <p>The result is cached after the first call in which the local host
     * could be resolved; from then on later changes to the metadata provider
     * are not reflected. If host resolution fails, a warning is logged, the
     * {@code ip}/{@code hostname} fields are omitted and nothing is cached, so
     * the next call retries.
     *
     * @return an immutable single-element list containing the metadata record
     */
    @Override // org.archive.modules.writer.WriterPoolProcessor
    public List<String> getMetadata() {
        List<String> cached = this.cachedMetadata;
        if (cached != null) {
            return cached;
        }
        ANVLRecord record = new ANVLRecord();
        record.addLabelValue("software", "Heritrix/" + ArchiveUtils.VERSION + " http://crawler.archive.org");
        boolean hostResolved = false;
        try {
            InetAddress localHost = InetAddress.getLocalHost();
            record.addLabelValue("ip", localHost.getHostAddress());
            record.addLabelValue("hostname", localHost.getCanonicalHostName());
            hostResolved = true;
        } catch (UnknownHostException e) {
            logger.log(Level.WARNING, "unable top obtain local crawl engine host", (Throwable) e);
        }
        record.addLabelValue("format", "WARC File Format 1.0");
        record.addLabelValue("conformsTo", "http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf");
        CrawlMetadata metadataProvider = getMetadataProvider();
        addIfNotBlank(record, "operator", metadataProvider.getOperator());
        addIfNotBlank(record, "publisher", metadataProvider.getOrganization());
        addIfNotBlank(record, "audience", metadataProvider.getAudience());
        addIfNotBlank(record, "isPartOf", metadataProvider.getJobName());
        addIfNotBlank(record, "description", metadataProvider.getDescription());
        // Guard against a missing policy name and lower-case independently of
        // the JVM default locale so the emitted value is stable everywhere.
        String robotsPolicy = metadataProvider.getRobotsPolicyName();
        addIfNotBlank(record, "robots",
                robotsPolicy == null ? null : robotsPolicy.toLowerCase(java.util.Locale.ROOT));
        addIfNotBlank(record, "http-header-user-agent", metadataProvider.getUserAgent());
        addIfNotBlank(record, "http-header-from", metadataProvider.getOperatorFrom());
        List<String> result = Collections.singletonList(record.toString());
        if (hostResolved) {
            // Only remember complete metadata; a failed host lookup is retried next time.
            this.cachedMetadata = result;
        }
        return result;
    }

    /**
     * Adds a label/value pair to the record unless the value is blank.
     *
     * @param aNVLRecord record to append to
     * @param str label
     * @param str2 value; skipped when {@code null}, empty or whitespace-only
     */
    protected void addIfNotBlank(ANVLRecord aNVLRecord, String str, String str2) {
        if (StringUtils.isNotBlank(str2)) {
            aNVLRecord.addLabelValue(str, str2);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Adds every counter in {@code map} to the cumulative {@link #stats},
     * creating groups and counters on first use.
     *
     * @param map per-write statistics, keyed by group and then counter name
     */
    public void addStats(Map<String, Map<String, Long>> map) {
        for (Map.Entry<String, Map<String, Long>> group : map.entrySet()) {
            ConcurrentMap<String, AtomicLong> counters = this.stats.get(group.getKey());
            if (counters == null) {
                ConcurrentMap<String, AtomicLong> fresh = new ConcurrentHashMap<String, AtomicLong>();
                counters = this.stats.putIfAbsent(group.getKey(), fresh);
                if (counters == null) {
                    // We won the race: our new map is the one now registered.
                    counters = fresh;
                }
            }
            for (Map.Entry<String, Long> entry : group.getValue().entrySet()) {
                long delta = entry.getValue().longValue();
                AtomicLong counter = counters.get(entry.getKey());
                if (counter == null) {
                    // A null return means the new counter, already seeded with
                    // delta, was installed; non-null means another thread got
                    // there first and delta still has to be added.
                    counter = counters.putIfAbsent(entry.getKey(), new AtomicLong(delta));
                }
                if (counter != null) {
                    counter.addAndGet(delta);
                }
            }
        }
    }

    /**
     * Logs the final statistics and renders a human-readable summary of what
     * this processor has written.
     *
     * @return the multi-line report text
     */
    @Override // org.archive.modules.Processor
    public String report() {
        logger.info("final stats: " + this.stats);
        long revisitRecords = WARCWriter.getStat(this.stats, WARCConstants.WARCRecordType.revisit.toString(), "numRecords");
        long contentBytes = WARCWriter.getStat(this.stats, WARCConstants.WARCRecordType.response.toString(), "contentBytes")
                + WARCWriter.getStat(this.stats, WARCConstants.WARCRecordType.resource.toString(), "contentBytes");
        long uncompressedBytes = WARCWriter.getStat(this.stats, "totals", FetchStats.TOTAL_BYTES);
        // Read once so the raw and formatted figures on the same line always agree.
        long bytesOnDisk = getTotalBytesWritten();

        StringBuilder sb = new StringBuilder();
        sb.append("Processor: ").append(getClass().getName()).append("\n");
        sb.append("  Function:          Writes WARCs\n");
        sb.append("  Total CrawlURIs:   ").append(this.urlsWritten).append("\n");
        sb.append("  Revisit records:   ").append(revisitRecords).append("\n");
        sb.append("  Crawled content bytes (including http headers): ").append(contentBytes)
                .append(" (").append(ArchiveUtils.formatBytesForDisplay(contentBytes)).append(")\n");
        sb.append("  Total uncompressed bytes (including all warc records): ").append(uncompressedBytes)
                .append(" (").append(ArchiveUtils.formatBytesForDisplay(uncompressedBytes)).append(")\n");
        sb.append("  Total size on disk (").append(getCompress() ? "compressed" : "uncompressed").append("): ")
                .append(bytesOnDisk)
                .append(" (").append(ArchiveUtils.formatBytesForDisplay(bytesOnDisk)).append(")\n");
        return sb.toString();
    }

    /**
     * Makes a two-level copy of a statistics map: a new outer map whose values
     * are new copies of the inner maps.
     *
     * @param map statistics to copy
     * @return an independent mutable copy
     */
    protected Map<String, Map<String, Long>> copyStats(Map<String, Map<String, Long>> map) {
        Map<String, Map<String, Long>> copy = new HashMap<String, Map<String, Long>>(map.size());
        for (Map.Entry<String, Map<String, Long>> group : map.entrySet()) {
            copy.put(group.getKey(), new HashMap<String, Long>(group.getValue()));
        }
        return copy;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Performs the bookkeeping that follows writing records for one URI:
     * folds the writer's temporary stats into the cumulative totals, records
     * the bytes written, annotates the URI with the WARC filename/offset and a
     * copy of the stats, tags the newest fetch-history entry, and updates the
     * URI's content-digest history from the writer's record log.
     *
     * @param crawlURI the URI whose records were just written
     * @param wARCWriter the writer that was used
     * @param j the writer position before the write began
     */
    public void updateMetadataAfterWrite(CrawlURI crawlURI, WARCWriter wARCWriter, long j) {
        Map<String, Map<String, Long>> tmpStats = wARCWriter.getTmpStats();
        if (WARCWriter.getStat(tmpStats, "totals", "numRecords") > 0) {
            addStats(tmpStats);
            this.urlsWritten.incrementAndGet();
        }
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("wrote " + WARCWriter.getStat(tmpStats, "totals", "sizeOnDisk") + " bytes to " + wARCWriter.getFile().getName() + " for " + crawlURI);
        }
        addTotalBytesWritten(wARCWriter.getPosition() - j);

        String warcFilename = wARCWriter.getFilenameWithoutOccupiedSuffix();
        crawlURI.addExtraInfo("warcFilename", warcFilename);
        crawlURI.addExtraInfo("warcFileOffset", Long.valueOf(j));
        crawlURI.getData().put(CoreAttributeConstants.A_WARC_STATS, copyStats(tmpStats));

        HashMap<String, Object>[] fetchHistory = crawlURI.getFetchHistory();
        // The length check protects against an empty history array.
        if (fetchHistory != null && fetchHistory.length > 0 && fetchHistory[0] != null) {
            fetchHistory[0].put(RecrawlAttributeConstants.A_WRITE_TAG, warcFilename);
        }

        if (crawlURI.getContentDigest() == null || !crawlURI.hasContentDigestHistory()) {
            return;
        }
        Map<String, Object> digestHistory = crawlURI.getContentDigestHistory();
        for (WARCRecordInfo recordInfo : wARCWriter.getTmpRecordLog()) {
            boolean payloadRecord = recordInfo.getType() == WARCConstants.WARCRecordType.response
                    || recordInfo.getType() == WARCConstants.WARCRecordType.resource;
            if (payloadRecord && recordInfo.getContentStream() != null && recordInfo.getContentLength() > 0) {
                // A freshly stored payload becomes the reference copy for this digest.
                digestHistory.put(RecrawlAttributeConstants.A_ORIGINAL_URL, recordInfo.getUrl());
                digestHistory.put(RecrawlAttributeConstants.A_WARC_RECORD_ID, recordInfo.getRecordId().toString());
                digestHistory.put(RecrawlAttributeConstants.A_WARC_FILENAME, recordInfo.getWARCFilename());
                digestHistory.put(RecrawlAttributeConstants.A_WARC_FILE_OFFSET, recordInfo.getWARCFileOffset());
                digestHistory.put(RecrawlAttributeConstants.A_ORIGINAL_DATE, recordInfo.getCreate14DigitDate());
                digestHistory.put(RecrawlAttributeConstants.A_CONTENT_DIGEST_COUNT, Integer.valueOf(1));
            } else if (recordInfo.getType() == WARCConstants.WARCRecordType.revisit
                    && (crawlURI.getRevisitProfile() instanceof IdenticalPayloadDigestRevisit)) {
                Integer previousCount = (Integer) digestHistory.get(RecrawlAttributeConstants.A_CONTENT_DIGEST_COUNT);
                if (previousCount == null) {
                    // No recorded count: treat the original capture as the first sighting.
                    previousCount = Integer.valueOf(1);
                }
                digestHistory.put(RecrawlAttributeConstants.A_CONTENT_DIGEST_COUNT, Integer.valueOf(previousCount.intValue() + 1));
            }
        }
    }
}
