package dk.netarkivet.viewerproxy.webinterface;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.DomainUtils;
import dk.netarkivet.common.utils.FixedUURI;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.archive.ArchiveBatchJob;
import dk.netarkivet.common.utils.archive.ArchiveRecordBase;
import dk.netarkivet.common.utils.batch.ArchiveBatchFilter;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/viewerproxy/webinterface/HarvestedUrlsForDomainBatchJob.class */
public class HarvestedUrlsForDomainBatchJob extends ArchiveBatchJob {
    private static final Logger log = LoggerFactory.getLogger(HarvestedUrlsForDomainBatchJob.class);
    private static final String SETUP_URL_FORMAT = String.format("metadata://%s/crawl/logs/crawl.log", Settings.get(CommonSettings.ORGANIZATION));
    final String domain;

    public HarvestedUrlsForDomainBatchJob(String str) {
        ArgumentNotValid.checkNotNullOrEmpty(str, Constants.DOMAIN_PARAM);
        this.domain = str;
        this.batchJobTimeout = 604800000L;
    }

    public void initialize(OutputStream outputStream) {
    }

    public ArchiveBatchFilter getFilter() {
        return new ArchiveBatchFilter("OnlyCrawlLog") { // from class: dk.netarkivet.viewerproxy.webinterface.HarvestedUrlsForDomainBatchJob.1
            public boolean accept(ArchiveRecordBase archiveRecordBase) {
                return archiveRecordBase.bIsWarc ? archiveRecordBase.getHeader().getUrl() != null && archiveRecordBase.getHeader().getUrl().startsWith(HarvestedUrlsForDomainBatchJob.SETUP_URL_FORMAT) : archiveRecordBase.getHeader().getUrl().startsWith(HarvestedUrlsForDomainBatchJob.SETUP_URL_FORMAT);
            }
        };
    }

    public void processRecord(ArchiveRecordBase archiveRecordBase, OutputStream outputStream) {
        ArgumentNotValid.checkNotNull(archiveRecordBase, "ArchiveRecordBase record");
        ArgumentNotValid.checkNotNull(outputStream, "OutputStream os");
        log.info("looking for crawl-log lines for domain: " + this.domain);
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(archiveRecordBase.getInputStream()));
        String str = null;
        try {
            try {
                str = bufferedReader.readLine();
                while (str != null) {
                    String[] split = str.split("\\s+");
                    if (split.length > 3 && getDomainFromUrlPart(split[3]).equals(this.domain)) {
                        outputStream.write(str.getBytes("UTF-8"));
                        outputStream.write(10);
                    } else if (split.length > 5 && !split[5].equals(dk.netarkivet.harvester.webinterface.Constants.NoNextDate) && getDomainFromUrlPart(split[5]).equals(this.domain)) {
                        outputStream.write(str.getBytes("UTF-8"));
                        outputStream.write(10);
                    }
                    str = bufferedReader.readLine();
                }
            } finally {
                try {
                    bufferedReader.close();
                } catch (IOException e) {
                    log.warn("unable to close arcreader probably", e);
                }
            }
        } catch (IOException e2) {
            throw new IOFailure("Unable to process (w)arc record", e2);
        } catch (Throwable th) {
            th.printStackTrace();
            System.out.println("caused by line '" + str + "'");
            try {
                bufferedReader.close();
            } catch (IOException e3) {
                log.warn("unable to close arcreader probably", e3);
            }
        }
    }

    private String getDomainFromUrlPart(String str) {
        String str2 = null;
        try {
            str2 = DomainUtils.domainNameFromHostname(new FixedUURI(str, true).getReferencedHost());
        } catch (Exception e) {
            log.warn("Unable to extract a domain name from the url ' " + str + "' due to exception", e);
        }
        if (str2 == null) {
            str2 = "";
        }
        return str2;
    }

    public void finish(OutputStream outputStream) {
    }

    public String toString() {
        return getClass().getName() + ", with arguments: Domain = " + this.domain + ", Filter = " + getFilter();
    }
}
