package org.archive.modules.recrawl.wbm;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HttpContext;
import org.archive.modules.CrawlURI;
import org.archive.modules.ProcessResult;
import org.archive.modules.Processor;
import org.archive.modules.recrawl.FetchHistoryHelper;
import org.archive.modules.recrawl.hbase.SingleColumnJsonRecrawlDataSchema;
import org.archive.util.ArchiveUtils;
import org.archive.util.DateUtils;

/* loaded from: input_file:org/archive/modules/recrawl/wbm/WbmPersistLoadProcessor.class */
/**
 * Processor that looks up a URI in a remote CDX endpoint over HTTP and merges
 * the timestamp and content digest found there into the URI's fetch history.
 *
 * <p>The endpoint is described by a template ({@link #setQueryURL(String)})
 * in which {@code $u} is replaced by the URL-encoded URI and {@code $s} by a
 * 14-digit start date derived from {@link #setQueryRangeSecs(long)}.
 */
public class WbmPersistLoadProcessor extends Processor {
    private static final Log log = LogFactory.getLog(WbmPersistLoadProcessor.class);

    /** Index of the URL-encoded URI in the argument array handed to segments. */
    private static final int ARG_URL = 0;
    /** Index of the 14-digit start date in the argument array handed to segments. */
    private static final int ARG_START_DATE = 1;

    /** Maximum number of GET attempts made by {@link #getCDX(String)}. */
    private static final int MAX_ATTEMPTS = 3;
    /** Pause between two GET attempts, in milliseconds. */
    private static final long RETRY_DELAY_MS = 5000L;

    /** Zero-based, space-separated field index of the timestamp in a CDX line. */
    private static final int TIMESTAMP_FIELD = 1;
    /** Zero-based, space-separated field index of the digest in a CDX line. */
    private static final int DIGEST_FIELD = 5;
    /** Exact number of digits a timestamp must have to be accepted. */
    private static final int TIMESTAMP_LENGTH = 14;
    /** Exact number of characters a digest must have to be accepted. */
    private static final int DIGEST_LENGTH = 32;

    /** Lazily built by {@link #getHttpClient()} unless injected via {@link #setHttpClient}. */
    private HttpClient client;
    /** Connection pool backing the lazily built client; null until first use. */
    private PoolingHttpClientConnectionManager conman;
    /** Pre-parsed form of {@link #queryURL}; rebuilt whenever the template changes. */
    private FormatSegment[] preparedQueryURL;
    private int historyLength = 2;
    private String queryURL = "http://wwwb-dedup.us.archive.org:8083/web/timemap/cdx?url=$u&limit=-1";
    private String contentDigestScheme = SingleColumnJsonRecrawlDataSchema.CONTENT_DIGEST_SCHEME;
    /** Passed to {@code RequestConfig.setSocketTimeout} for every request. */
    private int socketTimeout = 10000;
    /** Passed to {@code RequestConfig.setConnectTimeout} for every request. */
    private int connectionTimeout = 10000;
    private int maxConnections = 10;
    /** Stored and exposed through its accessors only; nothing in this class reads it. */
    private boolean gzipAccepted = false;
    /** Extra headers added to every outgoing request by the client's interceptor. */
    private Map<String, String> requestHeaders = new ConcurrentHashMap<>(1, 0.75f, 2);
    private final AtomicLong loadedCount = new AtomicLong();
    private final AtomicLong missedCount = new AtomicLong();
    private final AtomicLong errorCount = new AtomicLong();
    private final AtomicLong cumulativeFetchTime = new AtomicLong();
    /** Look-back window in seconds used for the {@code $s} placeholder (default is 180 days). */
    private long queryRangeSecs = 15552000;

    /** One piece of the parsed query URL template. */
    public interface FormatSegment {
        /**
         * Appends this segment's text to {@code sb}.
         *
         * @param sb     buffer the URL is being assembled in
         * @param strArr interpolation arguments (encoded URI, start date)
         */
        void print(StringBuilder sb, String[] strArr);
    }

    /** Segment that emits one of the interpolation arguments. */
    public static class InterpolateSegment implements FormatSegment {
        private final int aidx;

        /**
         * @param i index into the argument array passed to {@link #print}
         */
        public InterpolateSegment(int i) {
            this.aidx = i;
        }

        @Override
        public void print(StringBuilder sb, String[] strArr) {
            sb.append(strArr[this.aidx]);
        }
    }

    /** Segment that emits a fixed piece of the template verbatim. */
    public static class StaticSegment implements FormatSegment {
        private final String s;

        /**
         * @param str literal text to emit
         */
        public StaticSegment(String str) {
            this.s = str;
        }

        @Override
        public void print(StringBuilder sb, String[] strArr) {
            sb.append(this.s);
        }
    }

    /** Creates a processor using the default query URL template. */
    public WbmPersistLoadProcessor() {
        prepareQueryURL();
    }

    /** @param i history length handed to {@code FetchHistoryHelper.getFetchHistory} */
    public void setHistoryLength(int i) {
        this.historyLength = i;
    }

    /** @return history length handed to {@code FetchHistoryHelper.getFetchHistory} */
    public int getHistoryLength() {
        return this.historyLength;
    }

    /**
     * Sets the query URL template and re-parses it.
     *
     * @param str template; {@code $u} and {@code $s} are placeholders, any
     *            other {@code $x} pair is kept literally
     * @throws NullPointerException if {@code str} is null (the previous
     *             template is left in place)
     */
    public void setQueryURL(String str) {
        if (str == null) {
            // Fail before touching state so queryURL/preparedQueryURL stay in sync.
            throw new NullPointerException("queryURL must not be null");
        }
        this.queryURL = str;
        prepareQueryURL();
    }

    /** @return the current query URL template */
    public String getQueryURL() {
        return this.queryURL;
    }

    /**
     * Splits {@link #queryURL} into static and interpolated segments so that
     * {@link #buildURL(String)} only has to concatenate.
     */
    private void prepareQueryURL() {
        final String template = this.queryURL;
        final int length = template.length();
        final List<FormatSegment> segments = new ArrayList<>();
        int pos = 0;
        while (pos < length) {
            final int dollar = template.indexOf('$', pos);
            // No more placeholders, or a lone '$' as the very last character:
            // whatever is left is emitted literally below.
            if (dollar < 0 || dollar + 2 > length) {
                break;
            }
            if (dollar > pos) {
                segments.add(new StaticSegment(template.substring(pos, dollar)));
            }
            switch (template.charAt(dollar + 1)) {
                case 'u':
                    segments.add(new InterpolateSegment(ARG_URL));
                    break;
                case 's':
                    segments.add(new InterpolateSegment(ARG_START_DATE));
                    break;
                default:
                    // Unknown placeholder letter: keep the two characters as they are.
                    segments.add(new StaticSegment(template.substring(dollar, dollar + 2)));
                    break;
            }
            pos = dollar + 2;
        }
        if (pos < length) {
            segments.add(new StaticSegment(template.substring(pos)));
        }
        this.preparedQueryURL = segments.toArray(new FormatSegment[0]);
    }

    /** @param str prefix prepended to the digest stored under {@code "content-digest"} */
    public void setContentDigestScheme(String str) {
        this.contentDigestScheme = str;
    }

    /** @return prefix prepended to the digest stored under {@code "content-digest"} */
    public String getContentDigestScheme() {
        return this.contentDigestScheme;
    }

    /** @param i socket timeout applied to each request */
    public void setSocketTimeout(int i) {
        this.socketTimeout = i;
    }

    /** @return socket timeout applied to each request */
    public int getSocketTimeout() {
        return this.socketTimeout;
    }

    /** @param i connect timeout applied to each request */
    public void setConnectionTimeout(int i) {
        this.connectionTimeout = i;
    }

    /** @return connect timeout applied to each request */
    public int getConnectionTimeout() {
        return this.connectionTimeout;
    }

    /** @return configured per-route connection limit */
    public int getMaxConnections() {
        return this.maxConnections;
    }

    /**
     * Sets the per-route connection limit. If the connection pool already
     * exists it is updated in place; its total limit is only ever raised.
     *
     * @param i new per-route connection limit
     */
    public synchronized void setMaxConnections(int i) {
        this.maxConnections = i;
        if (this.conman == null) {
            return;
        }
        if (this.conman.getMaxTotal() < this.maxConnections) {
            this.conman.setMaxTotal(this.maxConnections);
        }
        this.conman.setDefaultMaxPerRoute(this.maxConnections);
    }

    /** @return the stored gzip flag (not consulted anywhere in this class) */
    public boolean isGzipAccepted() {
        return this.gzipAccepted;
    }

    /** @param z gzip flag to store (not consulted anywhere in this class) */
    public void setGzipAccepted(boolean z) {
        this.gzipAccepted = z;
    }

    /** @return the live map of extra request headers */
    public Map<String, String> getRequestHeaders() {
        return this.requestHeaders;
    }

    /**
     * Replaces the extra request headers with a copy of {@code map}.
     *
     * @param map headers to send with every request; {@code null} empties the
     *            current map instead of replacing it
     */
    public void setRequestHeaders(Map<String, String> map) {
        if (map == null) {
            this.requestHeaders.clear();
            return;
        }
        final Map<String, String> copy = new ConcurrentHashMap<>(1, 0.75f, 2);
        copy.putAll(map);
        this.requestHeaders = copy;
    }

    /** @return number of URIs for which CDX data was found */
    public long getLoadedCount() {
        return this.loadedCount.get();
    }

    /** @return number of URIs for which no usable CDX data was found */
    public long getMissedCount() {
        return this.missedCount.get();
    }

    /** @return number of URIs whose CDX lookup failed with an I/O error */
    public long getErrorCount() {
        return this.errorCount.get();
    }

    /** @return total milliseconds spent inside {@code HttpClient.execute} */
    public long getCumulativeFetchTime() {
        return this.cumulativeFetchTime.get();
    }

    /** @param httpClient client to use instead of the lazily built default */
    public void setHttpClient(HttpClient httpClient) {
        this.client = httpClient;
    }

    /**
     * Tells whether any element's name equals {@code str}, ignoring case.
     * Currently not called from within this class.
     */
    private static boolean contains(HeaderElement[] headerElementArr, String str) {
        for (HeaderElement headerElement : headerElementArr) {
            if (headerElement.getName().equalsIgnoreCase(str)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the HTTP client, building a pooled, cookie-less one on first
     * use. The built client adds every entry of {@link #getRequestHeaders()}
     * to each outgoing request.
     *
     * @return the injected or lazily created client
     */
    public synchronized HttpClient getHttpClient() {
        if (this.client != null) {
            return this.client;
        }
        if (this.conman == null) {
            this.conman = new PoolingHttpClientConnectionManager();
            this.conman.setDefaultMaxPerRoute(this.maxConnections);
            // Never lower the pool's own default total, only raise it.
            this.conman.setMaxTotal(Math.max(this.conman.getMaxTotal(), this.maxConnections));
        }
        final HttpClientBuilder builder = HttpClientBuilder.create()
                .disableCookieManagement()
                .setConnectionManager(this.conman);
        builder.useSystemProperties();
        builder.addInterceptorLast(new HttpRequestInterceptor() {
            @Override
            public void process(HttpRequest httpRequest, HttpContext httpContext) {
                // Read the field on every request so header changes take effect
                // without rebuilding the client.
                final Map<String, String> headers = WbmPersistLoadProcessor.this.requestHeaders;
                if (headers != null) {
                    for (Map.Entry<String, String> header : headers.entrySet()) {
                        httpRequest.addHeader(header.getKey(), header.getValue());
                    }
                }
            }
        });
        this.client = builder.build();
        return this.client;
    }

    /** @param j look-back window in seconds; zero or negative means "since the epoch" */
    public void setQueryRangeSecs(long j) {
        this.queryRangeSecs = j;
    }

    /** @return look-back window in seconds */
    public long getQueryRangeSecs() {
        return this.queryRangeSecs;
    }

    /**
     * Computes the value substituted for {@code $s}: now minus the query
     * range, or the epoch when the range is not positive.
     */
    private String buildStartDate() {
        final long rangeSecs = this.queryRangeSecs;
        if (rangeSecs <= 0) {
            return ArchiveUtils.get14DigitDate(new Date(0L));
        }
        return ArchiveUtils.get14DigitDate(new Date(System.currentTimeMillis() - rangeSecs * 1000));
    }

    /**
     * Expands the query URL template for one URI.
     *
     * @param str URI to look up; it is URL-encoded as UTF-8 before insertion
     * @return the fully expanded query URL
     */
    protected String buildURL(String str) {
        String encoded;
        try {
            encoded = URLEncoder.encode(str, "UTF-8");
        } catch (UnsupportedEncodingException ignored) {
            // UTF-8 is a mandatory charset, so this is not expected to happen;
            // fall back to the raw value as the original code did.
            encoded = str;
        }
        final String[] args = {encoded, buildStartDate()};
        final StringBuilder url = new StringBuilder();
        for (FormatSegment segment : this.preparedQueryURL) {
            segment.print(url, args);
        }
        return url.toString();
    }

    /**
     * Fetches the CDX response for a URI, making up to {@value #MAX_ATTEMPTS}
     * attempts with a pause between them. An attempt fails when the request
     * throws or the status is not 200.
     *
     * @param str URI to look up
     * @return the response body stream; the caller must close it
     * @throws InterruptedException if the thread is interrupted before an
     *             attempt or while waiting between attempts
     * @throws IOException if every attempt failed
     */
    protected InputStream getCDX(String str) throws InterruptedException, IOException {
        final String url = buildURL(str);
        final HttpGet request = new HttpGet(url);
        request.setConfig(RequestConfig.custom()
                .setConnectTimeout(this.connectionTimeout)
                .setSocketTimeout(this.socketTimeout)
                .build());
        HttpEntity entity = null;
        int attempts = 0;
        do {
            if (Thread.interrupted()) {
                throw new InterruptedException("interrupted while GET " + url);
            }
            if (attempts > 0) {
                Thread.sleep(RETRY_DELAY_MS);
            }
            try {
                final long started = System.currentTimeMillis();
                final HttpResponse response = getHttpClient().execute(request);
                this.cumulativeFetchTime.addAndGet(System.currentTimeMillis() - started);
                final StatusLine status = response.getStatusLine();
                if (status.getStatusCode() == 200) {
                    entity = response.getEntity();
                } else {
                    log.error("GET " + url + " failed with status=" + status.getStatusCode() + " " + status.getReasonPhrase());
                    // Close the error body so the pooled connection is released.
                    final HttpEntity rejected = response.getEntity();
                    if (rejected != null) {
                        rejected.getContent().close();
                    }
                }
            } catch (IOException e) {
                log.error("GET " + url + " failed with error " + e.getMessage());
            } catch (Exception e) {
                // Deliberately broad: any client failure counts as a failed attempt.
                log.error("GET " + url + " failed with error ", e);
            }
        } while (entity == null && ++attempts < MAX_ATTEMPTS);
        if (entity == null) {
            throw new IOException("giving up on GET " + url + " after " + attempts + " attempts");
        }
        return entity.getContent();
    }

    /**
     * Looks up the URI's CDX data and merges it into the URI's fetch history.
     * Lookup and parse failures are logged and counted, never propagated.
     *
     * @param crawlURI URI being processed
     * @return always {@link ProcessResult#PROCEED}
     * @throws InterruptedException if the lookup is interrupted
     */
    protected ProcessResult innerProcessResult(CrawlURI crawlURI) throws InterruptedException {
        final InputStream cdx;
        try {
            cdx = getCDX(crawlURI.toString());
        } catch (IOException e) {
            log.error(e.getMessage());
            this.errorCount.incrementAndGet();
            return ProcessResult.PROCEED;
        }

        HashMap<String, Object> lastCrawl = null;
        try {
            lastCrawl = getLastCrawl(cdx);
        } catch (IOException e) {
            log.error("error parsing response", e);
        } finally {
            if (cdx != null) {
                ArchiveUtils.closeQuietly(cdx);
            }
        }

        // getLastCrawl may return a digest without a timestamp; the history slot
        // is keyed by timestamp, so such a result cannot be stored and is a miss.
        final Object timestamp = lastCrawl == null ? null : lastCrawl.get(FetchHistoryHelper.A_TIMESTAMP);
        if (timestamp instanceof Long) {
            final Map<String, Object> history = FetchHistoryHelper.getFetchHistory(
                    crawlURI, ((Long) timestamp).longValue(), this.historyLength);
            if (history != null) {
                history.putAll(lastCrawl);
            }
            this.loadedCount.incrementAndGet();
        } else {
            this.missedCount.incrementAndGet();
        }
        return ProcessResult.PROCEED;
    }

    /**
     * Scans a CDX response made of newline-terminated lines with
     * space-separated fields and extracts the timestamp (field 1, exactly 14
     * digits) and the digest (field 5, exactly 32 characters from
     * {@code A-Z0-9}). Both must be followed by a space to be accepted. Each
     * line that reaches the respective field overwrites or invalidates the
     * value kept from earlier lines, so the result reflects the last such line.
     *
     * <p>NOTE(review): a final line with fewer than six fields resets only the
     * timestamp, so its timestamp can end up paired with an earlier line's
     * digest. Confirm whether the endpoint can return such lines.
     *
     * @param inputStream CDX response body; read to the end but not closed
     * @return map with {@code "content-digest"} and/or the timestamp entries,
     *         or {@code null} when neither value was found
     * @throws IOException if reading the stream fails
     */
    protected HashMap<String, Object> getLastCrawl(InputStream inputStream) throws IOException {
        final ByteBuffer digest = ByteBuffer.allocate(DIGEST_LENGTH);
        final ByteBuffer timestamp = ByteBuffer.allocate(TIMESTAMP_LENGTH);
        int field = 0;
        int c;
        do {
            // First byte of the current field (or EOF).
            c = inputStream.read();
            if (field == TIMESTAMP_FIELD) {
                timestamp.clear();
                while (Character.isDigit(c) && timestamp.hasRemaining()) {
                    timestamp.put((byte) c);
                    c = inputStream.read();
                }
                // Reject values that are too short, too long or not space-terminated.
                if (c != ' ' || timestamp.hasRemaining()) {
                    timestamp.clear();
                }
            } else if (field == DIGEST_FIELD) {
                digest.clear();
                while (isDigestChar(c) && digest.hasRemaining()) {
                    digest.put((byte) c);
                    c = inputStream.read();
                }
                if (c != ' ' || digest.hasRemaining()) {
                    digest.clear();
                }
            }
            // Skip whatever is left of the current field.
            while (c != -1 && c != '\n' && c != ' ') {
                c = inputStream.read();
            }
            if (c == '\n') {
                field = 0;
            } else if (c == ' ') {
                field++;
            }
        } while (c != -1);

        final HashMap<String, Object> info = new HashMap<>();
        // A completely filled buffer is the marker for "valid value captured".
        if (!digest.hasRemaining()) {
            info.put("content-digest",
                    this.contentDigestScheme + new String(digest.array(), StandardCharsets.US_ASCII));
        }
        if (!timestamp.hasRemaining()) {
            final String digits = new String(timestamp.array(), StandardCharsets.US_ASCII);
            try {
                final long time = DateUtils.parse14DigitDate(digits).getTime();
                info.put(FetchHistoryHelper.A_TIMESTAMP, Long.valueOf(time));
                info.put("fetch-began-time", Long.valueOf(time));
            } catch (ParseException e) {
                // Best effort: keep the digest (if any) and drop the bad timestamp.
                log.warn("ignoring unparseable CDX timestamp " + digits, e);
            }
        }
        return info.isEmpty() ? null : info;
    }

    /** Tells whether {@code c} is an upper-case ASCII letter or an ASCII digit. */
    private static boolean isDigestChar(int c) {
        return (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
    }

    /** Intentionally empty; the work is done in {@link #innerProcessResult(CrawlURI)}. */
    protected void innerProcess(CrawlURI crawlURI) throws InterruptedException {
    }

    /**
     * @param crawlURI URI about to be processed
     * @return {@code true} only for {@code http} and {@code https} URIs
     */
    protected boolean shouldProcess(CrawlURI crawlURI) {
        final String scheme = crawlURI.getUURI().getScheme();
        // Constant-first comparison so a missing scheme yields false instead of an NPE.
        return "http".equals(scheme) || "https".equals(scheme);
    }

    /**
     * Command-line helper: fetches the CDX response for a URI and copies it
     * to standard output.
     *
     * @param strArr {@code strArr[0]} is the URI to look up; optional
     *               {@code strArr[1]} is sent as the {@code Cookie} header
     * @throws IllegalArgumentException if no URI is given
     * @throws Exception if the lookup fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr.length < 1) {
            throw new IllegalArgumentException("usage: WbmPersistLoadProcessor URL [COOKIE]");
        }
        final String url = strArr[0];
        final String cookie = strArr.length > 1 ? strArr[1] : null;
        final WbmPersistLoadProcessor processor = new WbmPersistLoadProcessor();
        if (cookie != null) {
            processor.setRequestHeaders(Collections.singletonMap("Cookie", cookie));
        }
        try (InputStream cdx = processor.getCDX(url)) {
            final byte[] buffer = new byte[1024];
            int count;
            while ((count = cdx.read(buffer)) > 0) {
                System.out.write(buffer, 0, count);
            }
        }
        System.out.flush();
    }
}
