package org.archive.wayback.resourceindex.cdxserver;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.httpclient.URIException;
import org.archive.cdxserver.CDXQuery;
import org.archive.cdxserver.CDXServer;
import org.archive.cdxserver.auth.AuthToken;
import org.archive.format.cdx.CDXFieldConstants;
import org.archive.format.cdx.CDXInputSource;
import org.archive.format.cdx.MultiCDXInputSource;
import org.archive.format.cdx.StandardCDXLineFactory;
import org.archive.format.gzip.zipnum.ZipNumParams;
import org.archive.url.UrlSurtRangeComputer;
import org.archive.util.binsearch.SeekableLineReaderIterator;
import org.archive.util.binsearch.impl.HTTPSeekableLineReader;
import org.archive.util.binsearch.impl.HTTPSeekableLineReaderFactory;
import org.archive.util.binsearch.impl.http.ApacheHttp31SLRFactory;
import org.archive.util.io.RuntimeIOException;
import org.archive.util.iterator.CloseableIterator;
import org.archive.util.iterator.SortedCompositeIterator;
import org.archive.wayback.ResourceIndex;
import org.archive.wayback.UrlCanonicalizer;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.CaptureSearchResults;
import org.archive.wayback.core.SearchResults;
import org.archive.wayback.core.WaybackRequest;
import org.archive.wayback.exception.AccessControlException;
import org.archive.wayback.exception.BadQueryException;
import org.archive.wayback.exception.ResourceIndexNotAvailableException;
import org.archive.wayback.exception.ResourceNotInArchiveException;
import org.archive.wayback.exception.WaybackException;
import org.archive.wayback.memento.MementoConstants;
import org.archive.wayback.memento.MementoHandler;
import org.archive.wayback.memento.MementoUtils;
import org.archive.wayback.resourceindex.filters.SelfRedirectFilter;
import org.archive.wayback.util.webapp.AbstractRequestHandler;
import org.archive.wayback.webapp.PerfStats;
import org.json.JSONException;
import org.json.JSONObject;
import org.springframework.web.bind.ServletRequestBindingException;
import org.springframework.web.servlet.tags.BindTag;

/* loaded from: input_file:WEB-INF/lib/openwayback-core-2.0.0.jar:org/archive/wayback/resourceindex/cdxserver/EmbeddedCDXServerIndex.class */
public class EmbeddedCDXServerIndex extends AbstractRequestHandler implements MementoHandler, ResourceIndex {
    private static final Logger LOGGER = Logger.getLogger(EmbeddedCDXServerIndex.class.getName());
    protected CDXServer cdxServer;
    protected SelfRedirectFilter selfRedirFilter;
    protected String remoteCdxPath;
    private String remoteAuthCookie;
    private String remoteAuthCookieIgnoreRobots;
    protected CDXInputSource extraSource;
    protected String preferContains;
    protected List<String> ignoreRobotPaths;
    protected String baseStatusRegexp;
    protected String baseStatusFilter;
    protected int timestampDedupLength = 0;
    protected int limit = 0;
    protected UrlCanonicalizer canonicalizer = null;
    private HTTPSeekableLineReaderFactory remoteCdxHttp = new ApacheHttp31SLRFactory();
    private StandardCDXLineFactory cdxLineFactory = new StandardCDXLineFactory("cdx11");
    protected boolean tryFuzzyMatch = false;

    /* loaded from: input_file:WEB-INF/lib/openwayback-core-2.0.0.jar:org/archive/wayback/resourceindex/cdxserver/EmbeddedCDXServerIndex$PerfStat.class */
    /** Keys this class passes to {@link PerfStats#timeStart} and {@link PerfStats#timeEnd}. */
    enum PerfStat {
        /** Used around {@code query} and {@code renderMementoTimemap} in the enclosing class. */
        IndexLoad
    }

    public EmbeddedCDXServerIndex() {
        setBaseStatusRegexp("!(500|502|504)");
    }

    /**
     * Delegates to {@link #doQuery(WaybackRequest)} while timing the call under
     * {@link PerfStat#IndexLoad}. The timer is stopped whether the lookup
     * returns or throws.
     *
     * @param waybackRequest the request to look up
     * @return whatever {@code doQuery} returns
     */
    @Override // org.archive.wayback.ResourceIndex
    public SearchResults query(WaybackRequest waybackRequest) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException {
        try {
            PerfStats.timeStart(PerfStat.IndexLoad);
            return doQuery(waybackRequest);
        } finally {
            PerfStats.timeEnd(PerfStat.IndexLoad);
        }
    }

    /**
     * Builds the auth token used for a CDX lookup.
     *
     * <p>The token is bound to the request's access point and allows all CDX
     * fields. "Ignore robots" is switched on when the request is in a CSS, IMG
     * or JS context, or when {@code str} starts with any entry of
     * {@code ignoreRobotPaths}.
     *
     * @param waybackRequest the request being served
     * @param str the canonicalized URL key, matched against the configured prefixes
     * @return the populated token
     */
    protected AuthToken createAuthToken(WaybackRequest waybackRequest, String str) {
        final APContextAuthToken token = new APContextAuthToken(waybackRequest.getAccessPoint());
        token.setAllCdxFieldsAllow();

        final boolean cssImgOrJsContext =
                waybackRequest.isCSSContext() || waybackRequest.isIMGContext() || waybackRequest.isJSContext();
        if (cssImgOrJsContext) {
            token.setIgnoreRobots(true);
        }

        if (this.ignoreRobotPaths == null) {
            return token;
        }
        for (String prefix : this.ignoreRobotPaths) {
            if (str.startsWith(prefix)) {
                token.setIgnoreRobots(true);
                break; // one matching prefix is enough
            }
        }
        return token;
    }

    /**
     * Runs the CDX lookup for a request and returns the collected search results.
     *
     * <p>Steps:
     * <ol>
     *   <li>Pick a canonicalizer: the configured one, else the one owned by
     *       {@code selfRedirFilter}.</li>
     *   <li>Canonicalize the request URL into a key and build the auth token.</li>
     *   <li>Choose a capture writer (replay / capture query) or a URL writer
     *       (URL query); any other request type is rejected.</li>
     *   <li>Load the CDX lines; if nothing came back for a capture lookup and
     *       {@code tryFuzzyMatch} is set, load once more with a second writer
     *       created with the fuzzy flag.</li>
     * </ol>
     *
     * @param waybackRequest the request to look up
     * @return the non-empty search results
     * @throws IllegalArgumentException if no canonicalizer is available
     * @throws BadQueryException if the URL cannot be canonicalized, the request
     *         type is unknown, or the writer reported an error message
     * @throws ResourceNotInArchiveException if no result was returned
     * @throws ResourceIndexNotAvailableException on I/O failure or any other
     *         runtime failure during the lookup
     * @throws AccessControlException when a runtime exception carries one as its cause
     */
    public SearchResults doQuery(WaybackRequest waybackRequest) throws ResourceIndexNotAvailableException, ResourceNotInArchiveException, BadQueryException, AccessControlException {
        // Local name deliberately differs from the field to avoid shadowing it.
        UrlCanonicalizer keyMaker = getCanonicalizer();
        if (keyMaker == null && this.selfRedirFilter != null) {
            keyMaker = this.selfRedirFilter.getCanonicalizer();
        }
        if (keyMaker == null) {
            throw new IllegalArgumentException("Unable to find canonicalizer, canonicalizer property or selfRedirFilter property must be set");
        }
        try {
            String urlKey = keyMaker.urlStringToKey(waybackRequest.getRequestUrl());
            AuthToken token = createAuthToken(waybackRequest, urlKey);
            boolean captureLookup = waybackRequest.isReplayRequest() || waybackRequest.isCaptureQueryRequest();

            CDXToSearchResultWriter writer;
            if (captureLookup) {
                writer = getCaptureSearchWriter(waybackRequest, token, false);
            } else if (waybackRequest.isUrlQueryRequest()) {
                writer = getUrlSearchWriter(waybackRequest);
            } else {
                throw new BadQueryException("Unknown Query Type");
            }

            try {
                loadWaybackCdx(urlKey, waybackRequest, writer.getQuery(), token, writer, false);
                if (writer.getErrorMsg() != null) {
                    throw new BadQueryException(writer.getErrorMsg());
                }
                SearchResults results = writer.getSearchResults();

                // Second chance: retry with the fuzzy flag when the first pass found nothing.
                if (results.getReturnedCount() == 0 && captureLookup && this.tryFuzzyMatch) {
                    CDXToCaptureSearchResultsWriter fuzzyWriter = getCaptureSearchWriter(waybackRequest, token, true);
                    if (fuzzyWriter != null) {
                        loadWaybackCdx(urlKey, waybackRequest, fuzzyWriter.getQuery(), token, fuzzyWriter, true);
                        results = fuzzyWriter.getSearchResults();
                    }
                }

                if (results.getReturnedCount() == 0) {
                    throw new ResourceNotInArchiveException(waybackRequest.getRequestUrl() + " was not found");
                }
                return results;
            } catch (IOException e) {
                throw new ResourceIndexNotAvailableException(e.toString());
            } catch (RuntimeException e) {
                // Lower layers wrap checked failures in runtime exceptions; unwrap the known ones.
                Throwable cause = e.getCause();
                if (cause instanceof AccessControlException) {
                    throw (AccessControlException) cause;
                }
                if (cause instanceof IOException) {
                    throw new ResourceIndexNotAvailableException(cause.toString());
                }
                // Route the stack trace through the class logger instead of stderr.
                LOGGER.log(Level.WARNING, "Unexpected failure while querying the CDX index for " + waybackRequest.getRequestUrl(), e);
                throw new ResourceIndexNotAvailableException(e.toString());
            }
        } catch (URIException e) {
            throw new BadQueryException(e.toString());
        }
    }

    /**
     * Feeds CDX lines for a query into the given writer, preferring the remote
     * CDX server when one is configured.
     *
     * <p>When {@code remoteCdxPath} is set and the request is not a URL query,
     * the remote server is tried first. Before that attempt the request's
     * timestamp-search-key flag is cleared. If the remote attempt fails with an
     * {@link IOException} or a {@link RuntimeIOException}, the failure is logged
     * and the embedded {@code cdxServer} is queried instead. A
     * {@code RuntimeIOException} caused by an {@link AccessControlException} is
     * not retried; the access exception is rethrown.
     *
     * <p>NOTE(review): the writer is reused for the fallback, so anything it
     * received before the remote failure stays in it; confirm the writer
     * tolerates that.
     *
     * @param str canonicalized URL key
     * @param waybackRequest the request being served (mutated as described above)
     * @param cDXQuery not read here; the query is taken from the writer
     * @param authToken token passed to whichever server is queried
     * @param cDXToSearchResultWriter receives the CDX lines and supplies the query
     * @param z not read by this implementation
     * @throws IOException if the embedded server lookup fails
     * @throws AccessControlException if the remote lookup was denied
     */
    protected void loadWaybackCdx(String str, WaybackRequest waybackRequest, CDXQuery cDXQuery, AuthToken authToken, CDXToSearchResultWriter cDXToSearchResultWriter, boolean z) throws IOException, AccessControlException {
        if (this.remoteCdxPath != null && !waybackRequest.isUrlQueryRequest()) {
            try {
                waybackRequest.setTimestampSearchKey(false);
                remoteCdxServerQuery(str, cDXToSearchResultWriter.getQuery(), authToken, cDXToSearchResultWriter);
                return;
            } catch (IOException e) {
                // Previously swallowed without a trace; record it so remote outages are visible.
                LOGGER.log(Level.WARNING, "Remote CDX query failed for " + str + ", falling back to the embedded CDX server", e);
            } catch (RuntimeIOException e) {
                Throwable cause = e.getCause();
                if (cause instanceof AccessControlException) {
                    throw (AccessControlException) cause;
                }
                LOGGER.warning(e.toString());
            }
        }
        this.cdxServer.getCdx(cDXToSearchResultWriter.getQuery(), authToken, cDXToSearchResultWriter);
    }

    /**
     * Translates a Wayback request into a {@link CDXQuery}.
     *
     * <ul>
     *   <li>The configured {@code limit} is always applied.</li>
     *   <li>Replay requests: a "best latest" replay swaps the status filter for
     *       {@code statuscode:[23]..}; when the request uses a timestamp search
     *       key, the replay timestamp is set as the closest target.</li>
     *   <li>Capture queries: non-null start/end timestamps become from/to.</li>
     *   <li>A positive {@code timestampDedupLength} is set as the collapse time.</li>
     *   <li>A non-empty status filter is set as the query's only filter.</li>
     * </ul>
     *
     * @param waybackRequest the request to translate
     * @param z not read by this implementation
     * @return the populated query, never {@code null}
     */
    protected CDXQuery createQuery(WaybackRequest waybackRequest, boolean z) {
        final CDXQuery query = new CDXQuery(waybackRequest.getRequestUrl());
        query.setLimit(this.limit);

        String statusFilter = this.baseStatusFilter;
        if (waybackRequest.isReplayRequest()) {
            statusFilter = waybackRequest.isBestLatestReplayRequest() ? "statuscode:[23].." : statusFilter;
            if (waybackRequest.isTimestampSearchKey()) {
                query.setClosest(waybackRequest.getReplayTimestamp());
            }
        } else if (waybackRequest.isCaptureQueryRequest()) {
            final String from = waybackRequest.getStartTimestamp();
            if (from != null) {
                query.setFrom(from);
            }
            final String to = waybackRequest.getEndTimestamp();
            if (to != null) {
                query.setTo(to);
            }
        }

        if (this.timestampDedupLength > 0) {
            query.setCollapseTime(this.timestampDedupLength);
        }

        final boolean noFilter = statusFilter == null || statusFilter.isEmpty();
        if (!noFilter) {
            query.setFilter(new String[]{statusFilter});
        }
        return query;
    }

    /**
     * Queries the remote CDX server over HTTP and streams its lines into the writer.
     *
     * <p>The request URL is {@code remoteCdxPath} plus {@code url}, one
     * {@code filter} parameter per non-empty filter on the query, an optional
     * {@code collapseTime}, and {@code gzip=true}. When {@code remoteAuthCookie}
     * is set it is sent as the {@code cdx_auth_token} cookie; the
     * ignore-robots variant is used instead when the token ignores robots and
     * that variant is configured.
     *
     * <p>The access filter is consulted before any network traffic. Its boolean
     * result is not inspected here; NOTE(review): this presumably relies on
     * {@code includeUrl} throwing for denied URLs — confirm against the filter
     * implementation.
     *
     * @param str canonicalized URL key
     * @param cDXQuery supplies the URL, filters and collapse time
     * @param authToken used for the access check and cookie selection
     * @param cDXToSearchResultWriter receives the parsed CDX lines
     * @throws IOException if the HTTP exchange or closing a resource fails
     * @throws AccessControlException declared for callers; not thrown directly here
     */
    protected void remoteCdxServerQuery(String str, CDXQuery cDXQuery, AuthToken authToken, CDXToSearchResultWriter cDXToSearchResultWriter) throws IOException, AccessControlException {
        this.cdxServer.getAuthChecker().createAccessFilter(authToken).includeUrl(str, cDXQuery.getUrl());

        StringBuilder requestUrl = new StringBuilder(this.remoteCdxPath);
        requestUrl.append("?url=").append(URLEncoder.encode(cDXQuery.getUrl(), "UTF-8"));
        // The query may carry no filter at all (e.g. a blank base status regexp),
        // so never index into the array blindly; forward every filter that is present.
        String[] filters = cDXQuery.getFilter();
        if (filters != null) {
            for (String filter : filters) {
                if (filter != null && !filter.isEmpty()) {
                    requestUrl.append("&filter=").append(URLEncoder.encode(filter, "UTF-8"));
                }
            }
        }
        if (cDXQuery.getCollapseTime() > 0) {
            requestUrl.append("&collapseTime=").append(cDXQuery.getCollapseTime());
        }
        requestUrl.append("&gzip=true");

        HTTPSeekableLineReader reader = null;
        CloseableIterator<String> lines = null;
        try {
            reader = this.remoteCdxHttp.get(requestUrl.toString());
            if (this.remoteAuthCookie != null) {
                boolean useIgnoreRobotsCookie = authToken.isIgnoreRobots() && this.remoteAuthCookieIgnoreRobots != null;
                reader.setCookie("cdx_auth_token=" + (useIgnoreRobotsCookie ? this.remoteAuthCookieIgnoreRobots : this.remoteAuthCookie));
            }
            reader.setSaveErrHeader("X-Archive-Wayback-Runtime-Error");
            reader.seekWithMaxRead(0L, true, -1);

            lines = createRemoteIter(str, reader);
            cDXToSearchResultWriter.begin();
            while (lines.hasNext() && !cDXToSearchResultWriter.isAborted()) {
                cDXToSearchResultWriter.writeLine(this.cdxLineFactory.createStandardCDXLine(lines.next(), StandardCDXLineFactory.cdx11));
            }
            cDXToSearchResultWriter.end();
        } finally {
            // Close the iterator on every path (it may wrap the extra source),
            // and still close the reader if closing the iterator fails.
            try {
                if (lines != null) {
                    lines.close();
                }
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    }

    /**
     * Wraps the remote reader in a line iterator, merging in lines from
     * {@code extraSource} when that source has any for the key.
     *
     * <p>A {@code X-Page-Cache: HIT} response header is noted at FINE level.
     * When the extra source yields nothing, its iterator is closed here, since
     * no caller ever sees it, and the plain remote iterator is returned.
     *
     * @param str canonicalized URL key, used for all three key arguments of the
     *        extra-source lookup
     * @param hTTPSeekableLineReader the already-opened remote reader
     * @return an iterator over the remote lines, merged via
     *         {@code MultiCDXInputSource.defaultComparator} with the extra
     *         source when it has data
     * @throws IOException if the extra source lookup or closing it fails
     */
    protected CloseableIterator<String> createRemoteIter(String str, HTTPSeekableLineReader hTTPSeekableLineReader) throws IOException {
        SeekableLineReaderIterator remoteLines = new SeekableLineReaderIterator(hTTPSeekableLineReader);

        if ("HIT".equals(hTTPSeekableLineReader.getHeaderValue("X-Page-Cache")) && LOGGER.isLoggable(Level.FINE)) {
            LOGGER.fine("CACHED");
        }

        if (this.extraSource == null) {
            return remoteLines;
        }

        CloseableIterator<String> extraLines = this.extraSource.getCDXIterator(str, str, str, new ZipNumParams());
        if (extraLines.hasNext()) {
            SortedCompositeIterator<String> merged = new SortedCompositeIterator<String>(MultiCDXInputSource.defaultComparator);
            merged.addIterator(remoteLines);
            merged.addIterator(extraLines);
            return merged;
        }
        // Nothing to merge: release the unused iterator instead of leaking it.
        extraLines.close();
        return remoteLines;
    }

    /**
     * Creates the writer that collects capture results for a request.
     *
     * <p>The writer is built around the query from
     * {@link #createQuery(WaybackRequest, boolean)} and configured with the
     * request's replay timestamp as target and the configured self-redirect
     * filter.
     *
     * @param waybackRequest the request being served
     * @param authToken not read by this implementation
     * @param z forwarded to {@code createQuery}; when {@code true} and no query
     *        could be created, {@code null} is returned
     * @return the configured writer, or {@code null} as described above
     */
    protected CDXToCaptureSearchResultsWriter getCaptureSearchWriter(WaybackRequest waybackRequest, AuthToken authToken, boolean z) {
        final CDXQuery query = createQuery(waybackRequest, z);
        if (query == null && z) {
            return null;
        }
        final boolean replay = waybackRequest.isReplayRequest();
        final CDXToCaptureSearchResultsWriter writer =
                new CDXToCaptureSearchResultsWriter(query, replay, false, this.preferContains);
        writer.setTargetTimestamp(waybackRequest.getReplayTimestamp());
        writer.setSelfRedirFilter(this.selfRedirFilter);
        return writer;
    }

    /**
     * Creates the writer used for URL (prefix) queries.
     *
     * <p>The underlying query matches by prefix, collapses on {@code urlkey},
     * requests group and unique counts plus the last-skipped timestamp, and
     * restricts the output to the fields listed in the {@code fl} string below.
     *
     * @param waybackRequest supplies the request URL
     * @return a URL search result writer wrapping the query
     */
    protected CDXToSearchResultWriter getUrlSearchWriter(WaybackRequest waybackRequest) {
        final CDXQuery urlQuery = new CDXQuery(waybackRequest.getRequestUrl());
        final String[] collapseFields = {"urlkey"};
        urlQuery.setCollapse(collapseFields);
        urlQuery.setMatchType(UrlSurtRangeComputer.MatchType.prefix);
        urlQuery.setShowGroupCount(true);
        urlQuery.setShowUniqCount(true);
        urlQuery.setLastSkipTimestamp(true);
        final String fields = "urlkey,original,timestamp,endtimestamp,groupcount,uniqcount";
        urlQuery.setFl(fields);
        return new CDXToUrlSearchResultWriter(urlQuery);
    }

    /**
     * Renders a Memento timemap for the request, timed under
     * {@link PerfStat#IndexLoad}.
     *
     * <p>For the {@code link} format the access point's index is queried and
     * the result is printed through {@link MementoUtils}. For any other format
     * a {@link CDXQuery} is built from the request URL, the timemap format and
     * the optional page-start value, filled from the servlet request, and
     * handed to the embedded CDX server, which writes the response.
     *
     * @return always {@code true}
     */
    @Override // org.archive.wayback.memento.MementoHandler
    public boolean renderMementoTimemap(WaybackRequest waybackRequest, HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws WaybackException, IOException {
        try {
            PerfStats.timeStart(PerfStat.IndexLoad);

            final String format = waybackRequest.getMementoTimemapFormat();
            if ("link".equals(format)) {
                final CaptureSearchResults captures =
                        (CaptureSearchResults) waybackRequest.getAccessPoint().queryIndex(waybackRequest);
                MementoUtils.printTimemapResponse(captures, waybackRequest, httpServletResponse);
                return true;
            }

            final CDXQuery timemapQuery = new CDXQuery(waybackRequest.getRequestUrl());
            timemapQuery.setOutput(waybackRequest.getMementoTimemapFormat());
            final String pageStart = waybackRequest.get(MementoConstants.PAGE_STARTS);
            if (pageStart != null) {
                timemapQuery.setFrom(pageStart);
            }
            try {
                timemapQuery.fill(httpServletRequest);
            } catch (ServletRequestBindingException ignored) {
                // Binding failures are ignored; the query proceeds with what was set so far.
            }
            this.cdxServer.getCdx(httpServletRequest, httpServletResponse, timemapQuery);
            return true;
        } finally {
            PerfStats.timeEnd(PerfStat.IndexLoad);
        }
    }

    /**
     * Serves a raw CDX request: builds a {@link CDXQuery} from the servlet
     * request and lets the embedded CDX server write the response.
     *
     * @return always {@code true}
     */
    @Override // org.archive.wayback.util.webapp.RequestHandler
    public boolean handleRequest(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws ServletException, IOException {
        final CDXQuery requestQuery = new CDXQuery(httpServletRequest);
        this.cdxServer.getCdx(httpServletRequest, httpServletResponse, requestQuery);
        return true;
    }

    /** No-op: this implementation performs no work on shutdown. */
    @Override // org.archive.wayback.ResourceIndex
    public void shutdown() throws IOException {
    }

    /** @return the embedded CDX server that local lookups are delegated to */
    public CDXServer getCdxServer() {
        return cdxServer;
    }

    /** @param cDXServer the embedded CDX server that local lookups are delegated to */
    public void setCdxServer(CDXServer cDXServer) {
        cdxServer = cDXServer;
    }

    /** @return the value that {@code createQuery} sets as collapse time when it is positive */
    public int getTimestampDedupLength() {
        return timestampDedupLength;
    }

    /** @param i collapse time for built queries; values of zero or less leave it unset */
    public void setTimestampDedupLength(int i) {
        timestampDedupLength = i;
    }

    /** @return the self-redirect filter handed to capture writers; may be {@code null} */
    public SelfRedirectFilter getSelfRedirFilter() {
        return selfRedirFilter;
    }

    /**
     * @param selfRedirectFilter filter handed to capture writers; its
     *        canonicalizer is also used by {@code doQuery} when none is set directly
     */
    public void setSelfRedirFilter(SelfRedirectFilter selfRedirectFilter) {
        selfRedirFilter = selfRedirectFilter;
    }

    /** @return the canonicalizer used to turn request URLs into keys; may be {@code null} */
    public UrlCanonicalizer getCanonicalizer() {
        return canonicalizer;
    }

    /** @param urlCanonicalizer the canonicalizer used to turn request URLs into keys */
    public void setCanonicalizer(UrlCanonicalizer urlCanonicalizer) {
        canonicalizer = urlCanonicalizer;
    }

    /** @return the limit that {@code createQuery} sets on every query it builds */
    public int getLimit() {
        return limit;
    }

    /** @param i the limit that {@code createQuery} sets on every query it builds */
    public void setLimit(int i) {
        limit = i;
    }

    /**
     * Adds the standard timegate headers plus an {@code X-Link-JSON} header
     * describing the closest capture.
     *
     * <p>The JSON has the shape
     * {@code {"<closest-indicator>": {"wb_url": ..., "<timestamp>": ..., "status": ...}}}
     * with escaped slashes ({@code \/}) replaced by plain ones. If the JSON
     * cannot be assembled, the failure is logged and the header is still set
     * with whatever the outer object contains at that point.
     *
     * @param httpServletResponse response to add headers to
     * @param captureSearchResults results whose closest capture is described
     * @param waybackRequest supplies the access point and request URL
     * @param z passed through to {@link MementoUtils#addTimegateHeaders}
     */
    @Override // org.archive.wayback.memento.MementoHandler
    public void addTimegateHeaders(HttpServletResponse httpServletResponse, CaptureSearchResults captureSearchResults, WaybackRequest waybackRequest, boolean z) {
        MementoUtils.addTimegateHeaders(httpServletResponse, captureSearchResults, waybackRequest, z);

        CaptureSearchResult closest = captureSearchResults.getClosest();
        JSONObject closestInfo = new JSONObject();
        JSONObject envelope = new JSONObject();
        try {
            String replayUrl = MementoUtils.getMementoPrefix(waybackRequest.getAccessPoint())
                    + waybackRequest.getAccessPoint().getUriConverter().makeReplayURI(closest.getCaptureTimestamp(), waybackRequest.getRequestUrl());
            closestInfo.put("wb_url", replayUrl);
            closestInfo.put(CDXFieldConstants.timestamp, closest.getCaptureTimestamp());
            // Plain literal: same value as the Spring BindTag constant used before,
            // without tying a JSON key to a JSP tag class.
            closestInfo.put("status", closest.getHttpCode());
            envelope.put(CaptureSearchResult.CAPTURE_CLOSEST_INDICATOR, closestInfo);
        } catch (JSONException e) {
            // Best effort: keep serving the response, but do not hide the failure.
            LOGGER.log(Level.WARNING, "Could not build X-Link-JSON header for " + waybackRequest.getRequestUrl(), e);
        }
        httpServletResponse.setHeader("X-Link-JSON", envelope.toString().replace("\\/", "/"));
    }

    /** @return the base URL of the remote CDX server, or {@code null} when none is configured */
    public String getRemoteCdxPath() {
        return remoteCdxPath;
    }

    /**
     * @param str base URL of the remote CDX server; when non-null, requests other
     *        than URL queries try the remote server before the embedded one
     */
    public void setRemoteCdxPath(String str) {
        remoteCdxPath = str;
    }

    /** @return the value sent as the {@code cdx_auth_token} cookie on remote queries; may be {@code null} */
    public String getRemoteAuthCookie() {
        return remoteAuthCookie;
    }

    /** @param str value sent as the {@code cdx_auth_token} cookie on remote queries; {@code null} sends no cookie */
    public void setRemoteAuthCookie(String str) {
        remoteAuthCookie = str;
    }

    /** @return the cookie value used instead of the regular one when the auth token ignores robots; may be {@code null} */
    public String getRemoteAuthCookieIgnoreRobots() {
        return remoteAuthCookieIgnoreRobots;
    }

    /** @param str cookie value used instead of the regular one when the auth token ignores robots */
    public void setRemoteAuthCookieIgnoreRobots(String str) {
        remoteAuthCookieIgnoreRobots = str;
    }

    /** @return the factory used to open HTTP readers against the remote CDX server */
    public HTTPSeekableLineReaderFactory getRemoteCdxHttp() {
        return remoteCdxHttp;
    }

    /** @param hTTPSeekableLineReaderFactory factory used to open HTTP readers against the remote CDX server */
    public void setRemoteCdxHttp(HTTPSeekableLineReaderFactory hTTPSeekableLineReaderFactory) {
        remoteCdxHttp = hTTPSeekableLineReaderFactory;
    }

    /** @return the additional CDX source merged into remote results; may be {@code null} */
    public CDXInputSource getExtraSource() {
        return extraSource;
    }

    /** @param cDXInputSource additional CDX source merged into remote results; {@code null} disables merging */
    public void setExtraSource(CDXInputSource cDXInputSource) {
        extraSource = cDXInputSource;
    }

    /** @return the value passed as the last constructor argument of the capture result writer */
    public String getPreferContains() {
        return preferContains;
    }

    /** @param str value passed as the last constructor argument of the capture result writer */
    public void setPreferContains(String str) {
        preferContains = str;
    }

    /** @return the URL-key prefixes for which {@code createAuthToken} enables "ignore robots"; may be {@code null} */
    public List<String> getIgnoreRobotPaths() {
        return ignoreRobotPaths;
    }

    /** @param list URL-key prefixes for which {@code createAuthToken} enables "ignore robots"; stored by reference */
    public void setIgnoreRobotPaths(List<String> list) {
        ignoreRobotPaths = list;
    }

    /** @return whether {@code doQuery} makes a second, fuzzy-flagged lookup when the first returns nothing */
    public boolean isTryFuzzyMatch() {
        return tryFuzzyMatch;
    }

    /** @param z whether {@code doQuery} makes a second, fuzzy-flagged lookup when the first returns nothing */
    public void setTryFuzzyMatch(boolean z) {
        tryFuzzyMatch = z;
    }

    /** @return the status regexp exactly as it was last passed to {@link #setBaseStatusRegexp(String)} */
    public String getBaseStatusRegexp() {
        return baseStatusRegexp;
    }

    /**
     * Stores the raw status regexp and recomputes the derived CDX filter via
     * {@link #buildStatusFilter(String)}.
     *
     * @param str status regexp, optionally prefixed with {@code !} for negation
     */
    public void setBaseStatusRegexp(String str) {
        baseStatusRegexp = str;
        baseStatusFilter = buildStatusFilter(str);
    }

    /**
     * Converts a status regexp into a CDX {@code statuscode} filter expression.
     *
     * <p>Surrounding whitespace is trimmed. A leading {@code !} produces a
     * negated filter ({@code !statuscode:<rest>}), with the remainder trimmed
     * again. A {@code null}, blank, or bare-{@code !} input yields the empty
     * string.
     *
     * @param str the regexp, possibly {@code null}
     * @return the filter expression, or {@code ""} when there is nothing to filter on
     */
    protected static String buildStatusFilter(String str) {
        final String spec = (str == null) ? "" : str.trim();
        if (spec.isEmpty()) {
            return "";
        }
        final boolean negated = spec.charAt(0) == '!';
        final String pattern = negated ? spec.substring(1).trim() : spec;
        if (pattern.isEmpty()) {
            return "";
        }
        return (negated ? "!statuscode:" : "statuscode:") + pattern;
    }
}
