package is.hi.bok.deduplicator;

import dk.netarkivet.common.utils.AllDocsCollector;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.archive.modules.CrawlURI;
import org.archive.modules.ProcessResult;
import org.archive.modules.Processor;
import org.archive.modules.net.ServerCache;
import org.archive.modules.revisit.IdenticalPayloadDigestRevisit;
import org.archive.util.ArchiveUtils;
import org.archive.util.Base32;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:is/hi/bok/deduplicator/DeDuplicator.class */
public class DeDuplicator extends Processor implements InitializingBean {
    /** Property key under which the filesystem location of the Lucene index is stored. */
    private static final String ATTR_INDEX_LOCATION = "index-location";
    /** Property key under which the {@link MatchingMethod} is stored. */
    private static final String ATTR_MATCHING_METHOD = "matching-method";
    /** Property key for the name of the processor to jump to once a duplicate has been found. */
    public static final String ATTR_JUMP_TO = "jump-to";
    /**
     * Property key for the origin configured on this processor. The same name is also used as
     * the index field from which an origin is read.
     */
    public static final String ATTR_ORIGIN = "origin";
    /** Property key for the flag enabling lookups on the normalized ("equivalent") URL. */
    public static final String ATTR_EQUIVALENT = "try-equivalent";
    /** Property key for the regular expression matched against a URI's content type. */
    public static final String ATTR_MIME_FILTER = "mime-filter";
    /** Mime filter installed by the constructor. */
    public static final String DEFAULT_MIME_FILTER = "^text/.*";
    /** Property key under which the {@link FilterMode} is stored. */
    public static final String ATTR_FILTER_MODE = "filter-mode";
    /** Property key under which the {@link AnalysisMode} is stored. */
    public static final String ATTR_ANALYZE_MODE = "analyze-modes";
    /** Property key for the change-content-size flag (only stored and returned by this class). */
    public static final String ATTR_CHANGE_CONTENT_SIZE = "change-content-size";
    /** Property key for the flag enabling per-host statistics. */
    public static final String ATTR_STATS_PER_HOST = "stats-per-host";
    /** Property key under which the {@link OriginHandling} is stored. */
    public static final String ATTR_ORIGIN_HANDLING = "origin-handling";
    /** Property key for the flag deciding whether a revisit profile is set on duplicates. */
    public static final String ATTR_REVISIT_IN_WARCS = "revisit-in-warcs";
    /** Used to resolve the host of a URI when per-host statistics are kept. */
    protected ServerCache serverCache;
    /** Searcher over {@link #indexReader}; created in {@code afterPropertiesSet()}. */
    protected IndexSearcher indexSearcher;
    /** Reader for the index at the configured location; created in {@code afterPropertiesSet()}. */
    protected IndexReader indexReader;
    /** {@code true} when lookups are done by URL, {@code false} when done by digest. */
    protected boolean lookupByURL;
    /** Cached value of the stats-per-host setting. */
    protected boolean statsPerHost;
    /** {@code true} when an origin is appended to the "duplicate" annotation. */
    protected boolean useOrigin;
    /** {@code true} when the origin stored in the index is preferred over the configured one. */
    protected boolean useOriginFromIndex;
    /** Overall statistics; created in {@code afterPropertiesSet()}. */
    protected Statistics stats;
    /** Statistics keyed by host name; only created when per-host statistics are enabled. */
    protected HashMap<String, Statistics> perHostStats;
    // The logger is never reassigned, so it is declared final.
    private static final Logger logger = Logger.getLogger(DeDuplicator.class.getName());
    /** Matching method installed by the constructor. */
    private static final MatchingMethod DEFAULT_MATCHING_METHOD = MatchingMethod.URL;
    /** Origin handling installed by the constructor. */
    public static final OriginHandling DEFAULT_ORIGIN_HANDLING = OriginHandling.NONE;

    /* loaded from: input_file:is/hi/bok/deduplicator/DeDuplicator$AnalysisMode.class */
    /**
     * Selects the extra analysis performed after the duplicate lookup. Within this class only
     * {@link #TIMESTAMP} is tested for (see {@code getAnalyzeTimestamp()}).
     */
    public enum AnalysisMode {
        NONE,
        TIMESTAMP,
        TIMESTAMP_AND_ETAG
    }

    /* loaded from: input_file:is/hi/bok/deduplicator/DeDuplicator$FilterMode.class */
    /**
     * Decides how the mime filter is interpreted in {@code shouldProcess}: with
     * {@link #BLACKLIST} a URI whose content type matches the filter is skipped, with
     * {@link #WHITELIST} a URI whose content type does not match is skipped.
     */
    public enum FilterMode {
        BLACKLIST,
        WHITELIST
    }

    /* loaded from: input_file:is/hi/bok/deduplicator/DeDuplicator$MatchingMethod.class */
    /**
     * Selects the index lookup strategy: {@link #URL} makes the processor use
     * {@code lookupByURL}, any other value makes it use {@code lookupByDigest}.
     */
    public enum MatchingMethod {
        URL,
        DIGEST
    }

    /* loaded from: input_file:is/hi/bok/deduplicator/DeDuplicator$OriginHandling.class */
    /**
     * Controls whether an origin is appended to the "duplicate" annotation: {@link #NONE}
     * appends nothing, {@link #PROCESSOR} uses the origin configured on the processor, and
     * {@link #INDEX} prefers the origin stored in the index entry, falling back to the
     * configured origin when the entry has none.
     */
    public enum OriginHandling {
        NONE,
        PROCESSOR,
        INDEX
    }

    /**
     * Creates a processor with default settings: URL matching, empty origin, no equivalent-URL
     * lookup, the default mime filter in blacklist mode, timestamp analysis, no content size
     * change, no per-host statistics, default origin handling and revisit profiles enabled.
     * The index is not opened here; the runtime fields are only reset.
     */
    public DeDuplicator() {
        // Configurable properties.
        setMatchingMethod(DEFAULT_MATCHING_METHOD);
        setOrigin("");
        setTryEquivalent(Boolean.FALSE);
        setMimeFilter(DEFAULT_MIME_FILTER);
        setfilterMode(FilterMode.BLACKLIST);
        setAnalysisMode(AnalysisMode.TIMESTAMP);
        setChangeContentSize(Boolean.FALSE);
        setStatsPerHost(Boolean.FALSE);
        setOriginHandling(DEFAULT_ORIGIN_HANDLING);
        setRevisitInWarcs(Boolean.TRUE);

        // Runtime state, populated later by afterPropertiesSet().
        indexSearcher = null;
        indexReader = null;
        lookupByURL = true;
        statsPerHost = false;
        useOrigin = false;
        useOriginFromIndex = false;
        stats = null;
        perHostStats = null;
    }

    /**
     * Reports whether this processor is enabled; delegates to the superclass.
     *
     * @return the value returned by the superclass
     */
    public boolean getEnabled() {
        final boolean enabled = super.getEnabled();
        return enabled;
    }

    /**
     * Enables or disables this processor; delegates to the superclass.
     *
     * @param z the new enabled state
     */
    public void setEnabled(final boolean z) {
        super.setEnabled(z);
    }

    /**
     * @return the index location stored under {@code "index-location"}
     */
    public String getIndexLocation() {
        final Object configured = kp.get(ATTR_INDEX_LOCATION);
        return (String) configured;
    }

    /**
     * Stores the index location under {@code "index-location"}.
     *
     * @param str the location of the index
     */
    public void setIndexLocation(final String str) {
        kp.put(ATTR_INDEX_LOCATION, str);
    }

    /**
     * @return the matching method stored under {@code "matching-method"}
     */
    public MatchingMethod getMatchingMethod() {
        final Object configured = kp.get(ATTR_MATCHING_METHOD);
        return (MatchingMethod) configured;
    }

    /**
     * Stores the matching method under {@code "matching-method"}.
     *
     * @param matchingMethod the lookup strategy to use
     */
    public void setMatchingMethod(final MatchingMethod matchingMethod) {
        kp.put(ATTR_MATCHING_METHOD, matchingMethod);
    }

    /**
     * @return the jump target stored under {@code "jump-to"}
     */
    public String getJumpTo() {
        final Object configured = kp.get(ATTR_JUMP_TO);
        return (String) configured;
    }

    /**
     * Stores the jump target under {@code "jump-to"}.
     *
     * @param str the jump target handed to {@code ProcessResult.jump} for duplicates
     */
    public void setJumpTo(final String str) {
        kp.put(ATTR_JUMP_TO, str);
    }

    /**
     * @return the origin stored under {@code "origin"}
     */
    public String getOrigin() {
        final Object configured = kp.get(ATTR_ORIGIN);
        return (String) configured;
    }

    /**
     * Stores the origin under {@code "origin"}.
     *
     * @param str the origin configured for this processor
     */
    public void setOrigin(final String str) {
        kp.put(ATTR_ORIGIN, str);
    }

    /**
     * @return the flag stored under {@code "try-equivalent"}
     */
    public Boolean getTryEquivalent() {
        final Object configured = kp.get(ATTR_EQUIVALENT);
        return (Boolean) configured;
    }

    /**
     * Stores the flag under {@code "try-equivalent"}.
     *
     * @param bool whether lookups on the normalized URL are attempted
     */
    public void setTryEquivalent(final Boolean bool) {
        kp.put(ATTR_EQUIVALENT, bool);
    }

    /**
     * @return the mime filter expression stored under {@code "mime-filter"}
     */
    public String getMimeFilter() {
        final Object configured = kp.get(ATTR_MIME_FILTER);
        return (String) configured;
    }

    /**
     * Stores the mime filter expression under {@code "mime-filter"}.
     *
     * @param str regular expression matched against a URI's content type
     */
    public void setMimeFilter(final String str) {
        kp.put(ATTR_MIME_FILTER, str);
    }

    /**
     * @return the filter mode stored under {@code "filter-mode"}
     */
    public FilterMode getFilterMode() {
        final Object configured = kp.get(ATTR_FILTER_MODE);
        return (FilterMode) configured;
    }

    /**
     * @return {@code Boolean.TRUE} when the stored filter mode equals
     *         {@link FilterMode#BLACKLIST}, otherwise {@code Boolean.FALSE}
     */
    public Boolean getBlacklist() {
        final FilterMode mode = (FilterMode) kp.get(ATTR_FILTER_MODE);
        final boolean blacklisting = mode.equals(FilterMode.BLACKLIST);
        return Boolean.valueOf(blacklisting);
    }

    /**
     * Stores the filter mode under {@code "filter-mode"}.
     *
     * @param filterMode how the mime filter is to be interpreted
     */
    public void setfilterMode(final FilterMode filterMode) {
        kp.put(ATTR_FILTER_MODE, filterMode);
    }

    /**
     * @return {@code true} when the stored analysis mode equals {@link AnalysisMode#TIMESTAMP}
     */
    public boolean getAnalyzeTimestamp() {
        final AnalysisMode mode = (AnalysisMode) kp.get(ATTR_ANALYZE_MODE);
        return mode.equals(AnalysisMode.TIMESTAMP);
    }

    /**
     * Stores the analysis mode under {@code "analyze-modes"}.
     *
     * @param analysisMode the analysis to perform after the lookup
     */
    public void setAnalysisMode(final AnalysisMode analysisMode) {
        kp.put(ATTR_ANALYZE_MODE, analysisMode);
    }

    /**
     * @return the analysis mode stored under {@code "analyze-modes"}
     */
    public AnalysisMode getAnalysisMode() {
        final Object configured = kp.get(ATTR_ANALYZE_MODE);
        return (AnalysisMode) configured;
    }

    /**
     * @return the flag stored under {@code "change-content-size"}
     */
    public Boolean getChangeContentSize() {
        final Object configured = kp.get(ATTR_CHANGE_CONTENT_SIZE);
        return (Boolean) configured;
    }

    /**
     * Stores the flag under {@code "change-content-size"}.
     *
     * @param bool the new value of the flag
     */
    public void setChangeContentSize(final Boolean bool) {
        kp.put(ATTR_CHANGE_CONTENT_SIZE, bool);
    }

    /**
     * @return the flag stored under {@code "stats-per-host"}
     */
    public Boolean getStatsPerHost() {
        final Object configured = kp.get(ATTR_STATS_PER_HOST);
        return (Boolean) configured;
    }

    /**
     * Stores the flag under {@code "stats-per-host"}.
     *
     * @param bool whether statistics are also kept per host
     */
    public void setStatsPerHost(final Boolean bool) {
        kp.put(ATTR_STATS_PER_HOST, bool);
    }

    /**
     * @return the origin handling stored under {@code "origin-handling"}
     */
    public OriginHandling getOriginHandling() {
        final Object configured = kp.get(ATTR_ORIGIN_HANDLING);
        return (OriginHandling) configured;
    }

    /**
     * Stores the origin handling under {@code "origin-handling"}.
     *
     * @param originHandling where the origin of a duplicate is taken from
     */
    public void setOriginHandling(final OriginHandling originHandling) {
        kp.put(ATTR_ORIGIN_HANDLING, originHandling);
    }

    /**
     * Stores the flag under {@code "revisit-in-warcs"}.
     *
     * @param bool whether a revisit profile is set on URIs found to be duplicates
     */
    public void setRevisitInWarcs(final Boolean bool) {
        kp.put(ATTR_REVISIT_IN_WARCS, bool);
    }

    /**
     * @return the flag stored under {@code "revisit-in-warcs"}
     */
    public Boolean getRevisitInWarcs() {
        final Object configured = kp.get(ATTR_REVISIT_IN_WARCS);
        return (Boolean) configured;
    }

    /**
     * @return the server cache used to resolve hosts for per-host statistics
     */
    public ServerCache getServerCache() {
        return serverCache;
    }

    /**
     * Sets the server cache; annotated for injection by Spring.
     *
     * @param serverCache the server cache to use
     */
    @Autowired
    public void setServerCache(final ServerCache serverCache) {
        this.serverCache = serverCache;
    }

    /**
     * Opens the index and derives the runtime flags from the configured properties. Does
     * nothing except log a message when the processor is disabled.
     *
     * @throws IllegalArgumentException if anything fails while opening the index or reading
     *         the configuration; the original exception is attached as the cause
     * @throws Exception declared by the interface
     */
    public void afterPropertiesSet() throws Exception {
        if (!getEnabled()) {
            logger.info(getClass().getName() + " disabled.");
            return;
        }

        final String location = getIndexLocation();
        try {
            FSDirectory indexDirectory = FSDirectory.open(new File(location));
            // Halve the directory's read chunk size before opening the reader.
            indexDirectory.setReadChunkSize(indexDirectory.getReadChunkSize() / 2);
            indexReader = DirectoryReader.open(indexDirectory);
            indexSearcher = new IndexSearcher(indexReader);

            lookupByURL = MatchingMethod.URL == getMatchingMethod();
            statsPerHost = getStatsPerHost().booleanValue();

            final OriginHandling handling = getOriginHandling();
            if (handling != OriginHandling.NONE) {
                useOrigin = true;
                logger.fine("Use origin");
            }
            if (handling == OriginHandling.INDEX) {
                useOriginFromIndex = true;
                logger.fine("Use origin from index");
            }

            stats = new Statistics();
            if (statsPerHost) {
                perHostStats = new HashMap<String, Statistics>();
            }
        } catch (Exception e) {
            throw new IllegalArgumentException("Unable to find/open index at " + location, e);
        }
    }

    /**
     * Decides whether a URI is a candidate for deduplication. A URI is rejected (with a
     * FINEST log line naming the reason) when the processor is disabled, the fetch did not
     * succeed, the URI is a prerequisite, its string form does not start with "http", it has
     * no content type, the mime filter excludes it, or it is already flagged as a revisit.
     *
     * @param crawlURI the URI under consideration
     * @return {@code true} if the URI should be processed
     */
    protected boolean shouldProcess(CrawlURI crawlURI) {
        final String uri = crawlURI.toString();

        if (!getEnabled()) {
            logger.finest("Not handling " + uri + ", deduplication disabled.");
            return false;
        }
        if (!crawlURI.isSuccess()) {
            logger.finest("Not handling " + uri + ", did not succeed.");
            return false;
        }
        if (crawlURI.isPrerequisite()) {
            logger.finest("Not handling " + uri + ", prerequisite.");
            return false;
        }
        if (!uri.startsWith("http")) {
            logger.finest("Not handling " + uri + ", non-http.");
            return false;
        }

        final String contentType = crawlURI.getContentType();
        if (contentType == null) {
            logger.finest("Not handling " + uri + ", missing content (mime) type");
            return false;
        }

        // Blacklist mode: a match excludes the URI. Whitelist mode: a non-match excludes it.
        final boolean matchesFilter = contentType.matches(getMimeFilter());
        if (matchesFilter == getBlacklist().booleanValue()) {
            logger.finest("Not handling " + uri + ", excluded by mimefilter (" + contentType + ").");
            return false;
        }

        if (crawlURI.isRevisit()) {
            logger.finest("Not handling " + uri + ", already flagged as revisit.");
            return false;
        }
        return true;
    }

    /**
     * Not supported by this processor; the work is done in {@code innerProcessResult}.
     *
     * @param crawlURI ignored
     * @throws AssertionError always
     */
    protected void innerProcess(CrawlURI crawlURI) {
        // Same error type as before, now with a message that points at the real entry point.
        throw new AssertionError("innerProcess(CrawlURI) must not be called; use innerProcessResult(CrawlURI)");
    }

    /**
     * Determines the date the revisit record should refer to. The "date" field of the index
     * entry is used unless its "origin" field splits into exactly three comma-separated
     * parts, in which case the third part is used. The value is parsed with the Heritrix date
     * format and re-formatted with the WARC date format.
     *
     * @param document the index entry of the original capture
     * @return the date in WARC format; the raw indexed value if it cannot be parsed; or
     *         {@code null} if the entry carries no date at all
     */
    private String getRefersToDate(Document document) {
        String indexedDate = document.get("date");
        final String origin = document.get(ATTR_ORIGIN);
        if (origin != null && !origin.isEmpty()) {
            final String[] originParts = origin.split(",");
            if (originParts.length == 3) {
                indexedDate = originParts[2];
            }
        }

        // DateFormat.parse(null) throws NullPointerException, so a missing date is handled
        // explicitly instead of aborting the processing of the URI.
        if (indexedDate == null) {
            logger.warning("Index entry for '" + document.get("url") + "' has no date.");
            return null;
        }

        try {
            final Date parsed = ArchiveDateConverter.getHeritrixDateFormat().parse(indexedDate);
            return ArchiveDateConverter.getWarcDateFormat().format(parsed);
        } catch (ParseException e) {
            logger.warning("Unable to parse the indexed date '" + indexedDate + "' as a 17-digit date: " + e);
            // Fall back to the unconverted value, as before.
            return indexedDate;
        }
    }

    /**
     * Looks the URI up in the index and, when a duplicate is found, updates the statistics,
     * annotates the URI with "duplicate" (optionally followed by an origin), optionally sets a
     * revisit profile and optionally requests a jump to the configured processor. Timestamp
     * analysis is run afterwards when enabled.
     *
     * @param crawlURI the URI being processed
     * @return {@code ProcessResult.PROCEED}, or a jump result when a duplicate was found and a
     *         jump target is configured
     * @throws InterruptedException declared by the superclass contract
     */
    protected ProcessResult innerProcessResult(CrawlURI crawlURI) throws InterruptedException {
        logger.finest("Processing " + crawlURI.toString() + "(" + crawlURI.getContentType() + ")");

        stats.handledNumber++;
        stats.totalAmount += crawlURI.getContentSize();

        Statistics hostStats = null;
        if (statsPerHost) {
            // Fetch or create the statistics record of the URI's host.
            synchronized (perHostStats) {
                String host = getServerCache().getHostFor(crawlURI.getUURI()).getHostName();
                hostStats = perHostStats.get(host);
                if (hostStats == null) {
                    hostStats = new Statistics();
                    perHostStats.put(host, hostStats);
                }
            }
            hostStats.handledNumber++;
            hostStats.totalAmount += crawlURI.getContentSize();
        }

        final Document duplicate;
        if (lookupByURL) {
            duplicate = lookupByURL(crawlURI, hostStats);
        } else {
            duplicate = lookupByDigest(crawlURI, hostStats);
        }

        ProcessResult result = ProcessResult.PROCEED;
        if (duplicate != null) {
            // Describe the original capture this URI is a revisit of.
            IdenticalPayloadDigestRevisit revisit = new IdenticalPayloadDigestRevisit(duplicate.get("digest"));
            revisit.setRefersToTargetURI(duplicate.get("url"));
            revisit.setRefersToDate(getRefersToDate(duplicate));
            String originalRecordId = duplicate.get("orig_record_id");
            if (originalRecordId != null && !originalRecordId.isEmpty()) {
                revisit.setRefersToRecordID(originalRecordId);
            }

            stats.duplicateAmount += crawlURI.getContentSize();
            stats.duplicateNumber++;
            if (statsPerHost) {
                hostStats.duplicateAmount += crawlURI.getContentSize();
                hostStats.duplicateNumber++;
            }

            String jumpTarget = getJumpTo();
            if (jumpTarget != null) {
                result = ProcessResult.jump(jumpTarget);
            }

            String annotation = "duplicate";
            if (useOrigin) {
                // The index origin wins when it is requested and present; otherwise the
                // configured origin is used, provided it is not blank.
                String indexOrigin = useOriginFromIndex ? duplicate.get(ATTR_ORIGIN) : null;
                if (indexOrigin != null) {
                    annotation = annotation + ":\"" + indexOrigin + "\"";
                } else {
                    String configuredOrigin = getOrigin();
                    if (configuredOrigin != null && !configuredOrigin.trim().isEmpty()) {
                        annotation = annotation + ":\"" + configuredOrigin + "\"";
                    }
                }
            }
            crawlURI.getAnnotations().add(annotation);

            if (getRevisitInWarcs().booleanValue()) {
                crawlURI.setRevisitProfile(revisit);
            }
        }

        if (getAnalyzeTimestamp()) {
            doAnalysis(crawlURI, hostStats, duplicate != null);
        }
        return result;
    }

    /**
     * Searches the index for an entry with the same URL and the same content digest as the
     * given URI. If none is found and equivalent lookups are enabled, the search is repeated
     * on the normalized URL; a hit there is additionally recorded as an "equivalentURL"
     * annotation on the URI.
     *
     * <p>Digests are compared ignoring case in both passes, and index entries without a
     * digest are skipped rather than causing a {@code NullPointerException}.
     *
     * @param crawlURI the URI to look up
     * @param statistics per-host statistics to update; only dereferenced when per-host
     *        statistics are enabled
     * @return the matching index entry, or {@code null} if there is none or the index could
     *         not be read
     */
    protected Document lookupByURL(CrawlURI crawlURI, Statistics statistics) {
        final String uri = crawlURI.toString();
        try {
            AllDocsCollector collector = new AllDocsCollector();
            indexSearcher.search(queryField("url", uri), collector);
            final String currentDigest = getDigestAsString(crawlURI);

            // Pass 1: entries indexed under exactly this URL.
            List<ScoreDoc> exactHits = collector.getHits();
            if (exactHits != null) {
                for (ScoreDoc hit : exactHits) {
                    Document candidate = indexSearcher.doc(hit.doc);
                    String indexedDigest = candidate.get("digest");
                    if (indexedDigest != null && indexedDigest.equalsIgnoreCase(currentDigest)) {
                        stats.exactURLDuplicates++;
                        if (statsPerHost) {
                            statistics.exactURLDuplicates++;
                        }
                        logger.finest("Found exact match for " + uri);
                        return candidate;
                    }
                }
            }

            // Pass 2: entries whose normalized URL equals the normalized form of this URL.
            if (getTryEquivalent().booleanValue()) {
                String normalizedUri = DigestIndexer.stripURL(uri);
                collector.reset();
                indexSearcher.search(queryField("url-normalized", normalizedUri), collector);
                List<ScoreDoc> equivalentHits = collector.getHits();
                if (equivalentHits != null) {
                    for (ScoreDoc hit : equivalentHits) {
                        Document candidate = indexSearcher.doc(hit.doc);
                        String indexedDigest = candidate.get("digest");
                        if (indexedDigest != null && indexedDigest.equalsIgnoreCase(currentDigest)) {
                            String equivalentUrl = candidate.get("url");
                            crawlURI.getAnnotations().add("equivalentURL:\"" + equivalentUrl + "\"");
                            stats.equivalentURLDuplicates++;
                            if (statsPerHost) {
                                statistics.equivalentURLDuplicates++;
                            }
                            logger.finest("Found equivalent match for " + uri + ". Normalized: " + normalizedUri + ". Equivalent to: " + equivalentUrl);
                            return candidate;
                        }
                    }
                }
            }
            return null;
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Error accessing index.", e);
            return null;
        }
    }

    /**
     * Searches the index for entries with the same content digest as the given URI and
     * returns the first one whose URL equals the URI or, when equivalent lookups are enabled,
     * whose normalized URL equals the normalized URI. Entries that share the digest but match
     * neither way are counted once as a mirror hit.
     *
     * @param crawlURI the URI to look up
     * @param statistics per-host statistics to update; only dereferenced when per-host
     *        statistics are enabled
     * @return the matching index entry, or {@code null} if the URI has no digest, nothing
     *         matched, or the index could not be read
     */
    protected Document lookupByDigest(CrawlURI crawlURI, Statistics statistics) {
        final byte[] digestBytes = crawlURI.getContentDigest();
        if (digestBytes == null) {
            logger.warning("Digest received from CrawlURI is null. Null Document returned");
            return null;
        }

        Document match = null;
        final Query digestQuery = queryField("digest", Base32.encode(digestBytes));
        try {
            AllDocsCollector collector = new AllDocsCollector();
            indexSearcher.search(digestQuery, collector);
            List<ScoreDoc> hits = collector.getHits();
            StringBuilder mirrors = new StringBuilder("mirrors: ");

            if (hits != null && !hits.isEmpty()) {
                for (ScoreDoc hit : hits) {
                    Document candidate = indexSearcher.doc(hit.doc);
                    String indexedUrl = candidate.get("url");

                    if (crawlURI.toString().equals(indexedUrl)) {
                        match = candidate;
                        stats.exactURLDuplicates++;
                        if (statsPerHost) {
                            statistics.exactURLDuplicates++;
                        }
                        logger.finest("Found exact match for " + crawlURI.toString());
                    } else if (getTryEquivalent().booleanValue()) {
                        String normalizedUri = DigestIndexer.stripURL(crawlURI.toString());
                        if (normalizedUri.equals(candidate.get("url-normalized"))) {
                            match = candidate;
                            stats.equivalentURLDuplicates++;
                            if (statsPerHost) {
                                statistics.equivalentURLDuplicates++;
                            }
                            crawlURI.getAnnotations().add("equivalentURL:\"" + indexedUrl + "\"");
                            logger.finest("Found equivalent match for " + crawlURI.toString() + ". Normalized: " + normalizedUri + ". Equivalent to: " + indexedUrl);
                        }
                    }

                    if (match != null) {
                        // First match wins; remaining hits are not inspected.
                        break;
                    }
                    mirrors.append(indexedUrl).append(" ");
                }

                if (match == null) {
                    // Same digest found, but only under other URLs.
                    stats.mirrorNumber++;
                    if (statsPerHost) {
                        statistics.mirrorNumber++;
                    }
                    logger.log(Level.FINEST, "Found mirror URLs for " + crawlURI.toString() + ". " + mirrors);
                }
            }
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Error accessing index.", e);
        }
        return match;
    }

    /**
     * Builds a plain-text report of the deduplication statistics: the overall counters, the
     * timestamp analysis counters when that analysis is enabled, and one line per host when
     * per-host statistics are enabled. A disabled processor only reports that it is disabled.
     *
     * @return the report text
     */
    public String report() {
        final StringBuilder out = new StringBuilder();
        out.append("Processor: is.hi.bok.digest.DeDuplicator\n");
        out.append("  Function:          Abort processing of duplicate records\n");
        if (!getEnabled()) {
            out.append("Processor is disabled by configuration").append("\n");
            return out.toString();
        }

        final boolean byUrl = lookupByURL;
        out.append("                     - Lookup by ").append(byUrl ? "url" : "digest").append(" in use\n");
        out.append("  Total handled:     ").append(stats.handledNumber).append("\n");
        out.append("  Duplicates found:  ").append(stats.duplicateNumber).append(" ")
                .append(getPercentage(stats.duplicateNumber, stats.handledNumber)).append("\n");
        out.append("  Bytes total:       ").append(stats.totalAmount).append(" (")
                .append(ArchiveUtils.formatBytesForDisplay(stats.totalAmount)).append(")\n");
        out.append("  Bytes discarded:   ").append(stats.duplicateAmount).append(" (")
                .append(ArchiveUtils.formatBytesForDisplay(stats.duplicateAmount)).append(") ")
                .append(getPercentage(stats.duplicateAmount, stats.totalAmount)).append("\n");
        // "New" is whatever was handled without producing a mirror, exact or equivalent hit.
        out.append("  New (no hits):     ")
                .append(stats.handledNumber - ((stats.mirrorNumber + stats.exactURLDuplicates) + stats.equivalentURLDuplicates))
                .append("\n");
        out.append("  Exact hits:        ").append(stats.exactURLDuplicates).append("\n");
        out.append("  Equivalent hits:   ").append(stats.equivalentURLDuplicates).append("\n");
        if (!byUrl) {
            out.append("  Mirror hits:       ").append(stats.mirrorNumber).append("\n");
        }

        final boolean timestampAnalysis = getAnalyzeTimestamp();
        if (timestampAnalysis) {
            out.append("  Timestamp predicts: (Where exact URL existed in the index)\n");
            out.append("  Change correctly:  ").append(stats.timestampChangeCorrect).append("\n");
            out.append("  Change falsely:     ").append(stats.timestampChangeFalse).append("\n");
            out.append("  Non-change correct:").append(stats.timestampNoChangeCorrect).append("\n");
            out.append("  Non-change falsely: ").append(stats.timestampNoChangeFalse).append("\n");
            out.append("  Missing timpestamp:").append(stats.timestampMissing).append("\n");
        }

        if (statsPerHost) {
            // Column header, followed by one space-separated line per host.
            out.append("  [Host] [total] [duplicates] [bytes] [bytes discarded] [new] [exact] [equiv]");
            if (!byUrl) {
                out.append(" [mirror]");
            }
            if (timestampAnalysis) {
                out.append(" [change correct] [change falsely]");
                out.append(" [non-change correct] [non-change falsely]");
                out.append(" [no timestamp]");
            }
            out.append("\n");

            synchronized (perHostStats) {
                for (String host : perHostStats.keySet()) {
                    final Statistics hostStats = perHostStats.get(host);
                    out.append("  ").append(host);
                    out.append(" ").append(hostStats.handledNumber);
                    out.append(" ").append(hostStats.duplicateNumber);
                    out.append(" ").append(hostStats.totalAmount);
                    out.append(" ").append(hostStats.duplicateAmount);
                    out.append(" ").append(hostStats.handledNumber - ((hostStats.mirrorNumber + hostStats.exactURLDuplicates) + hostStats.equivalentURLDuplicates));
                    out.append(" ").append(hostStats.exactURLDuplicates);
                    out.append(" ").append(hostStats.equivalentURLDuplicates);
                    if (!byUrl) {
                        out.append(" ").append(hostStats.mirrorNumber);
                    }
                    if (timestampAnalysis) {
                        out.append(" ").append(hostStats.timestampChangeCorrect);
                        out.append(" ").append(hostStats.timestampChangeFalse);
                        out.append(" ").append(hostStats.timestampNoChangeCorrect);
                        out.append(" ").append(hostStats.timestampNoChangeFalse);
                        out.append(" ").append(hostStats.timestampMissing);
                    }
                    out.append("\n");
                }
            }
        }

        out.append("\n");
        return out.toString();
    }

    /**
     * Formats {@code d} as a percentage of {@code d2}, truncated (not rounded) to at most two
     * decimals and followed by {@code '%'}, e.g. {@code "33.33%"} or {@code "50.0%"}.
     *
     * <p>An undefined ratio (for instance a total of zero, which is the case before anything
     * has been handled) is reported as {@code "0.0%"}. Very small or very large values are
     * rendered in plain notation before truncation, so no exponent fragment ends up in the
     * result.
     *
     * @param d the portion
     * @param d2 the total
     * @return the percentage text
     */
    protected static String getPercentage(double d, double d2) {
        final double percentage = (d / d2) * 100.0d;
        if (Double.isNaN(percentage) || Double.isInfinite(percentage)) {
            return "0.0%";
        }

        // Double.toString switches to scientific notation below 1e-3 and from 1e7 upwards;
        // BigDecimal gives the same digits in plain notation.
        String text = java.math.BigDecimal.valueOf(percentage).toPlainString();
        final int dot = text.indexOf('.');
        if (dot >= 0 && dot + 3 < text.length()) {
            text = text.substring(0, dot + 3);
        }
        return text + "%";
    }

    /**
     * Returns the Base32 encoding of the URI's content digest.
     *
     * @param crawlURI the URI whose digest is wanted
     * @return the encoded digest, or {@code null} if the URI has no content digest
     */
    private static String getDigestAsString(CrawlURI crawlURI) {
        final byte[] digestBytes = crawlURI.getContentDigest();
        return digestBytes == null ? null : Base32.encode(digestBytes);
    }

    /**
     * Finds the index entries stored under the URI's exact URL, picks one of them by its
     * "date" field and hands it to {@code doTimestampAnalysis}. Nothing happens when the URL
     * is not in the index.
     *
     * @param crawlURI the URI being analysed
     * @param statistics per-host statistics, passed through to the timestamp analysis
     * @param z whether the URI was found to be a duplicate
     */
    protected void doAnalysis(CrawlURI crawlURI, Statistics statistics, boolean z) {
        try {
            AllDocsCollector collector = new AllDocsCollector();
            indexSearcher.search(queryField("url", crawlURI.toString()), collector);
            List<ScoreDoc> hits = collector.getHits();
            if (hits == null || hits.isEmpty()) {
                return;
            }

            // NOTE(review): the comparison keeps the entry with the smallest date string,
            // i.e. the earliest one; confirm whether the most recent entry was intended.
            Document selected = null;
            for (ScoreDoc hit : hits) {
                Document candidate = indexSearcher.doc(hit.doc);
                String candidateDate = candidate.get("date");
                if (selected == null || selected.get("date").compareTo(candidateDate) > 0) {
                    selected = candidate;
                }
            }
            doTimestampAnalysis(crawlURI, selected, statistics, z);
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Error accessing index.", e);
        }
    }

    /**
     * Checks how well the {@code last-modified} response header predicted whether the content
     * changed since the indexed capture, and updates the matching counter:
     * header later than the indexed date predicts a change, otherwise it predicts no change;
     * the prediction is correct when it agrees with {@code z}.
     *
     * <p>A URI without the header is counted as "timestamp missing". If either date cannot be
     * parsed, or the index entry has no date, the problem is logged and no counter changes.
     *
     * @param crawlURI the URI being analysed
     * @param document the index entry to compare against
     * @param statistics per-host statistics; only dereferenced when per-host statistics are
     *        enabled
     * @param z whether the URI was found to be a duplicate (i.e. did not change)
     */
    protected void doTimestampAnalysis(CrawlURI crawlURI, Document document, Statistics statistics, boolean z) {
        final String lastModifiedHeader = crawlURI.getHttpResponseHeader("last-modified");
        if (lastModifiedHeader == null) {
            stats.timestampMissing++;
            if (statsPerHost) {
                statistics.timestampMissing++;
            }
            // Logged regardless of the per-host setting, like every other outcome below.
            logger.finest("Missing timestamp on " + crawlURI.toString());
            return;
        }

        final Date lastModified;
        try {
            lastModified = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH).parse(lastModifiedHeader);
        } catch (ParseException e) {
            logger.log(Level.INFO, "Exception parsing last modified of " + crawlURI.toString(), e);
            return;
        }

        final String indexedDate = document.get("date");
        if (indexedDate == null) {
            // DateFormat.parse(null) would throw NullPointerException.
            logger.log(Level.WARNING, "No indexed date for " + document.get("url"));
            return;
        }
        final Date lastFetched;
        try {
            // Same helper as getRefersToDate uses for indexed dates, so both read the field
            // identically. NOTE(review): presumably the helper fixes the time zone of the
            // 17-digit format - confirm against ArchiveDateConverter.
            lastFetched = ArchiveDateConverter.getHeritrixDateFormat().parse(indexedDate);
        } catch (ParseException e) {
            logger.log(Level.WARNING, "Exception parsing indexed date for " + document.get("url"), e);
            return;
        }

        if (lastModified.after(lastFetched)) {
            // Header claims the content changed after the indexed capture.
            if (z) {
                stats.timestampChangeFalse++;
                if (statsPerHost) {
                    statistics.timestampChangeFalse++;
                }
                logger.finest("Last-modified falsly predicts change on " + crawlURI.toString());
            } else {
                stats.timestampChangeCorrect++;
                if (statsPerHost) {
                    statistics.timestampChangeCorrect++;
                }
                logger.finest("Last-modified correctly predicts change on " + crawlURI.toString());
            }
            return;
        }

        // Header claims the content is unchanged.
        if (z) {
            stats.timestampNoChangeCorrect++;
            if (statsPerHost) {
                statistics.timestampNoChangeCorrect++;
            }
            logger.finest("Last-modified correctly predicts no-change on " + crawlURI.toString());
            return;
        }
        logger.log(Level.INFO, "Last-modified incorrectly indicated no-change on " + crawlURI.toString() + " " + crawlURI.getContentType() + ". last-modified: " + lastModified + ". Last fetched: " + lastFetched);
        stats.timestampNoChangeFalse++;
        if (statsPerHost) {
            statistics.timestampNoChangeFalse++;
        }
    }

    /**
     * Builds a constant-score query matching documents whose field {@code str} holds exactly
     * the value {@code str2}, expressed as a term range whose inclusive lower and upper bound
     * are both that value.
     *
     * @param str the name of the index field
     * @param str2 the value to match; must not be {@code null}
     * @return the query
     */
    protected Query queryField(String str, String str2) {
        // BytesRef(CharSequence) encodes the text as UTF-8; String.getBytes() would use the
        // platform default charset and produce different bytes for non-ASCII values on
        // platforms whose default is not UTF-8.
        final BytesRef term = new BytesRef(str2);
        return new ConstantScoreQuery(new TermRangeFilter(str, term, term, true, true));
    }
}
