package org.archive.modules.fetcher;

import com.google.common.net.InternetDomainName;
import com.sleepycat.bind.tuple.IntegerBinding;
import com.sleepycat.bind.tuple.StringBinding;
import com.sleepycat.collections.StoredSortedMap;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseException;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.net.whois.WhoisClient;
import org.archive.bdb.BdbModule;
import org.archive.modules.CoreAttributeConstants;
import org.archive.modules.CrawlURI;
import org.archive.modules.ProcessResult;
import org.archive.modules.Processor;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.extractor.Hop;
import org.archive.modules.extractor.LinkContext;
import org.archive.modules.net.CrawlHost;
import org.archive.modules.net.ServerCache;
import org.archive.util.Recorder;
import org.archive.util.TextUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.Lifecycle;

/* loaded from: input_file:org/archive/modules/fetcher/FetchWhois.class */
public class FetchWhois extends Processor implements CoreAttributeConstants, FetchStatusCodes, Lifecycle {
    private static final long serialVersionUID = 1;
    /** Class logger; assigned exactly once in the static initializer. */
    private static final Logger logger;
    /** Regex for a dotted-quad IPv4 literal; octet ranges are not validated. */
    public static final String IP_ADDRESS_REGEX = "\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}";
    /** Whois server used for queries that look like an IP address. */
    protected static final String DEFAULT_IP_WHOIS_SERVER = "whois.arin.net";
    /** Whois server asked about a bare suffix when no referral server is known for it. */
    protected static final String ULTRA_SUFFIX_WHOIS_SERVER = "whois.iana.org";
    /** Regex applied to each response line; group 1 captures a referral server. Set in the static initializer. */
    protected static String WHOIS_SERVER_REGEX;
    /** Database backing both stored maps below; opened in {@link #start()}. */
    private transient Database whoisDb;
    /** Lower-cased query (or suffix) mapped to the whois server named in a referral line. */
    private transient StoredSortedMap<String, String> referralServers;
    /** Whois URL mapped to the ordinal of its {@link UrlStatus}. */
    private transient StoredSortedMap<String, Integer> urlProgress;
    /** Injected module used to open {@link #whoisDb}. */
    protected BdbModule bdb;
    /** Whois server host mapped to a query template in which "%s" stands for the query. */
    protected Map<String, String> specialQueryTemplates = new HashMap<>();
    private boolean isRunning;
    /** Injected cache used to find the host of a non-whois URI. */
    protected ServerCache serverCache;
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/archive/modules/fetcher/FetchWhois$UrlStatus.class */
    /**
     * Progress marker for a whois URL. The enclosing class stores the
     * {@code ordinal()} of these constants in its {@code urlProgress} map, so
     * reordering or inserting constants changes the meaning of stored values.
     */
    public enum UrlStatus {
        /** The URL has been scheduled as a prerequisite but not fetched yet. */
        IN_PROGRESS,
        /** A fetch of the URL has been attempted (recorded on success and on failure). */
        DONE
    }

    /**
     * Creates a stopped processor, registers the built-in query templates for
     * three whois servers and sets the socket timeout to 20000 ms.
     */
    public FetchWhois() {
        specialQueryTemplates.put("whois.verisign-grs.com", "domain %s");
        specialQueryTemplates.put(DEFAULT_IP_WHOIS_SERVER, "z + %s");
        specialQueryTemplates.put("whois.denic.de", "-T dn %s");
        setSoTimeoutMs(20_000);
        isRunning = false;
    }

    /**
     * Injects the module that {@link #start()} uses to open the whois database.
     *
     * @param bdbModule the BDB module to use
     */
    @Autowired
    public void setBdbModule(BdbModule bdbModule) {
        bdb = bdbModule;
    }

    /**
     * Replaces the per-server query templates.
     *
     * <p>The incoming map is copied before the current templates are cleared,
     * so a {@code null} argument (which throws) or passing the live template
     * map itself no longer wipes the existing configuration. Keys are
     * lower-cased because {@code makeWhoisUrl} looks templates up by the
     * lower-cased server name; a mixed-case key could otherwise never match.
     *
     * @param map whois server host mapped to a template containing "%s"
     * @throws NullPointerException if {@code map} is null
     */
    public void setSpecialQueryTemplates(Map<String, String> map) {
        Map<String, String> normalized = new HashMap<>();
        for (Map.Entry<String, String> entry : map.entrySet()) {
            String server = entry.getKey();
            normalized.put(server == null ? null : server.toLowerCase(Locale.ROOT), entry.getValue());
        }
        specialQueryTemplates.clear();
        specialQueryTemplates.putAll(normalized);
    }

    /**
     * Reads the timeout stored under the "soTimeoutMs" key.
     *
     * @return the timeout in milliseconds
     */
    public int getSoTimeoutMs() {
        Integer timeoutMs = (Integer) kp.get("soTimeoutMs");
        return timeoutMs;
    }

    /**
     * Stores the timeout under the "soTimeoutMs" key.
     *
     * @param i the timeout in milliseconds
     */
    public void setSoTimeoutMs(int i) {
        Integer boxed = Integer.valueOf(i);
        kp.put("soTimeoutMs", boxed);
    }

    /**
     * Opens the "whoisKnowledge" database and creates the two stored maps on
     * top of it, then marks the processor as running. Does nothing when the
     * processor is already running. The database may only be created when
     * there is no recovery checkpoint.
     *
     * @throws RuntimeException wrapping any {@link DatabaseException}
     */
    @Override // org.archive.modules.Processor
    public void start() {
        if (!isRunning()) {
            try {
                boolean recovering = recoveryCheckpoint != null;
                BdbModule.BdbConfig dbConfig = new BdbModule.BdbConfig();
                dbConfig.setTransactional(false);
                dbConfig.setAllowCreate(!recovering);
                whoisDb = bdb.openDatabase("whoisKnowledge", dbConfig, recovering);
                referralServers = new StoredSortedMap<>(whoisDb, new StringBinding(), new StringBinding(), true);
                urlProgress = new StoredSortedMap<>(whoisDb, new StringBinding(), new IntegerBinding(), true);
                isRunning = true;
            } catch (DatabaseException dbFailure) {
                throw new RuntimeException(dbFailure);
            }
        }
    }

    /**
     * @return {@code true} between a successful {@link #start()} and the next {@link #stop()}
     */
    @Override // org.archive.modules.Processor
    public boolean isRunning() {
        return isRunning;
    }

    /**
     * Marks the processor as stopped and drops the reference to the BDB module.
     * The database handle and the stored maps are left untouched here.
     */
    @Override // org.archive.modules.Processor
    public void stop() {
        this.isRunning = false;
        // NOTE(review): start() dereferences this.bdb, so calling start() again
        // after stop() would throw a NullPointerException unless setBdbModule()
        // is called in between - confirm whether restart is expected to work.
        this.bdb = null;
    }

    /**
     * Entry point for every URI.
     *
     * <ul>
     * <li>Non-whois URIs are not fetched; whois links for their host are
     *     added and processing proceeds.</li>
     * <li>Whois URIs that name a server are fetched from that server.</li>
     * <li>Whois URIs without a server are handed to
     *     {@link #deferOrFinishGeneric(CrawlURI, String)}.</li>
     * </ul>
     *
     * @param crawlURI the URI being processed
     * @return {@link ProcessResult#PROCEED}, or whatever
     *         {@code deferOrFinishGeneric} returns for a serverless whois URI
     * @throws InterruptedException declared by {@code addWhoisLinks}
     */
    @Override // org.archive.modules.Processor
    protected ProcessResult innerProcessResult(CrawlURI crawlURI) throws InterruptedException {
        // Constant-first comparison so a URI without a scheme is treated as non-whois
        // instead of throwing a NullPointerException.
        if (!"whois".equals(crawlURI.getUURI().getScheme())) {
            addWhoisLinks(crawlURI);
            return ProcessResult.PROCEED;
        }
        crawlURI.setFetchBeginTime(System.currentTimeMillis());
        String whoisServer = getWhoisServer(crawlURI);
        String whoisQuery = getWhoisQuery(crawlURI);
        if (whoisQuery == null) {
            // getWhoisQuery() returns null (after logging) when the path/query cannot be
            // extracted; without a query neither branch below can do anything useful and
            // both would fail with a NullPointerException.
            crawlURI.setFetchStatus(FetchStatusCodes.S_UNFETCHABLE_URI);
            return ProcessResult.PROCEED;
        }
        if (whoisServer == null) {
            return deferOrFinishGeneric(crawlURI, whoisQuery);
        }
        fetch(crawlURI, whoisServer, whoisQuery);
        return ProcessResult.PROCEED;
    }

    /**
     * Handles a whois URI that names no server: works out which concrete
     * whois URL has to be fetched first and either schedules it as a
     * prerequisite, defers while it is in progress, or finishes this URI once
     * it is done.
     *
     * <p>The concrete URL is chosen in this order: a referral server recorded
     * for the whole query; the default IP whois server when the query looks
     * like an IP address; a referral server recorded for the query's last
     * dot-separated label; otherwise a lookup of that label at the
     * ultra-suffix server. If that last lookup already has a progress entry,
     * the URI is given status -62 and processing proceeds.
     *
     * @param crawlURI the serverless whois URI
     * @param str the whois query taken from the URI; must not be null
     * @return {@link ProcessResult#PROCEED} when this URI is finished or given
     *         up on, {@link ProcessResult#FINISH} when it was deferred
     * @throws RuntimeException wrapping a {@link URIException} from marking the prerequisite
     */
    protected ProcessResult deferOrFinishGeneric(CrawlURI crawlURI, String str) {
        // Locale.ROOT: the suffix is used as a map key and inside a URL, so it must not
        // depend on the JVM's default locale (e.g. Turkish dotless i).
        String suffix = str.substring(str.lastIndexOf('.') + 1).toLowerCase(Locale.ROOT);
        String nextUrl;
        if (referralServers.containsKey(str)) {
            nextUrl = "whois://" + referralServers.get(str) + '/' + str;
        } else if (TextUtils.getMatcher(IP_ADDRESS_REGEX, str).matches()) {
            nextUrl = makeWhoisUrl(DEFAULT_IP_WHOIS_SERVER, str);
        } else if (referralServers.containsKey(suffix)) {
            nextUrl = makeWhoisUrl(referralServers.get(suffix), str);
        } else {
            // Build the suffix lookup URL once and reuse it for the progress check.
            nextUrl = makeWhoisUrl(ULTRA_SUFFIX_WHOIS_SERVER, suffix);
            if (urlProgress.get(nextUrl) != null) {
                // The suffix lookup was already scheduled, yet no referral server is known.
                logger.warning("apparently no whois server for \"" + str + "\"");
                crawlURI.setFetchStatus(-62); // NOTE(review): presumably a prerequisite-failure code - confirm
                return ProcessResult.PROCEED;
            }
        }
        // Decompiled form of an assertion that nextUrl is non-null.
        if (!$assertionsDisabled && nextUrl == null) {
            throw new AssertionError();
        }
        Integer status = urlProgress.get(nextUrl);
        if (nextUrl == null || (status != null && status.intValue() == UrlStatus.DONE.ordinal())) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("finished with generic serverless whois uri " + crawlURI);
            }
            crawlURI.setFetchStatus(FetchStatusCodes.S_WHOIS_GENERIC_FINISHED);
            return ProcessResult.PROCEED;
        }
        if (status == null) {
            // First time this concrete URL is needed: schedule it and remember that.
            try {
                if (logger.isLoggable(Level.FINE)) {
                    logger.fine(crawlURI + " marking prerequisite " + nextUrl + " and deferring");
                }
                crawlURI.markPrerequisite(nextUrl).setForceFetch(false);
                urlProgress.put(nextUrl, Integer.valueOf(UrlStatus.IN_PROGRESS.ordinal()));
            } catch (URIException e) {
                throw new RuntimeException(e);
            }
        } else {
            // Already scheduled and not done yet: wait for it.
            crawlURI.incrementDeferrals();
            crawlURI.setFetchStatus(-50); // NOTE(review): presumably the "deferred" status - confirm
            if (logger.isLoggable(Level.FINE)) {
                logger.fine(crawlURI + ": prerequisite " + nextUrl + " is in progress, deferring");
            }
        }
        return ProcessResult.FINISH;
    }

    /**
     * Builds a {@code whois://server/query} URL. When a query template is
     * registered for the (lower-cased) server, every "%s" in the template is
     * replaced by the query; the resulting query text is URL-encoded as UTF-8.
     *
     * @param str the whois server host; must not be null
     * @param str2 the raw query
     * @return the whois URL
     * @throws RuntimeException if UTF-8 is reported as unsupported
     */
    protected String makeWhoisUrl(String str, String str2) {
        try {
            String template = specialQueryTemplates.get(str.toLowerCase(Locale.ROOT));
            // String.replace substitutes literally. replaceAll would treat '$' and '\' in
            // the query as group references/escapes and throw or mangle the result.
            String query = template != null ? template.replace("%s", str2) : str2;
            return "whois://" + str + "/" + URLEncoder.encode(query, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Sends the query to the whois server, records the response through the
     * URI's recorder and remembers any referral server named in it.
     *
     * <p>On success the URI gets content type "text/plain" and the whois
     * success status. On an {@link IOException} the exception is added to the
     * URI's non-fatal failures and the status is set to -2. In every case,
     * including unexpected exceptions, the recorder is closed, the content
     * size is set, the client is disconnected and the URI is marked
     * {@link UrlStatus#DONE} in the progress map.
     *
     * @param crawlURI the whois URI being fetched
     * @param str the whois server host
     * @param str2 the query to send
     */
    protected void fetch(CrawlURI crawlURI, String str, String str2) {
        WhoisClient whoisClient = new WhoisClient();
        Recorder recorder = crawlURI.getRecorder();
        try {
            int timeoutMs = getSoTimeoutMs();
            whoisClient.setConnectTimeout(timeoutMs);
            whoisClient.setDefaultTimeout(timeoutMs);
            int port = crawlURI.getUURI().getPort();
            if (port > 0) {
                whoisClient.connect(str, port);
            } else {
                whoisClient.connect(str);
            }
            whoisClient.setSoTimeout(timeoutMs);
            crawlURI.setServerIP(whoisClient.getRemoteAddress().getHostAddress());
            recorder.inputWrap(whoisClient.getInputStream(str2));
            // The reader is not closed here; the underlying stream is released by
            // recorder.close() in the finally block.
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader((InputStream) recorder.getRecordedInput(), StandardCharsets.US_ASCII));
            // Key under which a referral is stored: the query with every leading
            // whitespace-terminated token removed, lower-cased. Loop-invariant, so computed once.
            String referralKey = str2.replaceFirst("(\\S+\\s+)+", "").toLowerCase(Locale.ROOT);
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                Matcher matcher = TextUtils.getMatcher(WHOIS_SERVER_REGEX, line);
                if (matcher.find()) {
                    referralServers.put(referralKey, matcher.group(1).toLowerCase(Locale.ROOT));
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine("added referral server " + matcher.group(1) + " to server list for " + referralKey);
                    }
                }
            }
            crawlURI.setContentType("text/plain");
            crawlURI.setFetchStatus(FetchStatusCodes.S_WHOIS_SUCCESS);
        } catch (IOException e) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("failed to connect to whois server for uri " + crawlURI + ": " + e);
            }
            crawlURI.getNonFatalFailures().add(e);
            crawlURI.setFetchStatus(-2); // NOTE(review): presumably the connect-failed status - confirm
        } finally {
            recorder.close();
            crawlURI.setContentSize(recorder.getRecordedInput().getSize());
            if (logger.isLoggable(Level.FINE)) {
                logger.fine(crawlURI + ": " + recorder.getRecordedInput().getSize() + " bytes read");
            }
            if (whoisClient.isConnected()) {
                try {
                    whoisClient.disconnect();
                } catch (IOException e) {
                    // Best effort: the response (if any) has already been recorded.
                    logger.fine("problem closing connection to whois server for uri " + crawlURI + ": " + e);
                }
            }
            // Recorded on failure too, so URIs waiting on this one are not deferred forever.
            urlProgress.put(crawlURI.toString(), Integer.valueOf(UrlStatus.DONE.ordinal()));
        }
    }

    /**
     * Extracts the whois query from the URI: the path/query as is when the
     * URI has no authority, otherwise the path/query without its first
     * character.
     *
     * @param crawlURI the whois URI
     * @return the query, or {@code null} (after logging at SEVERE) when the
     *         URI parts cannot be read
     */
    protected String getWhoisQuery(CrawlURI crawlURI) {
        try {
            if (crawlURI.getUURI().getAuthority() == null) {
                return crawlURI.getUURI().getPathQuery();
            }
            String pathQuery = crawlURI.getUURI().getPathQuery();
            return pathQuery.substring(1);
        } catch (URIException uriProblem) {
            logger.log(Level.SEVERE, "Failed to get path/query from uri " + crawlURI, uriProblem);
            return null;
        }
    }

    /**
     * Returns the host part of the URI as the whois server to contact.
     *
     * @param crawlURI the whois URI
     * @return the host, or {@code null} when the host is absent, empty, or
     *         cannot be read (the latter is logged as a warning)
     */
    protected String getWhoisServer(CrawlURI crawlURI) {
        try {
            String host = crawlURI.getUURI().getHost();
            return (host == null || host.isEmpty()) ? null : host;
        } catch (URIException uriProblem) {
            logger.warning("Failed to get host from uri " + crawlURI + ": " + uriProblem);
            return null;
        }
    }

    /**
     * Accepts every URI; the split between whois and non-whois URIs happens
     * in {@code innerProcessResult}.
     *
     * @return always {@code true}
     */
    @Override // org.archive.modules.Processor
    protected boolean shouldProcess(CrawlURI crawlURI) {
        return true;
    }

    /**
     * @return the server cache used to find the host of a non-whois URI
     */
    public ServerCache getServerCache() {
        return serverCache;
    }

    /**
     * Injects the server cache that {@code addWhoisLinks} uses to find the
     * host of a URI.
     *
     * @param serverCache the cache to use
     */
    @Autowired
    public void setServerCache(ServerCache serverCache) {
        this.serverCache = serverCache;
    }

    /**
     * Adds a serverless {@code whois:} link for the given query to the URI's
     * outlinks as an inferred link. A malformed link is logged as a warning
     * and skipped.
     *
     * @param crawlURI the URI the link is attached to
     * @param str the domain name or IP address to look up
     */
    protected void addWhoisLink(CrawlURI crawlURI, String str) {
        final String whoisLink = "whois:" + str;
        try {
            Extractor.add(crawlURI, Integer.MAX_VALUE, whoisLink, LinkContext.INFERRED_MISC, Hop.INFERRED);
        } catch (URIException badLink) {
            logger.log(Level.WARNING, "problem with url " + whoisLink, badLink);
        }
    }

    /**
     * Adds whois links for the host of a non-whois URI: one for the host's IP
     * address when it is known, and one for the host's top private domain
     * when the host name is a valid internet domain name. If the top private
     * domain cannot be determined, the plain host name is used instead.
     *
     * @param crawlURI the URI whose host should be looked up
     * @throws InterruptedException declared for callers; not thrown by the visible code
     */
    protected void addWhoisLinks(CrawlURI crawlURI) throws InterruptedException {
        CrawlHost host = serverCache.getHostFor(crawlURI.getUURI());
        if (host == null) {
            return;
        }
        if (host.getIP() != null) {
            addWhoisLink(crawlURI, host.getIP().getHostAddress());
        }
        if (!InternetDomainName.isValid(host.getHostName())) {
            return;
        }
        try {
            String topPrivate = InternetDomainName.from(host.getHostName()).topPrivateDomain().toString();
            addWhoisLink(crawlURI, topPrivate);
        } catch (IllegalStateException noPublicSuffix) {
            logger.warning("problem resolving topmost assigned domain, will try whois lookup on the plain hostname " + host.getHostName() + " - " + noPublicSuffix);
            addWhoisLink(crawlURI, host.getHostName());
        }
    }

    /**
     * Not supported by this processor; all work is done in
     * {@code innerProcessResult}.
     *
     * @throws RuntimeException always
     */
    @Override // org.archive.modules.Processor
    protected void innerProcess(CrawlURI crawlURI) throws InterruptedException {
        throw new RuntimeException("this method shouldn't be called - should use innerProcessResult()");
    }

    // Static initializer: sets the decompiled assertion flag, the class logger and
    // the referral-line regex.
    static {
        // Decompiled form of the compiler-generated assertion switch.
        $assertionsDisabled = !FetchWhois.class.desiredAssertionStatus();
        logger = Logger.getLogger(FetchWhois.class.getName());
        // Case-insensitive; matches lines starting with "whois server", "ReferralServer" or
        // "whois", followed by a colon, and captures a trailing host-like token (optionally
        // with port) as group 1, ignoring trailing slashes.
        WHOIS_SERVER_REGEX = "(?i)^\\s*(?:whois server|ReferralServer|whois)[^:]*:.*?([a-zA-Z0-9-]+\\.[a-zA-Z0-9.:-]+)/*$";
    }
}
