package org.archive.modules.extractor;

import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlURI;
import org.archive.net.UURIFactory;

/* loaded from: input_file:org/archive/modules/extractor/ExtractorImpliedURI.class */
/**
 * Extractor that derives additional ("implied") URIs from the outlinks already
 * recorded on a {@link CrawlURI}.
 *
 * <p>Each existing outlink's URI is matched, in full, against the configured
 * {@linkplain #getRegex() regex}. When it matches, a new URI string is built by
 * applying the configured {@linkplain #getFormat() format} as a regex
 * replacement, and that string is added as a further outlink using
 * {@code LinkContext.INFERRED_MISC} and {@code Hop.INFERRED}. Optionally the
 * outlink that triggered the match is removed again.
 *
 * <p>All settings are stored in the inherited {@code kp} property map.
 */
public class ExtractorImpliedURI extends Extractor {
    private static final long serialVersionUID = 3;
    private static final Logger LOGGER = Logger.getLogger(ExtractorImpliedURI.class.getName());

    /**
     * Returns the pattern an outlink URI must match in full to trigger an
     * implied URI.
     *
     * @return the value stored under the {@code "regex"} property
     */
    public Pattern getRegex() {
        return (Pattern) this.kp.get("regex");
    }

    /**
     * Sets the trigger pattern.
     *
     * @param pattern pattern stored under the {@code "regex"} property
     */
    public void setRegex(Pattern pattern) {
        this.kp.put("regex", pattern);
    }

    /**
     * Returns the replacement string used to build the implied URI; it is
     * handed to {@link Matcher#replaceFirst(String)}, so group references such
     * as {@code $1} are interpreted.
     *
     * @return the value stored under the {@code "format"} property
     */
    public String getFormat() {
        return (String) this.kp.get("format");
    }

    /**
     * Sets the replacement string used to build the implied URI.
     *
     * @param str replacement stored under the {@code "format"} property
     */
    public void setFormat(String str) {
        this.kp.put("format", str);
    }

    /**
     * Tells whether an outlink that produced an implied URI is removed from
     * the outlinks afterwards.
     *
     * @return {@code true} only if the {@code "removeTriggerUris"} property
     *         holds {@code Boolean.TRUE}; a missing value counts as
     *         {@code false} instead of failing on unboxing
     */
    public boolean getRemoveTriggerUris() {
        return Boolean.TRUE.equals(this.kp.get("removeTriggerUris"));
    }

    /**
     * Sets whether triggering outlinks are removed once an implied URI has
     * been added.
     *
     * @param z value stored under the {@code "removeTriggerUris"} property
     */
    public void setRemoveTriggerUris(boolean z) {
        this.kp.put("removeTriggerUris", Boolean.valueOf(z));
    }

    /**
     * Creates an extractor with the defaults: regex {@code ^(.*)$}, an empty
     * format, and trigger removal switched off.
     */
    public ExtractorImpliedURI() {
        setRegex(Pattern.compile("^(.*)$"));
        setFormat("");
        setRemoveTriggerUris(false);
    }

    /**
     * Accepts every URI; the filtering happens per outlink in
     * {@link #extract(CrawlURI)}.
     *
     * @param crawlURI the URI being considered (unused)
     * @return always {@code true}
     */
    @Override // org.archive.modules.Processor
    protected boolean shouldProcess(CrawlURI crawlURI) {
        return true;
    }

    /**
     * Walks a snapshot of the current outlinks of {@code crawlURI} and adds an
     * implied outlink for each one whose URI matches the configured regex.
     *
     * <p>An implied string that cannot be turned into a URI is logged at
     * {@code FINE} and skipped.
     *
     * @param crawlURI the URI whose outlinks are inspected and extended
     */
    @Override // org.archive.modules.extractor.Extractor
    public void extract(CrawlURI crawlURI) {
        // The settings do not change while one URI is processed, so look them up once.
        final Pattern regex = getRegex();
        final String format = getFormat();
        final boolean removeTriggers = getRemoveTriggerUris();

        // Iterate over a copy: the live outlink collection is modified inside the loop.
        for (CrawlURI trigger : new ArrayList<CrawlURI>(crawlURI.getOutLinks())) {
            String implied = extractImplied(trigger.getUURI(), regex, format);
            if (implied == null) {
                continue;
            }
            try {
                addOutlink(crawlURI, UURIFactory.getInstance(implied), LinkContext.INFERRED_MISC, Hop.INFERRED);
                this.numberOfLinksExtracted.incrementAndGet();
                if (removeTriggers) {
                    removeTrigger(crawlURI, trigger);
                }
            } catch (URIException e) {
                LOGGER.log(Level.FINE, "bad URI", e);
            }
        }
    }

    /**
     * Removes {@code trigger} from the outlinks of {@code crawlURI}, taking it
     * back out of the extracted-links count when the removal succeeds.
     */
    private void removeTrigger(CrawlURI crawlURI, CrawlURI trigger) {
        boolean removed = crawlURI.getOutLinks().remove(trigger);
        if (removed) {
            this.numberOfLinksExtracted.decrementAndGet();
        }
        // Only build the message when it will actually be published.
        if (LOGGER.isLoggable(Level.FINE)) {
            if (removed) {
                LOGGER.log(Level.FINE, trigger.getURI() + " has been removed from " + crawlURI.getURI() + " outlinks list.");
            } else {
                LOGGER.log(Level.FINE, "Failed to remove " + trigger.getURI() + " from " + crawlURI.getURI() + " outlinks list.");
            }
        }
    }

    /**
     * Builds the implied URI string for one candidate.
     *
     * @param charSequence the text to test, normally an outlink URI
     * @param pattern      pattern the whole of {@code charSequence} must match
     * @param str          replacement applied via
     *                     {@link Matcher#replaceFirst(String)}
     * @return the replaced string, or {@code null} when any argument is
     *         {@code null} or the text does not match in full
     */
    protected static String extractImplied(CharSequence charSequence, Pattern pattern, String str) {
        if (pattern == null || charSequence == null || str == null) {
            return null;
        }
        Matcher matcher = pattern.matcher(charSequence);
        if (!matcher.matches()) {
            return null;
        }
        // replaceFirst resets the matcher and searches again from the start of the input.
        return matcher.replaceFirst(str);
    }
}
