package org.archive.modules.extractor;

import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.exception.NestableRuntimeException;
import org.archive.modules.CrawlURI;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.TextUtils;

/* loaded from: input_file:org/archive/modules/extractor/KnowledgableExtractorJS.class */
/**
 * JavaScript extractor with extra, site-specific knowledge layered on top of
 * {@link ExtractorJS}.
 *
 * <p>Before delegating to the superclass, {@link #considerStrings} looks for two
 * known patterns in the script text:
 * <ul>
 *   <li>a Drupal {@code jQuery.extend(Drupal.settings, ... basePath ...)} call, whose
 *       {@code basePath} value is then used as the base URI handed to the superclass;</li>
 *   <li>a {@code new YT.Player(...)} embed, from whose {@code videoId} a
 *       {@code https://www.youtube.com/watch?v=...} link is added.</li>
 * </ul>
 */
public class KnowledgableExtractorJS extends ExtractorJS {
    private static final Logger LOGGER = Logger.getLogger(KnowledgableExtractorJS.class.getName());

    /** Captures (group 1) the quoted {@code basePath} value of a Drupal settings block. */
    private static final String DRUPAL_BASE_PATH_REGEX =
            "jQuery\\.extend\\(Drupal\\.settings,[^'\"]*['\"]basePath['\"]:[^'\"]*['\"]([^'\"]+)['\"]";

    /** Captures (group 1) the {@code videoId} of a {@code new YT.Player(...)} embed. */
    private static final String YOUTUBE_PLAYER_REGEX =
            "new[\\s]+YT\\.Player\\(['\"][^'\"]+['\"],[\\s]+\\{[\\n\\s\\w:'\",]+videoId:[\\s]+['\"]([\\w-]+)['\"],";

    /**
     * A {@link CrawlURI} that reports a caller-supplied base URI while forwarding
     * outlink bookkeeping to the URI it wraps.
     *
     * <p>It is built from the wrapped URI's UURI, path-from-seed, via and via-context.
     * Outlink creation, the outlink collection and the discarded-outlink counter are
     * all delegated to the wrapped instance, so links recorded through this facade
     * end up on the original {@code CrawlURI}.
     */
    protected static class CustomizedCrawlURIFacade extends CrawlURI {
        private static final long serialVersionUID = 1;

        /** The real URI being processed; receives all outlink bookkeeping. */
        protected CrawlURI wrapped;

        /** The substitute base URI returned by {@link #getBaseURI()}. */
        protected UURI baseURI;

        /**
         * @param crawlURI the URI to wrap and delegate to
         * @param uuri     the base URI this facade should report
         */
        public CustomizedCrawlURIFacade(CrawlURI crawlURI, UURI uuri) {
            super(crawlURI.getUURI(), crawlURI.getPathFromSeed(), crawlURI.getVia(), crawlURI.getViaContext());
            this.wrapped = crawlURI;
            this.baseURI = uuri;
        }

        /** @return the base URI supplied at construction, not one derived from the wrapped URI */
        public UURI getBaseURI() {
            return this.baseURI;
        }

        /** Delegates to the wrapped URI so the new {@code CrawlURI} is created by the original. */
        public CrawlURI createCrawlURI(UURI uuri, LinkContext linkContext, Hop hop) throws URIException {
            return this.wrapped.createCrawlURI(uuri, linkContext, hop);
        }

        /** @return the wrapped URI's outlink collection */
        public Collection<CrawlURI> getOutLinks() {
            return this.wrapped.getOutLinks();
        }

        /** Increments the discarded-outlink counter on the wrapped URI. */
        public void incrementDiscardedOutLinks() {
            this.wrapped.incrementDiscardedOutLinks();
        }
    }

    /**
     * Applies the Drupal and YouTube heuristics, then hands the text to
     * {@link ExtractorJS}'s {@code considerStrings}.
     *
     * @param extractor    the extractor on whose behalf links are being found; its
     *                     extractor parameters supply the max-outlinks limit
     * @param crawlURI     the URI whose content is being examined
     * @param charSequence the JavaScript text to scan
     * @param z            passed through unchanged to the superclass
     * @return the value returned by the superclass implementation
     * @throws IllegalStateException if the inferred YouTube watch URL cannot be added
     *                               as a link
     */
    public long considerStrings(Extractor extractor, CrawlURI crawlURI, CharSequence charSequence, boolean z) {
        CrawlURI effectiveUri = resolveDrupalBase(crawlURI, charSequence);
        // The watch link is absolute, so it is added against the original URI.
        addYouTubeWatchLink(extractor, crawlURI, charSequence);
        return super.considerStrings(extractor, effectiveUri, charSequence, z);
    }

    /**
     * Returns a facade over {@code crawlURI} based at the Drupal {@code basePath} if one
     * is found and can be resolved; otherwise returns {@code crawlURI} itself.
     */
    private CrawlURI resolveDrupalBase(CrawlURI crawlURI, CharSequence charSequence) {
        Matcher matcher = TextUtils.getMatcher(DRUPAL_BASE_PATH_REGEX, charSequence);
        try {
            if (!matcher.find()) {
                return crawlURI;
            }
            String basePath = matcher.group(1);
            try {
                basePath = StringEscapeUtils.unescapeJavaScript(basePath);
            } catch (NestableRuntimeException e) {
                // Best effort: fall through and try the raw (still escaped) value.
                LOGGER.log(Level.WARNING, "problem unescaping purported drupal basePath '" + basePath + "'", e);
            }
            try {
                UURI base = UURIFactory.getInstance(crawlURI.getUURI(), basePath);
                return new CustomizedCrawlURIFacade(crawlURI, base);
            } catch (URIException e) {
                LOGGER.log(Level.WARNING, "problem creating UURI from drupal basePath '" + basePath + "'", e);
                return crawlURI;
            }
        } finally {
            // Always hand the matcher back, even if something unexpected is thrown.
            TextUtils.recycleMatcher(matcher);
        }
    }

    /**
     * If the text contains a {@code new YT.Player(...)} embed, adds the matching
     * YouTube watch-page URL as an inferred link. Only the first embed is considered.
     */
    private void addYouTubeWatchLink(Extractor extractor, CrawlURI crawlURI, CharSequence charSequence) {
        Matcher matcher = TextUtils.getMatcher(YOUTUBE_PLAYER_REGEX, charSequence);
        try {
            if (!matcher.find()) {
                return;
            }
            String watchUrl = "https://www.youtube.com/watch?v=" + matcher.group(1);
            try {
                addRelativeToBase(crawlURI, extractor.getExtractorParameters().getMaxOutlinks(), watchUrl,
                        LinkContext.INFERRED_MISC, Hop.INFERRED);
            } catch (URIException e) {
                throw new IllegalStateException(watchUrl, e);
            }
        } finally {
            // Runs on the IllegalStateException path too, so the matcher is never left checked out.
            TextUtils.recycleMatcher(matcher);
        }
    }
}
