package org.archive.modules.extractor;

import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.google.gson.JsonStreamParser;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.reporting.CrawlerLoggerModule;
import org.archive.modules.CrawlURI;
import org.archive.net.UURIFactory;
import org.archive.util.ArchiveUtils;
import org.archive.util.MimetypeUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.Lifecycle;

/* loaded from: input_file:org/archive/modules/extractor/ExtractorYoutubeDL.class */
/**
 * Extractor that runs the external {@code youtube-dl} command against a fetched page and
 * turns every media URL it reports into an embed outlink.
 *
 * <p>URIs are handled in one of three ways, depending on whether they already carry a
 * {@code youtube-dl:} annotation:
 * <ul>
 *   <li>no annotation: {@code youtube-dl} is run on the URI; each reported video becomes an
 *       outlink annotated {@code youtube-dl:<index>/<count>} and the page itself is annotated
 *       {@code youtube-dl:<number of results>} and logged;</li>
 *   <li>annotated, fetch status in the 3xx range: the annotation and the containing-page data
 *       are copied onto the outlinks whose last hop is {@code "R"};</li>
 *   <li>annotated, any other fetch status: the capture is written to the dedicated log.</li>
 * </ul>
 */
public class ExtractorYoutubeDL extends Extractor implements Lifecycle {
    private static final Logger logger = Logger.getLogger(ExtractorYoutubeDL.class.getName());

    /** Data key under which a video URI stores the content digest of the page it was found on. */
    protected static final String YDL_CONTAINING_PAGE_DIGEST = "ydl-containing-page-digest";
    /** Data key under which a video URI stores the 17-digit fetch timestamp of its containing page. */
    protected static final String YDL_CONTAINING_PAGE_TIMESTAMP = "ydl-containing-page-timestamp";
    /** Data key under which a video URI stores the URI of its containing page. */
    protected static final String YDL_CONTAINING_PAGE_URI = "ydl-containing-page-uri";

    /** Prefix shared by every annotation this extractor writes and looks for. */
    private static final String YDL_ANNOTATION_PREFIX = "youtube-dl:";
    /** Exclusive upper bound on the content length of pages handed to youtube-dl. */
    private static final long MAX_CONTENT_LENGTH = 200000000L;

    /** Dedicated log for captured videos and containing pages; assigned in {@link #start()}. */
    protected transient Logger ydlLogger = null;
    protected CrawlerLoggerModule crawlerLoggerModule;

    /** Holder for the complete standard output and standard error text of a finished process. */
    public static class ProcessOutput {
        public String stdout;
        public String stderr;

        protected ProcessOutput() {
        }
    }

    public CrawlerLoggerModule getCrawlerLoggerModule() {
        return this.crawlerLoggerModule;
    }

    @Autowired
    public void setCrawlerLoggerModule(CrawlerLoggerModule crawlerLoggerModule) {
        this.crawlerLoggerModule = crawlerLoggerModule;
    }

    /**
     * Starts the logger module and creates the dedicated log (named after this bean) unless
     * this component is already running, then delegates to the superclass.
     */
    public void start() {
        if (!this.isRunning) {
            getCrawlerLoggerModule().start();
            this.ydlLogger = getCrawlerLoggerModule().setupSimpleLog(getBeanName());
        }
        super.start();
    }

    /**
     * Reads everything remaining in {@code reader}.
     *
     * @param reader source of characters; it is not closed by this method
     * @return all characters read until end of stream, possibly the empty string
     * @throws IOException if reading fails
     */
    protected String readToEnd(Reader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[4096];
        int count;
        while ((count = reader.read(buffer)) >= 0) {
            text.append(buffer, 0, count);
        }
        return text.toString();
    }

    /**
     * Processes one URI: either follows up on an already annotated video URI (logging it or
     * passing the annotation on to its redirect outlinks) or runs youtube-dl on the page and
     * adds an outlink for every result that has a usable {@code "url"}.
     */
    protected void extract(CrawlURI crawlURI) {
        String ydlAnnotation = findYdlAnnotation(crawlURI);
        if (ydlAnnotation != null) {
            boolean redirect = crawlURI.getFetchStatus() >= 300 && crawlURI.getFetchStatus() < 400;
            if (redirect) {
                doRedirectInheritance(crawlURI, ydlAnnotation);
            } else {
                logCapturedVideo(crawlURI, ydlAnnotation);
            }
            return;
        }

        List<JsonObject> results = runYoutubeDL(crawlURI);
        if (results == null || results.isEmpty()) {
            return;
        }
        for (JsonObject result : results) {
            // A JSON null "url" cannot be converted with getAsString(), so it is skipped
            // exactly like a missing one.
            if (result.has("url") && !result.get("url").isJsonNull()) {
                addVideoOutlink(crawlURI, result, result.get("url").getAsString());
            }
        }
        String pageAnnotation = YDL_ANNOTATION_PREFIX + results.size();
        crawlURI.getAnnotations().add(pageAnnotation);
        logContainingPage(crawlURI, pageAnnotation);
    }

    /**
     * Adds {@code str} as an embed outlink of {@code crawlURI}, annotated with its position in
     * the playlist and carrying the containing page's URI, fetch timestamp and digest.
     *
     * @param crawlURI the page youtube-dl was run on
     * @param jsonObject the youtube-dl result describing the video
     * @param str the video URL, resolved against the page's URI
     */
    protected void addVideoOutlink(CrawlURI crawlURI, JsonObject jsonObject, String str) {
        try {
            CrawlURI videoUri = crawlURI.createCrawlURI(
                    UURIFactory.getInstance(crawlURI.getUURI(), str), LinkContext.EMBED_MISC, Hop.EMBED);

            // A missing "playlist_index" is treated like an explicit JSON null: a single video.
            String annotation;
            if (!jsonObject.has("playlist_index") || jsonObject.get("playlist_index").isJsonNull()) {
                annotation = YDL_ANNOTATION_PREFIX + "1/1";
            } else {
                annotation = YDL_ANNOTATION_PREFIX + jsonObject.get("playlist_index")
                        + "/" + jsonObject.get("n_entries");
            }
            videoUri.getAnnotations().add(annotation);

            videoUri.getData().put(YDL_CONTAINING_PAGE_URI, crawlURI.toString());
            videoUri.getData().put(YDL_CONTAINING_PAGE_TIMESTAMP,
                    ArchiveUtils.get17DigitDate(crawlURI.getFetchBeginTime()));
            videoUri.getData().put(YDL_CONTAINING_PAGE_DIGEST, crawlURI.getContentDigestSchemeString());
            crawlURI.getOutLinks().add(videoUri);
        } catch (URIException e) {
            logUriError(e, crawlURI.getUURI(), str);
        }
    }

    /**
     * @return the first annotation of {@code crawlURI} that starts with {@code "youtube-dl:"},
     *         or {@code null} if there is none
     */
    protected String findYdlAnnotation(CrawlURI crawlURI) {
        for (String annotation : crawlURI.getAnnotations()) {
            if (annotation.startsWith(YDL_ANNOTATION_PREFIX)) {
                return annotation;
            }
        }
        return null;
    }

    /**
     * Writes one space-separated line describing a fetched video URI to the dedicated log:
     * status, size, mimetype, digest, fetch timestamp, URI, annotation, the three
     * containing-page values and the source tag ({@code "-"} when there is none).
     */
    protected void logCapturedVideo(CrawlURI crawlURI, String str) {
        // Size column: for HTTP transactions a non-negative content length wins; otherwise a
        // positive content size is used; "-" when neither is available.
        String size = "-";
        if (crawlURI.isHttpTransaction() && crawlURI.getContentLength() >= 0) {
            size = Long.toString(crawlURI.getContentLength());
        } else if (crawlURI.getContentSize() > 0) {
            size = Long.toString(crawlURI.getContentSize());
        }
        String sourceTag = crawlURI.containsDataKey("source") ? crawlURI.getSourceTag() : "-";

        this.ydlLogger.info(crawlURI.getFetchStatus()
                + " " + size
                + " " + MimetypeUtils.truncate(crawlURI.getContentType())
                + " " + crawlURI.getContentDigestSchemeString()
                + " " + ArchiveUtils.get17DigitDate(crawlURI.getFetchBeginTime())
                + " " + crawlURI
                + " " + str
                + " " + crawlURI.getData().get(YDL_CONTAINING_PAGE_DIGEST)
                + " " + crawlURI.getData().get(YDL_CONTAINING_PAGE_TIMESTAMP)
                + " " + crawlURI.getData().get(YDL_CONTAINING_PAGE_URI)
                + " " + sourceTag);
    }

    /**
     * Writes one line for the page youtube-dl was run on to the dedicated log. The first six
     * columns are dashes, followed by the annotation, the page's digest, fetch timestamp, URI
     * and source tag ({@code "-"} when there is none).
     */
    protected void logContainingPage(CrawlURI crawlURI, String str) {
        String sourceTag = crawlURI.containsDataKey("source") ? crawlURI.getSourceTag() : "-";
        this.ydlLogger.info("- - - - - - " + str
                + " " + crawlURI.getContentDigestSchemeString()
                + " " + ArchiveUtils.get17DigitDate(crawlURI.getFetchBeginTime())
                + " " + crawlURI
                + " " + sourceTag);
    }

    /**
     * Copies the youtube-dl annotation and the containing-page data of {@code crawlURI} onto
     * each of its outlinks whose last hop is {@code "R"} (presumably the redirect hop code).
     */
    protected void doRedirectInheritance(CrawlURI crawlURI, String str) {
        for (CrawlURI outlink : crawlURI.getOutLinks()) {
            if (!"R".equals(outlink.getLastHop())) {
                continue;
            }
            outlink.getAnnotations().add(str);
            outlink.getData().put(YDL_CONTAINING_PAGE_URI,
                    crawlURI.getData().get(YDL_CONTAINING_PAGE_URI));
            outlink.getData().put(YDL_CONTAINING_PAGE_TIMESTAMP,
                    crawlURI.getData().get(YDL_CONTAINING_PAGE_TIMESTAMP));
            outlink.getData().put(YDL_CONTAINING_PAGE_DIGEST,
                    crawlURI.getData().get(YDL_CONTAINING_PAGE_DIGEST));
        }
    }

    /**
     * Reads the complete standard output and standard error of {@code process} as UTF-8 text.
     * Standard error is drained on a helper thread while the calling thread reads standard
     * output, so both streams are consumed at the same time.
     *
     * @param process the process whose output is collected
     * @return the collected output
     * @throws IOException if either stream cannot be read, or if the calling thread is
     *         interrupted while waiting for the standard error reader (the interrupt flag is
     *         set again before the exception is thrown)
     */
    protected ProcessOutput readOutput(Process process) throws IOException {
        ProcessOutput output = new ProcessOutput();
        final Reader stderrReader = new InputStreamReader(process.getErrorStream(), StandardCharsets.UTF_8);
        Reader stdoutReader = new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8);

        ExecutorService stderrExecutor = Executors.newSingleThreadExecutor();
        try {
            Future<String> stderrText = stderrExecutor.submit(new Callable<String>() {
                @Override
                public String call() throws IOException {
                    return readToEnd(stderrReader);
                }
            });
            output.stdout = readToEnd(stdoutReader);
            try {
                output.stderr = stderrText.get();
            } catch (InterruptedException e) {
                // Keep the interruption visible to callers further up the stack.
                Thread.currentThread().interrupt();
                throw new IOException(e);
            } catch (ExecutionException e) {
                if (e.getCause() instanceof IOException) {
                    throw (IOException) e.getCause();
                }
                throw new IOException(e);
            }
            return output;
        } finally {
            // Runs on every exit path, including a failure while reading stdout, so the
            // helper thread is never left behind.
            stderrExecutor.shutdown();
        }
    }

    /**
     * Runs {@code youtube-dl --ignore-config --simulate --dump-json --format=best <uri>} and
     * parses its standard output as a stream of JSON values.
     *
     * @param crawlURI the page to inspect
     * @return the JSON objects printed by youtube-dl (possibly empty), or {@code null} if the
     *         process could not be started, its output could not be read, it exited with a
     *         non-zero status, or its output was not valid JSON
     */
    protected List<JsonObject> runYoutubeDL(CrawlURI crawlURI) {
        ProcessBuilder processBuilder = new ProcessBuilder("youtube-dl", "--ignore-config", "--simulate", "--dump-json", "--format=best", crawlURI.toString());
        logger.fine("running " + processBuilder.command());

        Process process;
        try {
            process = processBuilder.start();
        } catch (IOException e) {
            logger.log(Level.WARNING, "youtube-dl failed " + processBuilder.command(), e);
            return null;
        }

        ProcessOutput output;
        try {
            output = readOutput(process);
        } catch (IOException e) {
            // The result is being abandoned, so do not leave the child process running.
            process.destroyForcibly();
            logger.log(Level.WARNING, "problem reading output from youtube-dl " + processBuilder.command(), e);
            return null;
        }

        try {
            int exitStatus = process.waitFor();
            if (exitStatus != 0) {
                logger.fine("youtube-dl exited with status " + exitStatus + " " + processBuilder.command() + "\n=== stdout ===\n" + output.stdout + "\n=== stderr ===\n" + output.stderr);
                return null;
            }
        } catch (InterruptedException e) {
            process.destroyForcibly();
            // Restore the flag; the output has already been read in full at this point, so
            // it is still parsed below.
            Thread.currentThread().interrupt();
        }

        try {
            return parseJsonObjects(output.stdout);
        } catch (JsonParseException e) {
            logger.log(Level.FINE, "problem parsing json from youtube-dl " + processBuilder.command() + "\n=== stdout ===\n" + output.stdout + "\n=== stderr ===\n" + output.stderr, e);
            return null;
        }
    }

    /**
     * Parses {@code json} as a sequence of top-level JSON values and returns the objects among
     * them, in order. Values that are not JSON objects are logged and skipped.
     *
     * @throws JsonParseException if the text is not well-formed JSON
     */
    private List<JsonObject> parseJsonObjects(String json) {
        List<JsonObject> objects = new ArrayList<JsonObject>();
        JsonStreamParser parser = new JsonStreamParser(json);
        while (parser.hasNext()) {
            Object element = parser.next();
            if (element instanceof JsonObject) {
                objects.add((JsonObject) element);
            } else {
                logger.fine("ignoring non-object json value from youtube-dl: " + element);
            }
        }
        return objects;
    }

    /**
     * @return {@code true} if the URI already carries a youtube-dl annotation, otherwise the
     *         result of {@link #shouldExtract(CrawlURI)}
     */
    protected boolean shouldProcess(CrawlURI crawlURI) {
        return findYdlAnnotation(crawlURI) != null || shouldExtract(crawlURI);
    }

    /**
     * Decides whether youtube-dl should be run on {@code crawlURI}: the fetch status must be
     * 200, the content length must be positive and below 200,000,000, and the content type
     * must start with one of the HTML, XHTML or WML types listed below.
     */
    protected boolean shouldExtract(CrawlURI crawlURI) {
        if (crawlURI.getFetchStatus() != 200) {
            return false;
        }
        if (crawlURI.getContentLength() <= 0 || crawlURI.getContentLength() >= MAX_CONTENT_LENGTH) {
            return false;
        }
        String contentType = crawlURI.getContentType();
        if (contentType == null) {
            return false;
        }
        // Locale.ROOT keeps the comparison independent of the default locale (for example
        // the Turkish dotless i).
        String mimeType = contentType.toLowerCase(Locale.ROOT);
        return mimeType.startsWith("text/html")
                || mimeType.startsWith("application/xhtml")
                || mimeType.startsWith("text/vnd.wap.wml")
                || mimeType.startsWith("application/vnd.wap.wml")
                || mimeType.startsWith("application/vnd.wap.xhtml");
    }
}
