package dk.netarkivet.viewerproxy.webinterface;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.SettingsFactory;
import dk.netarkivet.common.utils.batch.ArchiveBatchFilter;
import dk.netarkivet.common.utils.batch.FileBatchJob;
import dk.netarkivet.common.utils.batch.FileListJob;
import dk.netarkivet.common.utils.cdx.ArchiveExtractCDXJob;
import dk.netarkivet.common.utils.cdx.CDXRecord;
import dk.netarkivet.common.utils.hadoop.HadoopJob;
import dk.netarkivet.common.utils.hadoop.HadoopJobUtils;
import dk.netarkivet.common.utils.service.FileResolver;
import dk.netarkivet.viewerproxy.webinterface.hadoop.CrawlLogExtractionStrategy;
import dk.netarkivet.viewerproxy.webinterface.hadoop.MetadataCDXExtractionStrategy;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.validator.routines.UrlValidator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/viewerproxy/webinterface/Reporting.class */
public class Reporting {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(Reporting.class);
    /**
     * Regular-expression suffix: any characters followed by an {@code .arc} or {@code .warc}
     * extension, optionally followed by {@code .gz}. It is appended to a caller-supplied file
     * name prefix when listing the archive files of a job.
     */
    static final String archivefile_suffix = ".*\\.(w)?arc(\\.gz)?";
    /**
     * Regular-expression suffix: {@code -metadata-<digits>} followed by an {@code .arc} or
     * {@code .warc} extension, optionally followed by {@code .gz}.
     * NOTE(review): not referenced in the visible part of this file; presumably used when
     * building the per-job metadata file pattern — confirm.
     */
    static final String metadatafile_suffix = "-metadata-[0-9]+\\.(w)?arc(\\.gz)?";

    /** Private constructor: prevents instantiation from outside this class. */
    private Reporting() {
    }

    /**
     * Lists the names of the files associated with a harvest job.
     *
     * <p>Delegates to the file-resolver based lookup when the
     * {@code USE_BITMAG_HADOOP_BACKEND} setting is true, and to the batch-job based lookup
     * otherwise.
     *
     * @param j   the job id
     * @param str file name prefix; archive files are matched against this prefix followed by
     *            {@link #archivefile_suffix}
     * @return the file names found by the selected lookup
     */
    public static List<String> getFilesForJob(long j, String str) {
        if (Settings.getBoolean(CommonSettings.USE_BITMAG_HADOOP_BACKEND)) {
            return getFilesForJobFileResolver(j, str);
        }
        return getFilesForJobBatch(j, str);
    }

    /**
     * Lists the files of a job using the configured {@link FileResolver}.
     *
     * <p>Looks up paths matching the job's metadata file pattern and paths matching
     * {@code str + archivefile_suffix}, reduces each path to its file name, keeps only names
     * the resolver can resolve individually, and returns them de-duplicated and sorted.
     *
     * @param j   the job id used to build the metadata file pattern
     * @param str file name prefix for the job's archive files
     * @return sorted, distinct file names
     */
    private static List<String> getFilesForJobFileResolver(long j, String str) {
        FileResolver fileResolver = (FileResolver) SettingsFactory.getInstance(CommonSettings.FILE_RESOLVER_CLASS, new Object[0]);

        String metadataPattern = getMetadataFilePatternForJobId(j);
        log.debug("Looking for metadata files matching {}.", metadataPattern);
        // Typed (not raw) lists so that the stream below is a Stream<Path> and the lambdas type-check.
        List<java.nio.file.Path> metadataPaths = fileResolver.getPaths(Pattern.compile(metadataPattern));
        log.debug("Initial found metadata files: {}", metadataPaths);

        String archivePattern = str + archivefile_suffix;
        log.debug("Looking for archive files matching {}.", archivePattern);
        List<java.nio.file.Path> archivePaths = fileResolver.getPaths(Pattern.compile(archivePattern));
        log.debug("Initial found archive files {}.", archivePaths);

        List<String> fileNames = Stream.concat(metadataPaths.stream(), archivePaths.stream())
                .map(path -> path.getFileName().toString())
                // Drop names the resolver cannot resolve on their own.
                // NOTE(review): per the log message below this acts as a per-collection filter — confirm.
                .filter(name -> fileResolver.getPath(name) != null)
                .distinct()
                .sorted()
                .collect(Collectors.toList());
        log.debug("After filtering by collection we have the following files: {}", fileNames);
        return fileNames;
    }

    /**
     * Lists the files of a job by running a {@link FileListJob} batch job.
     *
     * <p>The batch job is restricted to files matching the job's metadata file pattern or
     * {@code str + archivefile_suffix}. Its result is copied to a temporary file, read back as a
     * list of lines, and returned de-duplicated and sorted. The temporary file is removed even
     * when the batch job, the copy or the read fails.
     *
     * @param j   the job id; must be positive
     * @param str file name prefix for the job's archive files
     * @return sorted, distinct file names reported by the batch job
     * @throws IOFailure if the temporary file cannot be created
     */
    private static List<String> getFilesForJobBatch(long j, String str) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        FileListJob fileListJob = new FileListJob();
        List<String> patterns = new ArrayList<>();
        patterns.add(getMetadataFilePatternForJobId(j));
        patterns.add(str + archivefile_suffix);
        fileListJob.processOnlyFilesMatching(patterns);
        try {
            File resultFile = File.createTempFile(j + "-files", ".txt", FileUtils.getTempDir());
            try {
                ArcRepositoryClientFactory.getViewerInstance()
                        .batch(fileListJob, Settings.get(CommonSettings.USE_REPLICA_ID), new String[0])
                        .getResultFile()
                        .copyTo(resultFile);
                List<String> reportedFiles = FileUtils.readListFromFile(resultFile);
                // Same de-duplicate-and-sort step as the file-resolver based lookup.
                return reportedFiles.stream().distinct().sorted().collect(Collectors.toList());
            } finally {
                // Clean up on every path, not only on success.
                FileUtils.remove(resultFile);
            }
        } catch (IOException e) {
            throw new IOFailure("Could not create temporary file", e);
        }
    }

    /**
     * Returns the CDX records extracted from the metadata files of a job.
     *
     * <p>Uses the Hadoop-based extraction when the {@code USE_BITMAG_HADOOP_BACKEND} setting is
     * true, and the batch-job based extraction otherwise.
     *
     * @param j the job id; must be positive
     * @return the CDX records produced by the selected backend
     */
    public static List<CDXRecord> getMetadataCDXRecordsForJob(long j) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        if (Settings.getBoolean(CommonSettings.USE_BITMAG_HADOOP_BACKEND)) {
            return getRecordsUsingHadoop(j);
        }
        return getRecordsUsingBatch(j);
    }

    /**
     * Returns the location of the CDX cache file for a job: a file named after the job id
     * inside the {@code cdxcache} directory under the {@code METADATA_CACHE} setting.
     * The directory is created if needed; the file itself is not.
     *
     * @param j the job id
     * @return the (possibly non-existent) cache file
     */
    private static File getCDXCacheFile(long j) {
        File metadataCacheRoot = new File(Settings.get(CommonSettings.METADATA_CACHE));
        File cdxCacheDir = new File(metadataCacheRoot, "cdxcache");
        cdxCacheDir.mkdirs();
        return new File(cdxCacheDir, Long.toString(j));
    }

    /**
     * Reads the cached CDX records of a job, if any.
     *
     * @param j the job id
     * @return the records parsed from the cache file, or {@code null} when the cache file is
     *         missing or empty
     * @throws RuntimeException wrapping the {@link IOException} if the cache file cannot be read
     */
    private static List<CDXRecord> getCachedCDXRecords(long j) {
        File cacheFile = getCDXCacheFile(j);
        boolean hasContent = cacheFile.exists() && cacheFile.length() != 0;
        if (!hasContent) {
            return null;
        }
        List<String> cdxLines;
        try {
            cdxLines = org.apache.commons.io.FileUtils.readLines(cacheFile);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return HadoopJobUtils.getCDXRecordListFromCDXLines(cdxLines);
    }

    /**
     * Returns the metadata CDX records of a job, using Hadoop to populate a local cache file
     * on the first request.
     *
     * <p>If the cache file for the job already has content, its records are returned directly.
     * Otherwise a {@link HadoopJob} with a {@link MetadataCDXExtractionStrategy} is run over the
     * job's metadata files, its output lines are collected into the cache file, and the cache
     * is read back. Both the Hadoop {@link FileSystem} and the cache output stream are closed on
     * every path, including failures.
     *
     * @param j the job id
     * @return the CDX records; {@code null} if the cache file is still missing or empty after
     *         the Hadoop job (see {@link #getCachedCDXRecords(long)})
     * @throws IOFailure if an {@link IOException} occurs while working with the Hadoop file
     *                   system or the cache file; the original exception is kept as the cause
     */
    private static List<CDXRecord> getRecordsUsingHadoop(long j) {
        log.info("Getting records for jobid {}.", j);
        List<CDXRecord> cachedRecords = getCachedCDXRecords(j);
        if (cachedRecords != null) {
            log.info("Found {} cached records for jobid {}.", cachedRecords.size(), j);
            return cachedRecords;
        }
        File cacheFile = getCDXCacheFile(j);
        log.info("Cached records not found for jobid {} so fetching them to {} via hadoop.", j, cacheFile.getAbsolutePath());
        Configuration conf = HadoopJobUtils.getConf();
        String metadataPattern = getMetadataFilePatternForJobId(j);
        try (FileSystem fileSystem = FileSystem.newInstance(conf)) {
            HadoopJob hadoopJob = new HadoopJob(j, new MetadataCDXExtractionStrategy(j, fileSystem));
            hadoopJob.processOnlyFilesMatching(metadataPattern);
            hadoopJob.prepareJobInputOutput(fileSystem);
            hadoopJob.run();
            log.info("Collecting hadoop output from {} to {}", hadoopJob.getJobOutputDir(), cacheFile.getAbsolutePath());
            try (FileOutputStream cacheOut = new FileOutputStream(cacheFile)) {
                HadoopJobUtils.collectOutputLines(fileSystem, hadoopJob.getJobOutputDir(), cacheOut);
            }
            // The stream is closed before the size is logged and the cache is read back.
            log.info("Collected {} bytes output to {}", cacheFile.length(), cacheFile.getAbsolutePath());
            return getCachedCDXRecords(j);
        } catch (IOException e) {
            log.error("Error instantiating Hadoop filesystem for job {}.", j, e);
            throw new IOFailure("Failed instantiating Hadoop filesystem.", e);
        }
    }

    /**
     * Returns the metadata CDX records of a job by running an {@link ArchiveExtractCDXJob}
     * batch job over the job's metadata files.
     *
     * <p>The batch job uses the {@code EXCLUDE_NON_WARCINFO_RECORDS} filter. Its result is copied
     * to a temporary file; each line of that file is split on whitespace and turned into a
     * {@link CDXRecord}. The temporary file is removed on every path, including failures.
     *
     * @param j the job id
     * @return one CDX record per line of the batch result
     * @throws IOFailure if the temporary file cannot be created or the result cannot be read
     */
    private static List<CDXRecord> getRecordsUsingBatch(long j) {
        ArchiveExtractCDXJob cdxJob = new ArchiveExtractCDXJob(false) {
            @Override
            public ArchiveBatchFilter getFilter() {
                return ArchiveBatchFilter.EXCLUDE_NON_WARCINFO_RECORDS;
            }
        };
        cdxJob.processOnlyFilesMatching(getMetadataFilePatternForJobId(j));
        try {
            File resultFile = File.createTempFile(j + "-reports", ".cdx", FileUtils.getTempDir());
            try {
                ArcRepositoryClientFactory.getViewerInstance()
                        .batch(cdxJob, Settings.get(CommonSettings.USE_REPLICA_ID), new String[0])
                        .getResultFile()
                        .copyTo(resultFile);
                List<CDXRecord> records = new ArrayList<>();
                try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
                    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                        records.add(new CDXRecord(line.split("\\s+")));
                    }
                } catch (IOException e) {
                    throw new IOFailure("Unable to read results from file '" + resultFile + "'", e);
                }
                return records;
            } finally {
                // Also covers a failing batch job or copy, which previously leaked the file.
                FileUtils.remove(resultFile);
            }
        } catch (IOException e) {
            throw new IOFailure("Could not create temporary file", e);
        }
    }

    /**
     * Runs a {@link HarvestedUrlsForDomainBatchJob} for the given domain over the metadata files
     * of a job and returns the result as a file sorted on timestamp.
     *
     * @param str the domain; must not be null or empty
     * @param j   the job id; must be positive
     * @return a temporary file holding the sorted batch result
     */
    public static File getCrawlLogForDomainInJob(String str, long j) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        ArgumentNotValid.checkNotNullOrEmpty(str, "String domain");
        FileBatchJob domainJob = new HarvestedUrlsForDomainBatchJob(str);
        String metadataPattern = getMetadataFilePatternForJobId(j);
        domainJob.processOnlyFilesMatching(metadataPattern);
        return createSortedResultFile(domainJob);
    }

    /**
     * Creates a temporary result file in the configured temp directory. The file name starts
     * with {@code temp} and ends with {@code str + ".txt"}; the file is registered for deletion
     * when the JVM exits.
     *
     * @param str text placed in front of the {@code .txt} suffix
     * @return the newly created temporary file
     * @throws IOFailure if the file cannot be created
     */
    private static File createTempResultFile(String str) {
        File resultFile;
        try {
            resultFile = File.createTempFile("temp", str + ".txt", FileUtils.getTempDir());
        } catch (IOException e) {
            throw new IOFailure("Unable to create temporary file", e);
        }
        resultFile.deleteOnExit();
        return resultFile;
    }

    /**
     * Returns the crawl log lines of a job that match a regular expression, sorted on timestamp.
     *
     * <p>With the {@code USE_BITMAG_HADOOP_BACKEND} setting enabled the lines are filtered from
     * the cached crawl log; otherwise a {@link CrawlLogLinesMatchingRegexp} batch job is run over
     * the job's metadata files.
     *
     * @param j   the job id; must be positive
     * @param str the regular expression; must not be null or empty
     * @return a temporary file holding the sorted matching lines
     */
    public static File getCrawlLoglinesMatchingRegexp(long j, String str) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        ArgumentNotValid.checkNotNullOrEmpty(str, "String regexp");
        if (!Settings.getBoolean(CommonSettings.USE_BITMAG_HADOOP_BACKEND)) {
            FileBatchJob regexpJob = new CrawlLogLinesMatchingRegexp(str);
            regexpJob.processOnlyFilesMatching(getMetadataFilePatternForJobId(j));
            return createSortedResultFile(regexpJob);
        }
        return getCrawlLogLinesUsingHadoop(j, str);
    }

    /**
     * Returns the location of the crawl log cache file for a job: a file named after the job id
     * inside the {@code crawllog_cache} directory under the {@code METADATA_CACHE} setting.
     * The directory is created if needed; the file itself is not.
     *
     * @param j the job id
     * @return the (possibly non-existent) cache file
     */
    private static File getCrawlLogCache(long j) {
        File metadataCacheRoot = new File(Settings.get(CommonSettings.METADATA_CACHE));
        File crawlLogCacheDir = new File(metadataCacheRoot, "crawllog_cache");
        crawlLogCacheDir.mkdirs();
        return new File(crawlLogCacheDir, Long.toString(j));
    }

    /**
     * Filters the (cached or freshly fetched) crawl log of a job, keeping the lines that match
     * the regular expression in their entirety.
     *
     * @param j   the job id
     * @param str the regular expression each kept line must fully match
     * @return a temporary file holding the matching lines, as produced by the filtering helper
     */
    private static File getCrawlLogLinesUsingHadoop(long j, String str) {
        File cachedCrawlLog = getCrawlLogFromCacheOrHdfs(j);
        Pattern linePattern = Pattern.compile(str);
        log.info("Filtering cache file {} with regexp {}", cachedCrawlLog.getAbsolutePath(), str);
        Predicate<String> matchesWholeLine = line -> linePattern.matcher(line).matches();
        return getFilteredFile(cachedCrawlLog, matchesWholeLine);
    }

    /**
     * Filters the (cached or freshly fetched) crawl log of a job, keeping the lines accepted by
     * {@link #lineMatchesDomain(String, String)} for the given domain.
     *
     * @param j   the job id
     * @param str the domain to match
     * @return a temporary file holding the matching lines, as produced by the filtering helper
     */
    public static File getCrawlLogLinesMatchingDomain(long j, String str) {
        log.info("Finding matching crawl log lines for {} in job {}", str, j);
        File cachedCrawlLog = getCrawlLogFromCacheOrHdfs(j);
        log.info("Finding matching crawl log lines for {} in job {} in file {}", str, j, cachedCrawlLog.getAbsoluteFile());
        Predicate<String> belongsToDomain = line -> lineMatchesDomain(line, str);
        return getFilteredFile(cachedCrawlLog, belongsToDomain);
    }

    /**
     * Copies the lines of {@code file} accepted by {@code predicate} into a temporary file and
     * returns a second temporary file containing those lines sorted on crawl log timestamp.
     *
     * <p>The unsorted intermediate file is removed after sorting. Reader and writer are closed
     * on every path; the writer is closed before the sort runs.
     *
     * @param file      the file to read line by line
     * @param predicate decides which lines are kept
     * @return the temporary file with the sorted, filtered lines
     * @throws RuntimeException if reading {@code file} or writing the intermediate file fails;
     *                          the {@link IOException} is kept as the cause
     */
    private static File getFilteredFile(File file, Predicate<String> predicate) {
        String uuid = UUID.randomUUID().toString();
        File unsortedFile = createTempResultFile(uuid);
        // Pass the path as an argument; concatenating it left the "{}" placeholder unfilled.
        log.info("Unsorted results in {}.", unsortedFile.getAbsolutePath());
        File sortedFile = createTempResultFile(uuid + "-sorted");
        log.info("Sorted results in {}.", sortedFile.getAbsolutePath());
        try (BufferedWriter writer = Files.newBufferedWriter(unsortedFile.toPath())) {
            try (BufferedReader reader = Files.newBufferedReader(file.toPath())) {
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    if (predicate.test(line)) {
                        writer.write(line);
                        writer.newLine();
                    }
                }
            } catch (IOException e) {
                throw new RuntimeException("Error reading file " + file.getAbsolutePath(), e);
            }
        } catch (IOException e) {
            throw new RuntimeException("Error writing to file " + unsortedFile.getAbsolutePath(), e);
        }
        FileUtils.sortCrawlLogOnTimestamp(unsortedFile, sortedFile);
        FileUtils.remove(unsortedFile);
        return sortedFile;
    }

    /**
     * Reads all lines of a file and returns those that match the regular expression in their
     * entirety, in file order.
     *
     * @param file the file to read
     * @param str  the regular expression each returned line must fully match
     * @return the matching lines
     * @throws RuntimeException wrapping the {@link IOException} if the file cannot be read
     */
    private static List<String> getMatchingStringsFromFile(File file, String str) {
        Pattern linePattern = Pattern.compile(str);
        List<String> allLines;
        try {
            allLines = org.apache.commons.io.FileUtils.readLines(file);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        List<String> matchingLines = new ArrayList<>();
        for (String line : allLines) {
            if (linePattern.matcher(line).matches()) {
                matchingLines.add(line);
            }
        }
        return matchingLines;
    }

    /**
     * Writes the given lines to a temporary file, sorts that file on crawl log timestamp into a
     * second temporary file, removes the unsorted one and returns the sorted one.
     *
     * @param list the lines to write and sort
     * @return the temporary file holding the sorted lines
     */
    private static File createSortedResultFile(List<String> list) {
        String runId = UUID.randomUUID().toString();
        File unsortedFile = createTempResultFile(runId);
        File sortedFile = createTempResultFile(runId + "-sorted");

        FileUtils.writeCollectionToFile(unsortedFile, list);
        FileUtils.sortCrawlLogOnTimestamp(unsortedFile, sortedFile);

        // Only the sorted file is handed back; the intermediate one is no longer needed.
        FileUtils.remove(unsortedFile);
        return sortedFile;
    }

    /**
     * Runs a batch job against the replica named by the {@code USE_REPLICA_ID} setting, copies
     * its result to a temporary file, sorts that file on crawl log timestamp into a second
     * temporary file and returns the sorted one.
     *
     * <p>The unsorted intermediate file is removed on every path, including a failing batch job
     * or sort. The sorted file carries the {@code -sorted} marker in its name, matching the
     * other result-file helpers in this class.
     *
     * @param fileBatchJob the batch job to run
     * @return the temporary file holding the sorted batch result
     */
    private static File createSortedResultFile(FileBatchJob fileBatchJob) {
        String uuid = UUID.randomUUID().toString();
        File unsortedFile = createTempResultFile(uuid);
        File sortedFile = createTempResultFile(uuid + "-sorted");
        try {
            ArcRepositoryClientFactory.getViewerInstance()
                    .batch(fileBatchJob, Settings.get(CommonSettings.USE_REPLICA_ID), new String[0])
                    .getResultFile()
                    .copyTo(unsortedFile);
            FileUtils.sortCrawlLogOnTimestamp(unsortedFile, sortedFile);
        } finally {
            FileUtils.remove(unsortedFile);
        }
        return sortedFile;
    }

    /**
     * Reads all lines of a file and returns those accepted by
     * {@link #lineMatchesDomain(String, String)} for the given domain, in file order.
     *
     * <p>Best effort: if the file cannot be read, the failure is logged and an empty list is
     * returned instead of propagating the exception.
     *
     * @param file the file to read
     * @param str  the domain to match
     * @return the matching lines, or an empty list if the file could not be read
     */
    private static List<String> getMatchingDomainStringsFromFile(File file, String str) {
        try {
            List<String> allLines = org.apache.commons.io.FileUtils.readLines(file);
            return allLines.stream()
                    .filter(line -> lineMatchesDomain(line, str))
                    .collect(Collectors.toList());
        } catch (IOException e) {
            // Report through the class logger rather than printing the stack trace to stderr.
            log.warn("Could not read lines from '{}'; returning no matches for domain {}.", file, str, e);
            return new ArrayList<>();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells whether a crawl log line belongs to a domain.
     *
     * <p>The line is split on whitespace and element 10 is interpreted as a URL, first as-is and,
     * if that is not a valid http/https URL, with {@code http://} prepended. The line matches
     * when the URL's host equals the domain or ends with {@code "." + domain}.
     * NOTE(review): the log messages refer to element 10 as the seed — confirm against the crawl
     * log format in use.
     *
     * <p>Lines with fewer than 11 elements, or whose element 10 cannot be read as a URL, are
     * logged at warn level and treated as non-matching rather than raising an exception.
     *
     * @param str  the crawl log line
     * @param str2 the domain to match
     * @return {@code true} if the line's host is the domain or one of its subdomains
     */
    public static boolean lineMatchesDomain(String str, String str2) {
        final int urlFieldIndex = 10;
        String[] fields = str.split("\\s+");
        if (fields.length <= urlFieldIndex) {
            // Previously an ArrayIndexOutOfBoundsException escaped here and aborted the whole filter run.
            log.warn("No domain to match found: crawlline '{}' has only {} elements, element {} is missing",
                    str, fields.length, urlFieldIndex);
            return false;
        }
        String candidate = fields[urlFieldIndex];
        UrlValidator urlValidator = new UrlValidator(new String[]{"http", "https"});
        try {
            URL url;
            if (urlValidator.isValid(candidate)) {
                url = new URL(candidate);
            } else if (urlValidator.isValid("http://" + candidate)) {
                url = new URL("http://" + candidate);
            } else {
                // Explicit check instead of relying on a NullPointerException being caught below.
                log.warn("No domain to match found in element {} of '{}' which is '{}'", urlFieldIndex, str, candidate);
                return false;
            }
            String host = url.getHost();
            if (host.equals(str2) || host.endsWith("." + str2)) {
                log.debug("Domain {} found in crawlline {}", str2, str);
                return true;
            }
            log.debug("Domain {} not found in crawlline {}", str2, str);
            return false;
        } catch (java.net.MalformedURLException e) {
            log.warn("Exception finding seed domain. No domain to match found in element {} of '{}' which is '{}'", new Object[]{urlFieldIndex, str, candidate, e});
            return false;
        }
    }

    /**
     * Returns the crawl log cache file of a job, populating it first when it is missing or
     * empty.
     *
     * <p>When the cache must be (re)filled, the crawl log is fetched via
     * {@code getCrawlLogUsingHadoop}, copied to the cache location, and the fetched file is then
     * deleted (a failed delete is only logged).
     *
     * @param j the job id
     * @return the cache file
     * @throws RuntimeException wrapping the {@link IOException} if copying to the cache fails
     */
    private static File getCrawlLogFromCacheOrHdfs(long j) {
        File cacheFile = getCrawlLogCache(j);
        boolean present = cacheFile.exists();
        boolean empty = cacheFile.length() == 0;
        if (present && empty) {
            log.info("Overwriting empty cache file {}.", cacheFile.getAbsolutePath());
        }
        if (present && !empty) {
            // Usable cache: nothing to fetch.
            return cacheFile;
        }
        File fetchedCrawlLog = getCrawlLogUsingHadoop(j);
        try {
            log.info("Copying {} to {}", fetchedCrawlLog.getAbsolutePath(), cacheFile.getAbsolutePath());
            org.apache.commons.io.FileUtils.copyFile(fetchedCrawlLog, cacheFile);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        if (fetchedCrawlLog.delete()) {
            log.info("Deleted {}", fetchedCrawlLog.getAbsolutePath());
        } else {
            log.warn("Could not delete {}", fetchedCrawlLog.getAbsolutePath());
        }
        return cacheFile;
    }

    /* JADX WARN: Failed to calculate best type for var: r10v1 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.calculateFromBounds(FixTypesVisitor.java:156)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.setBestType(FixTypesVisitor.java:133)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.deduceType(FixTypesVisitor.java:238)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.tryDeduceTypes(FixTypesVisitor.java:221)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Failed to calculate best type for var: r10v1 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.calculateFromBounds(TypeInferenceVisitor.java:145)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.setBestType(TypeInferenceVisitor.java:123)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.lambda$runTypePropagation$2(TypeInferenceVisitor.java:101)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.runTypePropagation(TypeInferenceVisitor.java:101)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.visit(TypeInferenceVisitor.java:75)
     */
    /* JADX WARN: Failed to calculate best type for var: r11v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.calculateFromBounds(FixTypesVisitor.java:156)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.setBestType(FixTypesVisitor.java:133)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.deduceType(FixTypesVisitor.java:238)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.tryDeduceTypes(FixTypesVisitor.java:221)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Failed to calculate best type for var: r11v0 ??
    java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.InsnArg.getType()" because "changeArg" is null
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.moveListener(TypeUpdate.java:439)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.runListeners(TypeUpdate.java:232)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.requestUpdate(TypeUpdate.java:212)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeForSsaVar(TypeUpdate.java:183)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.updateTypeChecked(TypeUpdate.java:112)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:83)
    	at jadx.core.dex.visitors.typeinference.TypeUpdate.apply(TypeUpdate.java:56)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.calculateFromBounds(TypeInferenceVisitor.java:145)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.setBestType(TypeInferenceVisitor.java:123)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.lambda$runTypePropagation$2(TypeInferenceVisitor.java:101)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.runTypePropagation(TypeInferenceVisitor.java:101)
    	at jadx.core.dex.visitors.typeinference.TypeInferenceVisitor.visit(TypeInferenceVisitor.java:75)
     */
    /* JADX WARN: Multi-variable type inference failed. Error: java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.RegisterArg.getSVar()" because the return value of "jadx.core.dex.nodes.InsnNode.getResult()" is null
    	at jadx.core.dex.visitors.typeinference.AbstractTypeConstraint.collectRelatedVars(AbstractTypeConstraint.java:31)
    	at jadx.core.dex.visitors.typeinference.AbstractTypeConstraint.<init>(AbstractTypeConstraint.java:19)
    	at jadx.core.dex.visitors.typeinference.TypeSearch$1.<init>(TypeSearch.java:376)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.makeMoveConstraint(TypeSearch.java:376)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.makeConstraint(TypeSearch.java:361)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.collectConstraints(TypeSearch.java:341)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.run(TypeSearch.java:60)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.runMultiVariableSearch(FixTypesVisitor.java:116)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Not initialized variable reg: 10, insn: 0x0185: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r10 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) A[TRY_LEAVE], block:B:56:0x0185 */
    /* JADX WARN: Not initialized variable reg: 11, insn: 0x018a: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r11 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]), block:B:58:0x018a */
    /* JADX WARN: Type inference failed for: r10v1, types: [org.apache.hadoop.fs.FileSystem] */
    /* JADX WARN: Type inference failed for: r11v0, types: [java.lang.Throwable] */
    /**
     * Builds a crawl log file for the given job by running a Hadoop job.
     *
     * <p>A {@link HadoopJob} using a {@link CrawlLogExtractionStrategy} is run over the files
     * matching the pattern from {@code getMetadataFilePatternForJobId}. The job's output lines
     * are collected into a temporary "unsorted" file, which is then sorted with
     * {@code FileUtils.sortCrawlLogOnTimestamp} into a second temporary file. The unsorted file
     * is deleted afterwards (a warning is logged if that fails); the sorted file is returned and
     * is not deleted by this method.
     *
     * @param j the id of the job whose crawl log is wanted
     * @return a temporary file holding the sorted output
     * @throws IOFailure if any step inside the Hadoop filesystem scope throws an
     *     {@link IOException}; the original exception is attached as the cause
     */
    private static File getCrawlLogUsingHadoop(long j) {
        String metadataFilePatternForJobId = getMetadataFilePatternForJobId(j);
        Configuration conf = HadoopJobUtils.getConf();
        conf.setPattern("regex", Pattern.compile(".*"));
        // try-with-resources guarantees the FileSystem is closed on every path, including failures.
        try (FileSystem fileSystem = FileSystem.newInstance(conf)) {
            HadoopJob hadoopJob = new HadoopJob(j, new CrawlLogExtractionStrategy(j, fileSystem));
            hadoopJob.processOnlyFilesMatching(metadataFilePatternForJobId);
            hadoopJob.prepareJobInputOutput(fileSystem);
            hadoopJob.run();

            File unsortedLog = File.createTempFile("unsorted_crawl", "log");
            File sortedLog = File.createTempFile("sorted_crawl", "log");
            log.info("Collecting output from {} to {}", hadoopJob.getJobOutputDir(), unsortedLog.getAbsolutePath());
            // The stream must be closed before the file length is logged and the file is sorted.
            try (FileOutputStream unsortedOut = new FileOutputStream(unsortedLog)) {
                HadoopJobUtils.collectOutputLines(fileSystem, hadoopJob.getJobOutputDir(), unsortedOut);
            }
            log.info("Collected {} bytes to {}", unsortedLog.length(), unsortedLog.getAbsolutePath());

            log.info("Sorting {} to {}", unsortedLog.getAbsolutePath(), sortedLog.getAbsolutePath());
            FileUtils.sortCrawlLogOnTimestamp(unsortedLog, sortedLog);
            log.info("Collected {} bytes to {}", sortedLog.length(), sortedLog.getAbsolutePath());

            // The unsorted intermediate is no longer needed once the sorted copy exists.
            if (unsortedLog.delete()) {
                log.info("Deleted {}", unsortedLog.getAbsolutePath());
            } else {
                log.warn("Could not delete {}", unsortedLog.getAbsolutePath());
            }
            return sortedLog;
        } catch (IOException e) {
            log.error("Error instantiating Hadoop filesystem for job {}.", j, e);
            // Keep the underlying IOException as the cause so callers can diagnose the failure.
            throw new IOFailure("Failed instantiating Hadoop filesystem.", e);
        }
    }

    /**
     * Builds the regular-expression string used to select the metadata files of a job.
     *
     * <p>The result is the job id, optionally preceded by any text ending in '-' and optionally
     * followed by '-' plus any text, followed by {@code metadatafile_suffix}.
     *
     * @param j the job id to embed in the pattern
     * @return the pattern string for the given job id
     */
    private static String getMetadataFilePatternForJobId(long j) {
        StringBuilder pattern = new StringBuilder("(.*-)?");
        pattern.append(j);
        pattern.append("(-.*)?");
        pattern.append(metadatafile_suffix);
        return pattern.toString();
    }
}
