package dk.netarkivet.viewerproxy.webinterface;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.SettingsFactory;
import dk.netarkivet.common.utils.batch.ArchiveBatchFilter;
import dk.netarkivet.common.utils.batch.FileBatchJob;
import dk.netarkivet.common.utils.batch.FileListJob;
import dk.netarkivet.common.utils.cdx.ArchiveExtractCDXJob;
import dk.netarkivet.common.utils.cdx.CDXRecord;
import dk.netarkivet.common.utils.hadoop.HadoopJob;
import dk.netarkivet.common.utils.hadoop.HadoopJobUtils;
import dk.netarkivet.common.utils.service.FileResolver;
import dk.netarkivet.viewerproxy.webinterface.hadoop.CrawlLogExtractionStrategy;
import dk.netarkivet.viewerproxy.webinterface.hadoop.MetadataCDXExtractionStrategy;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.net.URL;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.UUID;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.archive.url.UsableURIFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/viewerproxy/webinterface/Reporting.class */
public class Reporting {
    /** Logger shared by all static helpers in this class. */
    private static final Logger log = LoggerFactory.getLogger(Reporting.class);

    /** Regular-expression tail matching any ARC or WARC file name, optionally gzip-compressed. */
    static final String archivefile_suffix = ".*\\.(w)?arc(\\.gz)?";

    /** Regular-expression tail matching numbered metadata ARC or WARC file names, optionally gzip-compressed. */
    static final String metadatafile_suffix = "-metadata-[0-9]+\\.(w)?arc(\\.gz)?";

    /** Private constructor: this class only offers static methods and is never instantiated. */
    private Reporting() {
        // Intentionally empty.
    }

    /**
     * Lists the names of the files that belong to a job, using either the file resolver or a batch job
     * depending on the {@code USE_BITMAG_HADOOP_BACKEND} setting.
     *
     * @param j the id of the job
     * @param str the prefix of the job's archive file names; it is combined with {@link #archivefile_suffix}
     * @return the file names found for the job
     */
    public static List<String> getFilesForJob(long j, String str) {
        if (Settings.getBoolean(CommonSettings.USE_BITMAG_HADOOP_BACKEND)) {
            return getFilesForJobFileResolver(j, str);
        }
        return getFilesForJobBatch(j, str);
    }

    /**
     * Finds the files of a job through the configured {@link FileResolver}.
     *
     * <p>Metadata files and archive files are looked up separately by pattern. The combined result keeps
     * only names the resolver can resolve again via {@code getPath}, without duplicates and sorted.
     *
     * @param j the id of the job
     * @param str the prefix of the job's archive file names
     * @return the sorted, distinct file names
     */
    private static List<String> getFilesForJobFileResolver(long j, String str) {
        FileResolver resolver =
                (FileResolver) SettingsFactory.getInstance(CommonSettings.FILE_RESOLVER_CLASS, new Object[0]);

        String metadataPattern = getMetadataFilePatternForJobId(j);
        log.debug("Looking for metadata files matching {}.", metadataPattern);
        List<Path> metadataPaths = resolver.getPaths(Pattern.compile(metadataPattern));
        log.debug("Initial found metadata files: {}", metadataPaths);

        String archivePattern = str + archivefile_suffix;
        log.debug("Looking for archive files matching {}.", archivePattern);
        List<Path> archivePaths = resolver.getPaths(Pattern.compile(archivePattern));
        log.debug("Initial found archive files {}.", archivePaths);

        List<String> fileNames = Stream.concat(metadataPaths.stream(), archivePaths.stream())
                .map(path -> path.getFileName().toString())
                // Drop every name the resolver cannot resolve on its own.
                .filter(name -> resolver.getPath(name) != null)
                .distinct()
                .sorted()
                .collect(Collectors.toList());
        log.debug("After filtering by collection we have the following files: {}", fileNames);
        return fileNames;
    }

    /**
     * Finds the files of a job by running a {@link FileListJob} batch job against the replica named by the
     * {@code USE_REPLICA_ID} setting.
     *
     * <p>The batch result is copied to a temporary file, which is always removed again, including when the
     * batch call, the copy or the read fails.
     *
     * @param j the id of the job; must be positive
     * @param str the prefix of the job's archive file names
     * @return the sorted, distinct file names reported by the batch job
     * @throws IOFailure if the temporary file cannot be created
     */
    private static List<String> getFilesForJobBatch(long j, String str) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        FileListJob fileListJob = new FileListJob();
        List<String> patterns = new ArrayList<>();
        patterns.add(getMetadataFilePatternForJobId(j));
        patterns.add(str + archivefile_suffix);
        fileListJob.processOnlyFilesMatching(patterns);
        try {
            File resultFile = File.createTempFile(j + "-files", ".txt", FileUtils.getTempDir());
            try {
                ArcRepositoryClientFactory.getViewerInstance()
                        .batch(fileListJob, Settings.get(CommonSettings.USE_REPLICA_ID), new String[0])
                        .getResultFile()
                        .copyTo(resultFile);
                // Route through a set to drop duplicate names before sorting.
                List<String> fileNames = new ArrayList<>(new HashSet<>(FileUtils.readListFromFile(resultFile)));
                Collections.sort(fileNames);
                return fileNames;
            } finally {
                // Clean up on failure as well, so no temp file is left behind.
                FileUtils.remove(resultFile);
            }
        } catch (IOException e) {
            throw new IOFailure("Could not create temporary file", e);
        }
    }

    /**
     * Returns the CDX records extracted from the metadata files of a job, using Hadoop or a batch job
     * depending on the {@code USE_BITMAG_HADOOP_BACKEND} setting.
     *
     * @param j the id of the job; must be positive
     * @return the CDX records of the job's metadata files
     */
    public static List<CDXRecord> getMetadataCDXRecordsForJob(long j) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        if (Settings.getBoolean(CommonSettings.USE_BITMAG_HADOOP_BACKEND)) {
            return getRecordsUsingHadoop(j);
        }
        return getRecordsUsingBatch(j);
    }

    /**
     * Locates the CDX cache file of a job: a file named after the job id inside the {@code cdxcache}
     * subdirectory of the directory configured as {@code METADATA_CACHE}. The directory is created if
     * missing; the file itself is not.
     *
     * @param j the id of the job
     * @return the (possibly not yet existing) cache file
     */
    private static File getCDXCacheFile(long j) {
        File metadataCacheDir = new File(Settings.get(CommonSettings.METADATA_CACHE));
        File cdxCacheDir = new File(metadataCacheDir, "cdxcache");
        cdxCacheDir.mkdirs();
        return new File(cdxCacheDir, Long.toString(j));
    }

    /**
     * Reads the cached CDX records of a job, if there are any.
     *
     * @param j the id of the job
     * @return the records parsed from the cache file, or {@code null} when the cache file is missing or empty
     * @throws RuntimeException if the cache file cannot be read
     */
    private static List<CDXRecord> getCachedCDXRecords(long j) {
        File cacheFile = getCDXCacheFile(j);
        if (cacheFile.exists() && cacheFile.length() != 0) {
            List<String> cdxLines;
            try {
                cdxLines = org.apache.commons.io.FileUtils.readLines(cacheFile);
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            return HadoopJobUtils.getCDXRecordListFromCDXLines(cdxLines);
        }
        // No usable cache for this job.
        return null;
    }

    /* JADX WARN: Failed to calculate best type for var: r12v1 ??
    java.lang.NullPointerException
     */
    /* JADX WARN: Failed to calculate best type for var: r13v0 ??
    java.lang.NullPointerException
     */
    /* JADX WARN: Multi-variable type inference failed. Error: java.lang.NullPointerException: Cannot invoke "jadx.core.dex.instructions.args.RegisterArg.getSVar()" because the return value of "jadx.core.dex.nodes.InsnNode.getResult()" is null
    	at jadx.core.dex.visitors.typeinference.AbstractTypeConstraint.collectRelatedVars(AbstractTypeConstraint.java:31)
    	at jadx.core.dex.visitors.typeinference.AbstractTypeConstraint.<init>(AbstractTypeConstraint.java:19)
    	at jadx.core.dex.visitors.typeinference.TypeSearch$1.<init>(TypeSearch.java:376)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.makeMoveConstraint(TypeSearch.java:376)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.makeConstraint(TypeSearch.java:361)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.collectConstraints(TypeSearch.java:341)
    	at java.base/java.util.ArrayList.forEach(ArrayList.java:1596)
    	at jadx.core.dex.visitors.typeinference.TypeSearch.run(TypeSearch.java:60)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.runMultiVariableSearch(FixTypesVisitor.java:116)
    	at jadx.core.dex.visitors.typeinference.FixTypesVisitor.visit(FixTypesVisitor.java:91)
     */
    /* JADX WARN: Not initialized variable reg: 12, insn: 0x0107: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r12 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) A[TRY_LEAVE], block:B:33:0x0107 */
    /* JADX WARN: Not initialized variable reg: 13, insn: 0x010c: MOVE (r0 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]) = (r13 I:??[int, float, boolean, short, byte, char, OBJECT, ARRAY]), block:B:35:0x010c */
    /* JADX WARN: Type inference failed for: r12v1, types: [org.apache.hadoop.fs.FileSystem] */
    /* JADX WARN: Type inference failed for: r13v0, types: [java.lang.Throwable] */
    /**
     * Returns the CDX records of a job's metadata files, serving them from the local CDX cache when
     * possible and otherwise running a Hadoop job and caching its output lines.
     *
     * <p>The Hadoop {@link FileSystem} is opened in a try-with-resources statement so it is closed on
     * every path, including failures.
     *
     * @param j the id of the job
     * @return the CDX records; an empty list when the Hadoop job produced no output lines
     * @throws IOFailure if the Hadoop filesystem cannot be used or the job output cannot be collected
     */
    private static List<CDXRecord> getRecordsUsingHadoop(long j) {
        log.info("Getting records for jobid {}.", j);
        List<CDXRecord> cachedRecords = getCachedCDXRecords(j);
        if (cachedRecords != null) {
            log.info("Found {} cached records for jobid {}.", cachedRecords.size(), j);
            return cachedRecords;
        }
        File cacheFile = getCDXCacheFile(j);
        log.info("Cached records not found for jobid {} so fetching them to {} via hadoop.", j,
                cacheFile.getAbsolutePath());
        Configuration conf = HadoopJobUtils.getConf();
        String metadataPattern = getMetadataFilePatternForJobId(j);
        try (FileSystem fileSystem = FileSystem.newInstance(conf)) {
            HadoopJob hadoopJob = new HadoopJob(j, new MetadataCDXExtractionStrategy(j, fileSystem));
            hadoopJob.processOnlyFilesMatching(metadataPattern);
            hadoopJob.prepareJobInputOutput(fileSystem);
            hadoopJob.run();
            try {
                org.apache.commons.io.FileUtils.writeLines(cacheFile,
                        HadoopJobUtils.collectOutputLines(fileSystem, hadoopJob.getJobOutputDir()));
            } catch (IOException e) {
                log.error("Failed getting CDX lines output for Hadoop job with ID: {}", j, e);
                throw new IOFailure("Failed getting " + hadoopJob.getJobType() + " job results", e);
            }
            // Read back through the cache so cached and fresh results are parsed identically.
            List<CDXRecord> freshRecords = getCachedCDXRecords(j);
            // An empty output leaves an empty cache file, which reads back as null; return an empty list
            // instead, as the batch-based variant does.
            return freshRecords != null ? freshRecords : new ArrayList<>();
        } catch (IOException e) {
            log.error("Error instantiating Hadoop filesystem for job {}.", j, e);
            throw new IOFailure("Failed instantiating Hadoop filesystem.", e);
        }
    }

    /**
     * Returns the CDX records of a job's metadata files by running a CDX extraction batch job against the
     * replica named by the {@code USE_REPLICA_ID} setting.
     *
     * <p>The extraction job uses the {@code EXCLUDE_NON_WARCINFO_RECORDS} filter. The batch result is
     * copied to a temporary file that is always removed afterwards, and each line is split on whitespace
     * into one {@link CDXRecord}.
     *
     * @param j the id of the job
     * @return the CDX records parsed from the batch result
     * @throws IOFailure if the temporary file cannot be created or the result cannot be read
     */
    private static List<CDXRecord> getRecordsUsingBatch(long j) {
        ArchiveExtractCDXJob cdxJob = new ArchiveExtractCDXJob(false) {
            @Override
            public ArchiveBatchFilter getFilter() {
                return ArchiveBatchFilter.EXCLUDE_NON_WARCINFO_RECORDS;
            }
        };
        cdxJob.processOnlyFilesMatching(getMetadataFilePatternForJobId(j));
        try {
            File resultFile = File.createTempFile(j + "-reports", FileUtils.CDX_EXTENSION, FileUtils.getTempDir());
            try {
                ArcRepositoryClientFactory.getViewerInstance()
                        .batch(cdxJob, Settings.get(CommonSettings.USE_REPLICA_ID), new String[0])
                        .getResultFile()
                        .copyTo(resultFile);
                try (BufferedReader reader = new BufferedReader(new FileReader(resultFile))) {
                    List<CDXRecord> records = new ArrayList<>();
                    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                        records.add(new CDXRecord(line.split("\\s+")));
                    }
                    return records;
                } catch (IOException e) {
                    throw new IOFailure("Unable to read results from file '" + resultFile + UsableURIFactory.SQUOT, e);
                }
            } finally {
                // Runs on failure of the batch call or copy as well, so the temp file never leaks.
                FileUtils.remove(resultFile);
            }
        } catch (IOException e) {
            throw new IOFailure("Could not create temporary file", e);
        }
    }

    /**
     * Runs a {@link HarvestedUrlsForDomainBatchJob} for the given domain over the metadata files of a job
     * and returns the result as a file sorted by crawl-log timestamp.
     *
     * @param str the domain; must not be null or empty
     * @param j the id of the job; must be positive
     * @return a temporary file holding the sorted batch result
     */
    public static File getCrawlLogForDomainInJob(String str, long j) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        ArgumentNotValid.checkNotNullOrEmpty(str, "String domain");

        String metadataPattern = getMetadataFilePatternForJobId(j);
        HarvestedUrlsForDomainBatchJob domainJob = new HarvestedUrlsForDomainBatchJob(str);
        domainJob.processOnlyFilesMatching(metadataPattern);
        return createSortedResultFile(domainJob);
    }

    /**
     * Creates a temporary file in the shared temp directory and registers it for deletion on JVM exit.
     * The file name starts with {@code temp} and ends with the given identifier followed by {@code .txt}.
     *
     * @param str the identifier placed in front of the {@code .txt} extension
     * @return the newly created temporary file
     * @throws IOFailure if the file cannot be created
     */
    private static File createTempResultFile(String str) {
        File tempFile;
        try {
            tempFile = File.createTempFile("temp", str + ".txt", FileUtils.getTempDir());
        } catch (IOException e) {
            throw new IOFailure("Unable to create temporary file", e);
        }
        tempFile.deleteOnExit();
        return tempFile;
    }

    /**
     * Writes the given crawl-log lines to a temporary file and returns a second temporary file that holds
     * the same lines sorted by timestamp. The unsorted file is removed once sorting is done.
     *
     * @param list the crawl-log lines to sort
     * @return the temporary file containing the sorted lines
     */
    private static File createSortedResultFile(List<String> list) {
        String fileId = UUID.randomUUID().toString();
        File unsortedFile = createTempResultFile(fileId);
        File sortedFile = createTempResultFile(fileId + "-sorted");

        FileUtils.writeCollectionToFile(unsortedFile, list);
        FileUtils.sortCrawlLogOnTimestamp(unsortedFile, sortedFile);
        // Only the sorted copy is handed back to the caller.
        FileUtils.remove(unsortedFile);
        return sortedFile;
    }

    /**
     * Runs the given batch job against the replica named by the {@code USE_REPLICA_ID} setting, copies the
     * result to a temporary file and returns a second temporary file holding the result sorted by
     * crawl-log timestamp. The unsorted file is removed once sorting is done.
     *
     * @param fileBatchJob the batch job whose result is to be sorted
     * @return the temporary file containing the sorted result
     */
    private static File createSortedResultFile(FileBatchJob fileBatchJob) {
        String fileId = UUID.randomUUID().toString();
        File unsortedFile = createTempResultFile(fileId);
        File sortedFile = createTempResultFile(fileId);

        String replicaId = Settings.get(CommonSettings.USE_REPLICA_ID);
        ArcRepositoryClientFactory.getViewerInstance()
                .batch(fileBatchJob, replicaId, new String[0])
                .getResultFile()
                .copyTo(unsortedFile);
        FileUtils.sortCrawlLogOnTimestamp(unsortedFile, sortedFile);
        // Only the sorted copy is handed back to the caller.
        FileUtils.remove(unsortedFile);
        return sortedFile;
    }

    /**
     * Returns the crawl-log lines of a job that match a regular expression, sorted by timestamp. Uses
     * Hadoop when {@code USE_BITMAG_HADOOP_BACKEND} is set and a {@link CrawlLogLinesMatchingRegexp} batch
     * job otherwise.
     *
     * @param j the id of the job; must be positive
     * @param str the regular expression; must not be null or empty
     * @return a temporary file holding the sorted matching lines
     */
    public static File getCrawlLoglinesMatchingRegexp(long j, String str) {
        ArgumentNotValid.checkPositive(j, Constants.JOBID_PARAM);
        ArgumentNotValid.checkNotNullOrEmpty(str, "String regexp");

        File sortedResult;
        if (Settings.getBoolean(CommonSettings.USE_BITMAG_HADOOP_BACKEND)) {
            sortedResult = getCrawlLogLinesUsingHadoop(j, str);
        } else {
            CrawlLogLinesMatchingRegexp regexpJob = new CrawlLogLinesMatchingRegexp(str);
            regexpJob.processOnlyFilesMatching(getMetadataFilePatternForJobId(j));
            sortedResult = createSortedResultFile(regexpJob);
        }
        return sortedResult;
    }

    /**
     * Locates the crawl-log cache file of a job: a file named after the job id inside the
     * {@code crawllog_cache} subdirectory of the directory configured as {@code METADATA_CACHE}. The
     * directory is created if missing; the file itself is not.
     *
     * @param j the id of the job
     * @return the (possibly not yet existing) cache file
     */
    private static File getCrawlLogCache(long j) {
        File metadataCacheDir = new File(Settings.get(CommonSettings.METADATA_CACHE));
        File crawlLogCacheDir = new File(metadataCacheDir, "crawllog_cache");
        crawlLogCacheDir.mkdirs();
        return new File(crawlLogCacheDir, Long.toString(j));
    }

    /**
     * Returns the lines of a job's crawl log (from the cache or fetched via Hadoop) that fully match the
     * regular expression, sorted by timestamp.
     *
     * @param j the id of the job
     * @param str the regular expression each line must match
     * @return a temporary file holding the sorted matching lines
     */
    private static File getCrawlLogLinesUsingHadoop(long j, String str) {
        File crawlLog = getCrawlLogFromCacheOrHdfs(j);
        List<String> matchingLines = getMatchingStringsFromFile(crawlLog, str);
        return createSortedResultFile(matchingLines);
    }

    /**
     * Returns the lines of a job's crawl log (from the cache or fetched via Hadoop) that belong to the
     * given domain according to {@link #lineMatchesDomain(String, String)}, sorted by timestamp.
     *
     * @param j the id of the job
     * @param str the domain to select lines for
     * @return a temporary file holding the sorted matching lines
     */
    public static File getCrawlLogLinesMatchingDomain(long j, String str) {
        File crawlLog = getCrawlLogFromCacheOrHdfs(j);
        List<String> domainLines = getMatchingDomainStringsFromFile(crawlLog, str);
        return createSortedResultFile(domainLines);
    }

    /**
     * Reads a crawl-log file and keeps the lines that belong to the given domain according to
     * {@link #lineMatchesDomain(String, String)}.
     *
     * <p>Reading is best-effort: if the file cannot be read, the failure is logged and an empty list is
     * returned.
     *
     * @param file the crawl-log file to read
     * @param str the domain to select lines for
     * @return the matching lines in file order, or an empty list if the file could not be read
     */
    private static List<String> getMatchingDomainStringsFromFile(File file, String str) {
        try {
            return org.apache.commons.io.FileUtils.readLines(file).stream()
                    .filter(line -> lineMatchesDomain(line, str))
                    .collect(Collectors.toList());
        } catch (IOException e) {
            // Report through the class logger rather than stderr; callers still get an empty result.
            log.error("Unable to read crawl log lines from file '{}'", file, e);
            return new ArrayList<>();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells whether a whitespace-separated line refers to the given domain.
     *
     * <p>The eleventh field of the line (index 10) is parsed as a URL. The line matches when the URL's
     * host equals the domain or is a subdomain of it (ends with {@code "." + domain}). Host names are
     * compared case-insensitively. Lines with fewer than eleven fields, an unparsable URL in that field,
     * or a {@code null} line or domain never match.
     *
     * @param str the line to inspect (presumably a crawl-log line; verify against callers)
     * @param str2 the domain to match against
     * @return {@code true} if the host in the eleventh field is the domain or one of its subdomains
     */
    public static boolean lineMatchesDomain(String str, String str2) {
        if (str == null || str2 == null) {
            return false;
        }
        String[] fields = str.split("\\s+");
        if (fields.length <= 10) {
            return false;
        }
        String host;
        try {
            host = new URL(fields[10]).getHost();
        } catch (java.net.MalformedURLException e) {
            // Not a URL (for example a "-" placeholder): cannot belong to any domain.
            return false;
        }
        if (host == null) {
            return false;
        }
        if (host.equalsIgnoreCase(str2)) {
            return true;
        }
        // Subdomain check: the host must end with ".<domain>", ignoring case.
        String subdomainSuffix = "." + str2;
        int offset = host.length() - subdomainSuffix.length();
        return offset >= 0 && host.regionMatches(true, offset, subdomainSuffix, 0, subdomainSuffix.length());
    }

    /**
     * Reads a file and keeps the lines that match the regular expression in their entirety.
     *
     * @param file the file to read
     * @param str the regular expression each line must fully match
     * @return the matching lines in file order
     * @throws RuntimeException if the file cannot be read
     */
    private static List<String> getMatchingStringsFromFile(File file, String str) {
        Pattern linePattern = Pattern.compile(str);
        List<String> allLines;
        try {
            allLines = org.apache.commons.io.FileUtils.readLines(file);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        List<String> matchingLines = new ArrayList<>();
        for (String line : allLines) {
            if (linePattern.matcher(line).matches()) {
                matchingLines.add(line);
            }
        }
        return matchingLines;
    }

    /**
     * Returns the locally cached crawl log of a job, first filling the cache through
     * {@link #getCrawlLogUsingHadoop(long)} when the cache file is missing or empty.
     *
     * @param j the id of the job
     * @return the crawl-log cache file
     * @throws RuntimeException if the fetched crawl log cannot be copied into the cache
     */
    private static File getCrawlLogFromCacheOrHdfs(long j) {
        File cacheFile = getCrawlLogCache(j);
        if (cacheFile.exists() && cacheFile.length() == 0) {
            log.info("Overwriting empty cache file {}.", cacheFile.getAbsolutePath());
        }
        boolean cacheUsable = cacheFile.exists() && cacheFile.length() != 0;
        if (cacheUsable) {
            return cacheFile;
        }
        try {
            org.apache.commons.io.FileUtils.copyFile(getCrawlLogUsingHadoop(j), cacheFile);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return cacheFile;
    }

    /**
     * Extracts the crawl log of a job from its metadata files with a Hadoop job and returns the lines
     * sorted by timestamp.
     *
     * <p>The Hadoop {@link FileSystem} is opened in a try-with-resources statement so it is closed on
     * every path, including failures.
     *
     * @param j the id of the job
     * @return a temporary file holding the sorted crawl-log lines
     * @throws IOFailure if the Hadoop filesystem cannot be used or the job output cannot be collected
     */
    private static File getCrawlLogUsingHadoop(long j) {
        String metadataPattern = getMetadataFilePatternForJobId(j);
        Configuration conf = HadoopJobUtils.getConf();
        conf.setPattern("regex", Pattern.compile(dk.netarkivet.common.Constants.ALL_PATTERN));
        try (FileSystem fileSystem = FileSystem.newInstance(conf)) {
            HadoopJob hadoopJob = new HadoopJob(j, new CrawlLogExtractionStrategy(j, fileSystem));
            hadoopJob.processOnlyFilesMatching(metadataPattern);
            hadoopJob.prepareJobInputOutput(fileSystem);
            hadoopJob.run();
            try {
                return createSortedResultFile(
                        HadoopJobUtils.collectOutputLines(fileSystem, hadoopJob.getJobOutputDir()));
            } catch (IOException e) {
                log.error("Failed getting crawl log lines output for job with ID: {}", j, e);
                throw new IOFailure("Failed getting " + hadoopJob.getJobType() + " job results", e);
            }
        } catch (IOException e) {
            log.error("Error instantiating Hadoop filesystem for job {}.", j, e);
            throw new IOFailure("Failed instantiating Hadoop filesystem.", e);
        }
    }

    /**
     * Builds the regular expression that matches the metadata file names of a job: an optional prefix
     * ending in {@code -}, the job id, an optional part starting with {@code -}, and finally
     * {@link #metadatafile_suffix}.
     *
     * @param j the id of the job
     * @return the regular expression as a string
     */
    private static String getMetadataFilePatternForJobId(long j) {
        StringBuilder pattern = new StringBuilder("(.*-)?");
        pattern.append(j);
        pattern.append("(-.*)?");
        pattern.append(metadatafile_suffix);
        return pattern.toString();
    }
}
