package dk.netarkivet.wayback.indexer;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
import dk.netarkivet.common.distribute.arcrepository.BatchStatus;
import dk.netarkivet.common.distribute.arcrepository.PreservationArcRepositoryClient;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.SettingsFactory;
import dk.netarkivet.common.utils.arc.ARCUtils;
import dk.netarkivet.common.utils.batch.FileBatchJob;
import dk.netarkivet.common.utils.hadoop.HadoopJob;
import dk.netarkivet.common.utils.hadoop.HadoopJobUtils;
import dk.netarkivet.common.utils.service.FileResolver;
import dk.netarkivet.common.utils.service.SimpleFileResolver;
import dk.netarkivet.common.utils.warc.WARCUtils;
import dk.netarkivet.wayback.WaybackSettings;
import dk.netarkivet.wayback.batch.DeduplicationCDXExtractionBatchJob;
import dk.netarkivet.wayback.batch.WaybackCDXExtractionARCBatchJob;
import dk.netarkivet.wayback.batch.WaybackCDXExtractionWARCBatchJob;
import dk.netarkivet.wayback.hadoop.CDXStrategy;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.Date;
import java.util.Iterator;
import java.util.UUID;
import javax.persistence.Entity;
import javax.persistence.Id;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.security.krb5.KrbException;

@Entity
/* loaded from: input_file:dk/netarkivet/wayback/indexer/ArchiveFile.class */
public class ArchiveFile {
    /** Logger shared by all instances of this class. */
    private static final Logger log = LoggerFactory.getLogger(ArchiveFile.class);
    /** Name of the archive file; its getter carries the {@code @Id} annotation. */
    private String filename;
    /** Name of the index file as first produced in the wayback temp directory (set when results are collected). */
    private String originalIndexFileName;
    /** Number of failed indexing attempts; incremented each time a batch error is logged. */
    private int indexingFailedAttempts;
    /** True once an index for this file has been collected and moved to the output directory. */
    private boolean isIndexed = false;
    /** Date associated with the indexing of this file; only changed through its setter in this class. */
    private Date indexedDate = null;

    /**
     * Returns the name the index file was given when it was first produced.
     *
     * @return the original index file name, or {@code null} if none has been recorded
     */
    public String getOriginalIndexFileName() {
        return originalIndexFileName;
    }

    /**
     * Records the name the index file was given when it was first produced.
     *
     * @param str the original index file name
     */
    public void setOriginalIndexFileName(String str) {
        originalIndexFileName = str;
    }

    /**
     * Returns the stored indexing date.
     *
     * @return the indexed date, or {@code null} if none has been set
     */
    public Date getIndexedDate() {
        return indexedDate;
    }

    /**
     * Stores the indexing date.
     *
     * @param date the date to store; the reference is kept as-is (no defensive copy)
     */
    public void setIndexedDate(Date date) {
        indexedDate = date;
    }

    /**
     * Returns the name of the archive file, which is the persistence identifier of this entity.
     *
     * @return the archive file name
     */
    @Id
    public String getFilename() {
        return filename;
    }

    /**
     * Sets the name of the archive file.
     *
     * @param str the archive file name
     */
    public void setFilename(String str) {
        filename = str;
    }

    /**
     * Tells whether this file has been marked as indexed.
     *
     * @return {@code true} if the file is marked as indexed
     */
    public boolean isIndexed() {
        return isIndexed;
    }

    /**
     * Marks this file as indexed or not indexed.
     *
     * @param z the new value of the indexed flag
     */
    public void setIndexed(boolean z) {
        isIndexed = z;
    }

    /**
     * Returns how many indexing attempts have been recorded as failed.
     *
     * @return the number of failed indexing attempts
     */
    public int getIndexingFailedAttempts() {
        return indexingFailedAttempts;
    }

    /**
     * Sets the number of failed indexing attempts.
     *
     * @param i the new failure count
     */
    public void setIndexingFailedAttempts(int i) {
        indexingFailedAttempts = i;
    }

    /**
     * Indexes this archive file, using either the Hadoop backend or the batch backend depending on the
     * {@code CommonSettings.USE_BITMAG_HADOOP_BACKEND} setting.
     *
     * @throws IllegalState if this file is already marked as indexed
     */
    public void index() throws IllegalState {
        log.info("Indexing {}", getFilename());
        if (isIndexed) {
            throw new IllegalState("Attempted to index file '" + filename + "' which is already indexed");
        }
        final boolean useHadoop = Settings.getBoolean(CommonSettings.USE_BITMAG_HADOOP_BACKEND);
        if (!useHadoop) {
            batchIndex();
            return;
        }
        hadoopIndex();
    }

    /**
     * Indexes this file by running a CDX extraction job through Hadoop.
     *
     * <p>Files that are neither ARC nor WARC (as judged by their name) are skipped with a warning.
     * Otherwise a job input file naming this archive file is uploaded, the job is run, and on exit
     * code 0 the results are collected via {@link #collectHadoopResults(FileSystem, Path)}. A non-zero
     * exit code or an {@link IOException} is only logged; the file is then left unmarked.
     */
    private void hadoopIndex() {
        if (!(ARCUtils.isARC(this.filename) || WARCUtils.isWarc(this.filename))) {
            log.warn("Skipping indexing of file with filename '{}'", this.filename);
            return;
        }
        Configuration conf = HadoopJobUtils.getConf();
        conf.set("cdx_filename", this.filename);
        // try-with-resources closes the FileSystem on every path, including when job setup or
        // execution throws; the previous code only closed it after a fully successful run.
        try (FileSystem fileSystem = FileSystem.newInstance(conf)) {
            CDXStrategy jobStrategy = new CDXStrategy(0L, fileSystem);
            // NOTE(review): this HadoopJob is configured but the job itself is started through the
            // strategy below; kept because its constructor/setters may have side effects - confirm.
            HadoopJob job = new HadoopJob(0L, jobStrategy);
            UUID jobId = UUID.randomUUID();
            Path jobInputFile = jobStrategy.createJobInputFile(jobId);
            job.setJobInputFile(jobInputFile);
            createJobInputFile(this.filename, jobInputFile, fileSystem);
            Path jobOutputDir = jobStrategy.createJobOutputDir(jobId);
            job.setJobOutputDir(jobOutputDir);
            int exitCode = jobStrategy.runJob(jobInputFile, jobOutputDir);
            if (exitCode == 0) {
                log.info("CDX job for file {} was a success!", this.filename);
                collectHadoopResults(fileSystem, jobOutputDir);
            } else {
                log.warn("Hadoop job failed with exit code '{}'", exitCode);
            }
        } catch (IOException e) {
            log.warn("Failure in indexing {}", this.filename, e);
        }
    }

    /**
     * Command-line entry point: performs a Kerberos login and runs the Hadoop indexing for one file.
     *
     * @param strArr command-line arguments; the first element is the name of the archive file to index
     * @throws KrbException declared for the Kerberos login step
     * @throws IOException declared for the Kerberos login step
     * @throws IllegalArgumentException if no file name argument is supplied
     */
    public static void main(String[] strArr) throws KrbException, IOException {
        // Validate before logging in, so a missing argument gives a clear usage error instead of an
        // ArrayIndexOutOfBoundsException raised after the login side effect.
        if (strArr == null || strArr.length < 1) {
            throw new IllegalArgumentException("Usage: ArchiveFile <archive-filename>");
        }
        HadoopJobUtils.doKerberosLogin();
        ArchiveFile archiveFile = new ArchiveFile();
        archiveFile.setFilename(strArr[0]);
        archiveFile.hadoopIndex();
    }

    /**
     * Writes a one-line job input file that points at the given archive file and copies it to the
     * given location on the supplied file system.
     *
     * <p>The line has the form {@code file://<resolved path>}. The local temporary file used to stage
     * the content is deleted again before this method returns.
     *
     * @param str name of the archive file to resolve
     * @param path destination of the job input file on {@code fileSystem}
     * @param fileSystem file system the input file is copied to
     * @throws FileNotFoundException if the file resolver returns no path for {@code str}
     * @throws IOException if the temporary file cannot be created or written, or the copy fails
     */
    private void createJobInputFile(String str, Path path, FileSystem fileSystem) throws IOException {
        File localInputFile = File.createTempFile("cdxextract", ".txt", Settings.getFile(CommonSettings.DIR_COMMONTEMPDIR));
        try {
            FileResolver fileResolver = (FileResolver) SettingsFactory.getInstance(CommonSettings.FILE_RESOLVER_CLASS, new Object[0]);
            if (fileResolver instanceof SimpleFileResolver) {
                // Only the simple resolver needs to be told which directory to look in.
                ((SimpleFileResolver) fileResolver).setDirectory(Paths.get(Settings.get(CommonSettings.HADOOP_MAPRED_INPUT_FILES_PARENT_DIR)));
            }
            java.nio.file.Path resolvedPath = fileResolver.getPath(str);
            if (resolvedPath == null) {
                log.warn("No path identified for file '{}'", str);
                throw new FileNotFoundException("File resolver failed to identity file " + str);
            }
            String inputLine = "file://" + resolvedPath.toString();
            log.info("Inserting {} in {}.", inputLine, localInputFile);
            // Close the writer before copying so the content is flushed to disk first.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(localInputFile))) {
                writer.write(inputLine);
                writer.newLine();
            }
            log.info("Copying file with input paths {} to hdfs filesystem {}, {}.", localInputFile, fileSystem, path);
            Path localSource = new Path(localInputFile.getAbsolutePath());
            log.info("Copying from {}", localSource);
            fileSystem.copyFromLocalFile(localSource, path);
        } finally {
            // The staged file is no longer needed once copied (or once we failed); without this,
            // one temp file per indexed archive file accumulates in the common temp directory.
            if (localInputFile.exists() && !localInputFile.delete()) {
                log.warn("Could not delete temporary job input file '{}'", localInputFile.getAbsolutePath());
            }
        }
    }

    /**
     * Collects the output lines of a finished Hadoop job into a new index file, moves that file to the
     * wayback output directory, marks this file as indexed and persists the change.
     *
     * <p>If the output cannot be collected, the failure is logged, the partial local file is removed
     * and this file is NOT marked as indexed, so an incomplete index is never recorded as final.
     *
     * @param fileSystem file system holding the job output
     * @param path job output directory to collect lines from
     */
    private void collectHadoopResults(FileSystem fileSystem, Path path) {
        File tempIndexFile = makeNewFileInWaybackTempDir();
        log.info("Collecting results for {} from {} to {}", getFilename(), path, tempIndexFile.getAbsolutePath());
        // try-with-resources closes the stream on both the success and the failure path.
        try (FileOutputStream output = new FileOutputStream(tempIndexFile)) {
            HadoopJobUtils.collectOutputLines(fileSystem, path, output);
        } catch (IOException e) {
            log.warn("Could not collect index results from '{}'", path.toString(), e);
            // Do not fall through: the index is missing or truncated, so it must not be published
            // and the file must stay unindexed.
            if (tempIndexFile.exists() && !tempIndexFile.delete()) {
                log.warn("Could not delete incomplete index file '{}'", tempIndexFile.getAbsolutePath());
            }
            return;
        }
        log.info("Collected {} bytes of index for {} from {} to {}", tempIndexFile.length(), getFilename(), path, tempIndexFile.getAbsolutePath());
        File finalIndexFile = moveFileToWaybackOutputDir(tempIndexFile);
        log.info("Moved index for {} to {}", getFilename(), finalIndexFile.getAbsolutePath());
        this.originalIndexFileName = tempIndexFile.getName();
        this.isIndexed = true;
        log.info("Indexed '{}' to '{}'. Marking as indexed in DB.", this.filename, finalIndexFile.getAbsolutePath());
        new ArchiveFileDAO().update(this);
    }

    /**
     * Indexes this file by submitting a CDX extraction batch job to the archive repository.
     *
     * <p>The job type is chosen from the file name: metadata files get a deduplication job, ARC and
     * WARC files get their respective extraction job, and anything else is skipped with a warning.
     * A batch status reporting failed files, zero processed files or exceptions is logged as an error;
     * otherwise the results are collected.
     */
    private void batchIndex() {
        final String metadataPattern = "(.*)" + Settings.get(CommonSettings.METADATAFILE_REGEX_SUFFIX);
        final FileBatchJob job;
        if (filename.matches(metadataPattern)) {
            job = new DeduplicationCDXExtractionBatchJob();
        } else if (ARCUtils.isARC(filename)) {
            job = new WaybackCDXExtractionARCBatchJob();
        } else if (WARCUtils.isWarc(filename)) {
            job = new WaybackCDXExtractionWARCBatchJob();
        } else {
            log.warn("Skipping indexing of file with filename '{}'", filename);
            return;
        }
        job.processOnlyFileNamed(filename);

        PreservationArcRepositoryClient client = ArcRepositoryClientFactory.getPreservationInstance();
        String replicaId = Settings.get(WaybackSettings.WAYBACK_REPLICA);
        log.info("Submitting {} for {} to {}", job.getClass().getName(), getFilename(), replicaId);
        BatchStatus status = client.batch(job, replicaId, new String[0]);
        log.info("Batch job for {} returned", getFilename());

        // A clean run has no failed files, at least one processed file, and no exceptions.
        boolean cleanRun = status.getFilesFailed().isEmpty()
                && status.getNoOfFilesProcessed() != 0
                && status.getExceptions().isEmpty();
        if (!cleanRun) {
            logBatchError(status);
            return;
        }
        if (status.getNoOfFilesProcessed() > 1) {
            log.warn("Processed '{}' files for {}.\n This may indicate a doublet in the arcrepository. Proceeding with caution.", status.getNoOfFilesProcessed(), getFilename());
        }
        try {
            collectResults(status);
        } catch (Exception e) {
            logBatchError(status);
            log.error("Failed to retrieve results", e);
        }
    }

    /**
     * Copies the results of a batch job into a new index file, moves it to the wayback output
     * directory, marks this file as indexed and persists the change.
     *
     * @param batchStatus status object of the finished batch job, holding the results to copy
     */
    private void collectResults(BatchStatus batchStatus) {
        final File tempIndexFile = makeNewFileInWaybackTempDir();
        final String tempIndexPath = tempIndexFile.getAbsolutePath();
        log.info("Collecting index for '{}' to '{}'", getFilename(), tempIndexPath);
        batchStatus.copyResults(tempIndexFile);
        log.info("Finished collecting index for '{}' to '{}'", getFilename(), tempIndexFile.getAbsolutePath());
        final File finalIndexFile = moveFileToWaybackOutputDir(tempIndexFile);
        originalIndexFileName = tempIndexFile.getName();
        isIndexed = true;
        log.info("Indexed '{}' to '{}'", filename, finalIndexFile.getAbsolutePath());
        new ArchiveFileDAO().update(this);
    }

    /**
     * Builds a handle for a new, randomly named file in the wayback index temp directory, creating
     * the directory via {@code FileUtils.createDir}. The file itself is not created here.
     *
     * @return a file handle with a random UUID as its name inside the temp directory
     */
    private File makeNewFileInWaybackTempDir() {
        final String randomName = UUID.randomUUID().toString();
        final File tempDir = new File(Settings.get(WaybackSettings.WAYBACK_INDEX_TEMPDIR));
        FileUtils.createDir(tempDir);
        return new File(tempDir, randomName);
    }

    /**
     * Moves the given file into the wayback batch output directory, keeping its name.
     *
     * <p>The move falls back to copy-and-delete when a plain rename is not possible (for example
     * across file systems) and replaces an existing file of the same name.
     *
     * @param file the file to move
     * @return the file at its new location in the output directory
     * @throws java.io.UncheckedIOException if the file could not be moved
     */
    private File moveFileToWaybackOutputDir(File file) {
        File outputDir = new File(Settings.get(WaybackSettings.WAYBACK_BATCH_OUTPUTDIR));
        FileUtils.createDir(outputDir);
        File target = new File(outputDir, file.getName());
        try {
            // File.renameTo only reports failure through its (previously ignored) return value, so a
            // failed move went unnoticed and callers marked the file as indexed without an index.
            java.nio.file.Files.move(file.toPath(), target.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING);
        } catch (IOException e) {
            throw new java.io.UncheckedIOException("Could not move index file '" + file.getAbsolutePath() + "' to '" + target.getAbsolutePath() + "'", e);
        }
        return target;
    }

    /**
     * Logs an error describing a failed batch job, increments the failed-attempts counter of this
     * file and persists the change.
     *
     * @param batchStatus status of the failed batch job; its processed/failed counts and any
     *                    exceptions are included in the log message
     */
    private void logBatchError(BatchStatus batchStatus) {
        StringBuilder message = new StringBuilder();
        message.append("Error indexing file '").append(getFilename())
                .append("'\nNumber of files processed: '").append(batchStatus.getNoOfFilesProcessed())
                .append("'\nNumber of files failed '").append(batchStatus.getFilesFailed().size())
                .append("'");
        if (!batchStatus.getExceptions().isEmpty()) {
            message.append("\n Exceptions thrown: \n");
            for (Object occurrence : batchStatus.getExceptions()) {
                message.append(((FileBatchJob.ExceptionOccurrence) occurrence).toString()).append("\n");
            }
        }
        log.error(message.toString());
        indexingFailedAttempts++;
        new ArchiveFileDAO().update(this);
    }

    /**
     * Compares this object with another for equality. Two instances are equal when they are of the
     * same class and agree on file name, indexed flag, failed-attempts count, indexed date and
     * original index file name. Null field values are handled and only equal other null values.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is an equal {@code ArchiveFile}
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        ArchiveFile other = (ArchiveFile) obj;
        if (this.indexingFailedAttempts != other.indexingFailedAttempts || this.isIndexed != other.isIndexed) {
            return false;
        }
        // filename is null on a freshly constructed instance, so it needs the same null-safe
        // comparison as the other reference fields.
        if (this.filename != null ? !this.filename.equals(other.filename) : other.filename != null) {
            return false;
        }
        if (this.indexedDate != null ? !this.indexedDate.equals(other.indexedDate) : other.indexedDate != null) {
            return false;
        }
        return this.originalIndexFileName != null ? this.originalIndexFileName.equals(other.originalIndexFileName) : other.originalIndexFileName == null;
    }

    /**
     * Computes a hash code from the file name, indexed flag, original index file name,
     * failed-attempts count and indexed date, combined with the multiplier 31. Null fields
     * contribute 0, so the method is safe on an instance whose file name has not been set.
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        int result = this.filename != null ? this.filename.hashCode() : 0;
        result = 31 * result + (this.isIndexed ? 1 : 0);
        result = 31 * result + (this.originalIndexFileName != null ? this.originalIndexFileName.hashCode() : 0);
        result = 31 * result + this.indexingFailedAttempts;
        result = 31 * result + (this.indexedDate != null ? this.indexedDate.hashCode() : 0);
        return result;
    }
}
