package dk.netarkivet.wayback.hadoop;

import dk.netarkivet.wayback.batch.DeduplicationCDXExtractionBatchJob;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/* loaded from: input_file:dk/netarkivet/wayback/hadoop/DedupIndexer.class */
/**
 * {@link Indexer} implementation that runs a {@link DeduplicationCDXExtractionBatchJob}
 * over a file, captures everything the job writes in memory, and returns that output
 * split into lines.
 */
public class DedupIndexer implements Indexer {

    /**
     * Runs the deduplication CDX extraction job on the given file and returns the
     * job's output, one list element per line.
     *
     * @param file the file handed to the batch job for processing
     * @return the lines written by the job, in output order; an empty list when the
     *         job wrote nothing at all
     * @throws IOException if an I/O error is reported while indexing the file
     */
    @Override // dk.netarkivet.wayback.hadoop.Indexer
    public List<String> indexFile(File file) throws IOException {
        DeduplicationCDXExtractionBatchJob job = new DeduplicationCDXExtractionBatchJob();

        // In-memory sink: ByteArrayOutputStream holds no external resource, so it needs
        // neither flush() nor close() before its contents are read back.
        ByteArrayOutputStream sink = new ByteArrayOutputStream();

        // The same stream is passed to every phase so all output ends up in one buffer.
        job.initialize(sink);
        job.processFile(file, sink);
        job.finish(sink);

        // NOTE(review): decodes with the platform default charset, as before; presumably
        // this matches the encoding the batch job writes with - confirm against the job.
        String output = sink.toString();

        // "".split("\\n") yields a single empty string, which would be returned as one
        // bogus blank index line. Report "no output" as an empty list instead.
        String[] lines = output.isEmpty() ? new String[0] : output.split("\\n");
        return Arrays.asList(lines);
    }
}
