package is.hi.bok.deduplicator;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.NoSuchElementException;
import java.util.regex.Pattern;

/* loaded from: input_file:is/hi/bok/deduplicator/CrawlLogIterator.class */
/**
 * Iterates over the lines of a Heritrix style crawl.log file and turns each
 * parseable line into a {@link CrawlDataItem}.
 *
 * <p>Lines that cannot be parsed (too short, too few columns, or an unreadable
 * timestamp) are skipped silently, apart from a diagnostic on
 * {@code System.err} for timestamp failures.
 *
 * <p>Instances keep mutable reader state and use {@link SimpleDateFormat},
 * which the JDK documents as not synchronized, so an instance should not be
 * shared between threads without external locking.
 */
public class CrawlLogIterator extends CrawlDataIterator {
    /**
     * Column separator: one or more whitespace characters. Compiled once
     * because {@link #parseLine(String)} runs for every line of the log.
     */
    private static final Pattern FIELD_SEPARATOR = Pattern.compile("\\s+");

    /** Text that introduces a quoted origin inside the last column. */
    private static final String DUPLICATE_ORIGIN_PREFIX = "duplicate:\"";

    /**
     * Parses the first column of a log line. The trailing {@code 'Z'} in the
     * pattern is a quoted literal, not a time zone designator.
     */
    protected final SimpleDateFormat crawlDateFormat;

    /** Formats the parsed date using {@code CrawlDataItem.dateFormat}. */
    protected final SimpleDateFormat crawlDataItemFormat;

    /** Reader over the crawl log supplied to the constructor. */
    protected BufferedReader in;

    /** Look-ahead item; {@code null} when nothing has been pre-read. */
    protected CrawlDataItem next;

    /**
     * Opens the crawl log for reading.
     *
     * @param str path of the crawl log file; also passed to the superclass
     * @throws IOException if the file cannot be opened
     */
    public CrawlLogIterator(String str) throws IOException {
        super(str);
        this.crawlDateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
        this.crawlDataItemFormat = new SimpleDateFormat(CrawlDataItem.dateFormat);
        // NOTE(review): the reader uses the platform default charset - confirm
        // this matches how the crawl logs being read were written.
        this.in = new BufferedReader(new InputStreamReader(new FileInputStream(new File(str))));
    }

    /**
     * Reports whether another item is available, reading ahead in the log if
     * no item is currently buffered.
     *
     * @return {@code true} if {@link #next()} will return an item
     * @throws IOException if reading the log fails
     */
    @Override
    public boolean hasNext() throws IOException {
        if (this.next == null) {
            prepareNext();
        }
        return this.next != null;
    }

    /**
     * Returns the next item and clears the look-ahead buffer.
     *
     * @return the next parsed item, never {@code null}
     * @throws IOException if reading the log fails
     * @throws NoSuchElementException if the log holds no further parseable line
     */
    @Override
    public CrawlDataItem next() throws IOException {
        if (!hasNext()) {
            throw new NoSuchElementException("No more items");
        }
        CrawlDataItem item = this.next;
        this.next = null;
        return item;
    }

    /**
     * Reads lines until one parses into an item or the end of the log is
     * reached. On end of input {@link #next} is left as {@code null}.
     *
     * @throws IOException if reading the log fails
     */
    protected void prepareNext() throws IOException {
        String line;
        while ((line = this.in.readLine()) != null) {
            this.next = parseLine(line);
            if (this.next != null) {
                return;
            }
        }
    }

    /**
     * Parses a single crawl log line.
     *
     * <p>The line is split on whitespace into at most 12 columns, so the last
     * column keeps any embedded whitespace. Columns 0, 3, 6 and 9 are used,
     * plus column 11 when present. The local names below follow the Heritrix
     * crawl.log column layout (timestamp, URI, MIME type, digest,
     * annotations); NOTE(review): confirm they match the parameter order of
     * the {@code CrawlDataItem} constructor.
     *
     * @param str the raw log line; may be {@code null}
     * @return the parsed item, or {@code null} if the line is {@code null},
     *         is 42 characters or shorter, has fewer than 10 columns, or has
     *         a first column that does not parse as a date
     */
    protected CrawlDataItem parseLine(String str) {
        // Lines of 42 characters or fewer are rejected outright; presumably
        // they are too short to hold a complete record (threshold inherited).
        if (str == null || str.length() <= 42) {
            return null;
        }
        String[] fields = FIELD_SEPARATOR.split(str, 12);
        if (fields.length < 10) {
            return null;
        }

        String timestamp;
        try {
            timestamp = this.crawlDataItemFormat.format(this.crawlDateFormat.parse(fields[0]));
        } catch (ParseException e) {
            System.err.println("Error parsing date for: " + str);
            e.printStackTrace();
            return null;
        }

        String url = fields[3];
        String mimeType = fields[6];

        // Keep only what follows the last ':' (drops any "scheme:" prefix).
        String digest = fields[9];
        int lastColon = digest.lastIndexOf(':');
        if (lastColon >= 0) {
            digest = digest.substring(lastColon + 1);
        }

        String origin = null;
        boolean duplicate = false;
        if (fields.length == 12) {
            String annotations = fields[11];
            int marker = annotations.indexOf(DUPLICATE_ORIGIN_PREFIX);
            if (marker >= 0) {
                duplicate = true;
                int originStart = marker + DUPLICATE_ORIGIN_PREFIX.length();
                // Search from originStart itself so an empty origin
                // (duplicate:"") ends at its own closing quote.
                int originEnd = annotations.indexOf('"', originStart);
                // A missing closing quote leaves the origin unknown instead of
                // throwing StringIndexOutOfBoundsException and aborting the
                // whole iteration.
                if (originEnd >= 0) {
                    origin = annotations.substring(originStart, originEnd);
                }
            } else if (annotations.contains("duplicate")) {
                duplicate = true;
            }
        }
        return new CrawlDataItem(url, digest, timestamp, null, mimeType, origin, duplicate);
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        this.in.close();
    }

    /**
     * Describes the kind of input this iterator handles.
     *
     * @return a fixed human-readable description
     */
    @Override
    public String getSourceType() {
        return "Handles Heritrix style crawl.log files";
    }
}
