package org.archive.modules.net;

import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.nio.CharBuffer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.archive.bdb.AutoKryo;
import org.archive.io.ReadSource;

/* loaded from: input_file:org/archive/modules/net/Robotstxt.class */
public class Robotstxt implements Serializable {
    static final long serialVersionUID = 7025386509301303890L;
    protected static final int MAX_SIZE = 512000;
    protected LinkedList<String> namedUserAgents = new LinkedList<>();
    protected Map<String, RobotsDirectives> agentsToDirectives = new HashMap();
    protected RobotsDirectives wildcardDirectives = null;
    protected boolean hasErrors = false;
    private static final Logger logger = Logger.getLogger(Robotstxt.class.getName());
    private static final Pattern LINE_SEPARATOR = Pattern.compile("\r\n|\r|\n");
    protected static RobotsDirectives NO_DIRECTIVES = new RobotsDirectives();
    public static Robotstxt NO_ROBOTS = new Robotstxt();

    public Robotstxt() {
    }

    public Robotstxt(Reader reader) throws IOException {
        try {
            initializeFromReader(reader);
        } finally {
            IOUtils.closeQuietly(reader);
        }
    }

    public Robotstxt(ReadSource readSource) {
        Reader obtainReader = readSource.obtainReader();
        try {
            try {
                initializeFromReader(obtainReader);
                IOUtils.closeQuietly(obtainReader);
            } catch (IOException e) {
                logger.log(Level.SEVERE, "robots ReadSource problem: potential for inadvertent overcrawling", (Throwable) e);
                IOUtils.closeQuietly(obtainReader);
            }
        } catch (Throwable th) {
            IOUtils.closeQuietly(obtainReader);
            throw th;
        }
    }

    protected void initializeFromReader(Reader reader) throws IOException {
        CharBuffer allocate = CharBuffer.allocate(MAX_SIZE);
        while (allocate.hasRemaining() && reader.read(allocate) >= 0) {
        }
        allocate.flip();
        String[] split = LINE_SEPARATOR.split(allocate);
        if (allocate.limit() == allocate.capacity()) {
            int capacity = allocate.capacity();
            if (split.length != 0) {
                int length = split.length - 1;
                capacity -= split[length].length();
                split[length] = "";
            }
            logger.warning("processed " + capacity + " characters, ignoring the rest (see HER-1990)");
        }
        RobotsDirectives robotsDirectives = null;
        for (String str : split) {
            String trim = str.trim();
            if (!trim.isEmpty() && !trim.startsWith("#")) {
                String replaceAll = trim.replaceAll("<[^>]+>", "");
                int indexOf = replaceAll.indexOf("#");
                if (indexOf > -1) {
                    replaceAll = replaceAll.substring(0, indexOf);
                }
                String trim2 = replaceAll.trim();
                if (trim2.matches("(?i)^User-agent:.*")) {
                    String lowerCase = trim2.substring(11).trim().toLowerCase();
                    RobotsDirectives robotsDirectives2 = lowerCase.equals("*") ? this.wildcardDirectives : this.agentsToDirectives.get(lowerCase);
                    if (robotsDirectives2 != null && robotsDirectives2.hasDirectives) {
                        robotsDirectives = robotsDirectives2;
                    } else if (robotsDirectives == null || robotsDirectives.hasDirectives) {
                        robotsDirectives = new RobotsDirectives();
                    }
                    if (lowerCase.equals("*")) {
                        this.wildcardDirectives = robotsDirectives;
                    } else {
                        this.namedUserAgents.addLast(lowerCase);
                        this.agentsToDirectives.put(lowerCase, robotsDirectives);
                    }
                } else if (trim2.matches("(?i)Disallow:.*")) {
                    if (robotsDirectives == null) {
                        this.hasErrors = true;
                    } else {
                        String trim3 = trim2.substring(9).trim();
                        if (trim3.endsWith("*")) {
                            trim3 = trim3.substring(0, trim3.length() - 1);
                        }
                        robotsDirectives.addDisallow(trim3);
                    }
                } else if (trim2.matches("(?i)Crawl-delay:.*")) {
                    if (robotsDirectives == null) {
                        this.hasErrors = true;
                    } else {
                        try {
                            robotsDirectives.setCrawlDelay(Float.parseFloat(trim2.substring(12).trim().split("[^\\d\\.]+")[0]));
                        } catch (ArrayIndexOutOfBoundsException e) {
                        } catch (NumberFormatException e2) {
                        }
                    }
                } else if (trim2.matches("(?i)Allow:.*")) {
                    if (robotsDirectives == null) {
                        this.hasErrors = true;
                    } else {
                        String trim4 = trim2.substring(6).trim();
                        if (trim4.endsWith("*")) {
                            trim4 = trim4.substring(0, trim4.length() - 1);
                        }
                        robotsDirectives.addAllow(trim4);
                    }
                }
            }
        }
    }

    public boolean allowsAll() {
        return this.agentsToDirectives.isEmpty();
    }

    public List<String> getNamedUserAgents() {
        return this.namedUserAgents;
    }

    public RobotsDirectives getDirectivesFor(String str, boolean z) {
        Iterator<String> it = this.namedUserAgents.iterator();
        while (it.hasNext()) {
            String next = it.next();
            if (str.indexOf(next) > -1) {
                return this.agentsToDirectives.get(next);
            }
        }
        if (z) {
            return this.wildcardDirectives != null ? this.wildcardDirectives : NO_DIRECTIVES;
        }
        return null;
    }

    public RobotsDirectives getDirectivesFor(String str) {
        return getDirectivesFor(str, true);
    }

    public static void autoregisterTo(AutoKryo autoKryo) {
        autoKryo.register(Robotstxt.class);
        autoKryo.autoregister(HashMap.class);
        autoKryo.autoregister(LinkedList.class);
        autoKryo.autoregister(RobotsDirectives.class);
        autoKryo.setRegistrationOptional(true);
    }
}
