package org.archive.modules.forms;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.lang.StringUtils;
import org.archive.modules.CrawlURI;
import org.archive.modules.extractor.Extractor;
import org.archive.modules.extractor.ExtractorHTML;
import org.archive.util.TextUtils;

/* loaded from: input_file:org/archive/modules/forms/ExtractorHTMLForms.class */
/**
 * Extractor that inspects the HTML forms whose offsets were recorded on the
 * {@link CrawlURI} under {@link ExtractorHTML#A_FORM_OFFSETS}, builds an
 * {@link HTMLForm} for each of them and, when the form
 * {@link HTMLForm#seemsLoginForm() seems to be a login form} (or when
 * {@link #getExtractAllForms()} is enabled), stores it on the URI under
 * {@link #A_HTML_FORM_OBJECTS} and adds its annotation.
 */
public class ExtractorHTMLForms extends Extractor {
    private static final long serialVersionUID = 2;

    /** Data-list key under which extracted {@link HTMLForm} objects are stored on the CrawlURI. */
    public static final String A_HTML_FORM_OBJECTS = "html-form-objects";

    private static final Logger logger = Logger.getLogger(ExtractorHTMLForms.class.getName());

    // Attribute patterns: anchored at the start of the tag text, group 1 is the raw attribute value.
    private static final String METHOD_PATTERN = "(?i)^[^>]*\\smethod\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String ACTION_PATTERN = "(?i)^[^>]*\\saction\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String ENCTYPE_PATTERN = "(?i)^[^>]*\\senctype\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String TYPE_PATTERN = "(?i)^[^>]*\\stype\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String NAME_PATTERN = "(?i)^[^>]*\\sname\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String VALUE_PATTERN = "(?i)^[^>]*\\svalue\\s*=\\s*([^>\\s]{1,50000})[^>]*>";
    private static final String CHECKED_PATTERN = "(?i)^[^>]*\\schecked\\s*[^>]*>";

    // Group 1 matches an <input ...> tag; the second alternative matches a
    // <form>/</form> tag and leaves group 1 null, which ends the input scan.
    private static final String INPUT_OR_FORM_PATTERN = "(?i)(<input\\s[^>]*>)|(</?form>)";

    /**
     * @return the current value of the "extractAllForms" setting
     */
    public boolean getExtractAllForms() {
        return ((Boolean) this.kp.get("extractAllForms")).booleanValue();
    }

    /**
     * @param z new value of the "extractAllForms" setting; when true, every
     *          analyzed form is recorded, not only those that seem to be login forms
     */
    public void setExtractAllForms(boolean z) {
        this.kp.put("extractAllForms", Boolean.valueOf(z));
    }

    /** Creates the extractor with "extractAllForms" initialised to {@code false}. */
    public ExtractorHTMLForms() {
        setExtractAllForms(false);
    }

    /**
     * Only URIs that carry recorded form offsets are processed.
     */
    @Override // org.archive.modules.Processor
    protected boolean shouldProcess(CrawlURI crawlURI) {
        return crawlURI.containsDataKey(ExtractorHTML.A_FORM_OFFSETS);
    }

    /**
     * Obtains the replay character sequence of the URI's recorded content and
     * analyzes it. An {@link IOException} while obtaining the sequence is
     * recorded as a non-fatal failure on the URI and logged; it is not rethrown.
     */
    @Override // org.archive.modules.extractor.Extractor
    public void extract(CrawlURI crawlURI) {
        try {
            analyze(crawlURI, crawlURI.getRecorder().getContentReplayCharSequence());
        } catch (IOException e) {
            crawlURI.getNonFatalFailures().add(e);
            logger.log(Level.WARNING, "Failed get of replay char sequence in " + Thread.currentThread().getName(), e);
        }
    }

    /**
     * Builds an {@link HTMLForm} for every recorded form offset and records the
     * qualifying ones on the URI.
     *
     * @param crawlURI     URI whose {@link ExtractorHTML#A_FORM_OFFSETS} list holds
     *                     Integer start offsets into {@code charSequence}
     * @param charSequence content in which the forms are located
     */
    protected void analyze(CrawlURI crawlURI, CharSequence charSequence) {
        for (Object offset : crawlURI.getDataList(ExtractorHTML.A_FORM_OFFSETS)) {
            // Everything from the form's start offset to the end of the content.
            CharSequence formText = charSequence.subSequence(((Integer) offset).intValue(), charSequence.length());

            HTMLForm form = new HTMLForm();
            form.setMethod(findAttributeValueGroup(METHOD_PATTERN, 1, formText));
            form.setAction(findAttributeValueGroup(ACTION_PATTERN, 1, formText));
            form.setEnctype(findAttributeValueGroup(ENCTYPE_PATTERN, 1, formText));

            for (CharSequence inputTag : findGroups(INPUT_OR_FORM_PATTERN, 1, formText)) {
                String type = findAttributeValueGroup(TYPE_PATTERN, 1, inputTag);
                String name = findAttributeValueGroup(NAME_PATTERN, 1, inputTag);
                String value = findAttributeValueGroup(VALUE_PATTERN, 1, inputTag);
                form.addField(type, name, value, hasCheckedAttribute(inputTag));
            }

            if (form.seemsLoginForm() || getExtractAllForms()) {
                crawlURI.getDataList(A_HTML_FORM_OBJECTS).add(form);
                crawlURI.getAnnotations().add(form.asAnnotation());
            }
        }
    }

    /** Reports whether the given tag text contains a {@code checked} attribute. */
    private boolean hasCheckedAttribute(CharSequence inputTag) {
        Matcher matcher = TextUtils.getMatcher(CHECKED_PATTERN, inputTag);
        try {
            return matcher.find();
        } finally {
            // Recycle exactly once, and only after the last use of the matcher.
            TextUtils.recycleMatcher(matcher);
        }
    }

    /**
     * Collects the text of group {@code i} for successive matches of
     * {@code str} in {@code charSequence}. Scanning stops at the end of input
     * or at the first match for which group {@code i} did not participate.
     *
     * @param str          regular expression to apply
     * @param i            number of the capturing group to collect
     * @param charSequence text to scan
     * @return the collected sub-sequences in match order; empty if none
     */
    protected List<CharSequence> findGroups(String str, int i, CharSequence charSequence) {
        List<CharSequence> groups = new ArrayList<CharSequence>();
        Matcher matcher = TextUtils.getMatcher(str, charSequence);
        try {
            while (matcher.find() && matcher.group(i) != null) {
                groups.add(charSequence.subSequence(matcher.start(i), matcher.end(i)));
            }
        } finally {
            // The matcher must stay live for the whole scan: recycling it inside
            // the loop would hand it back while find() is still being called on
            // it, and would skip recycling entirely when nothing matches.
            TextUtils.recycleMatcher(matcher);
        }
        return groups;
    }

    /**
     * Finds the first match of {@code str} in {@code charSequence} and returns
     * group {@code i} with a trailing {@code '/} or {@code "/} removed and any
     * leading/trailing single or double quotes stripped.
     *
     * @param str          regular expression to apply
     * @param i            number of the capturing group holding the attribute value
     * @param charSequence text to search
     * @return the cleaned attribute value, or {@code null} if the pattern does not match
     */
    protected String findAttributeValueGroup(String str, int i, CharSequence charSequence) {
        Matcher matcher = TextUtils.getMatcher(str, charSequence);
        try {
            if (!matcher.find()) {
                return null;
            }
            String value = matcher.group(i);
            value = StringUtils.removeEnd(value, "'/");
            value = StringUtils.removeEnd(value, "\"/");
            return StringUtils.strip(value, "'\"");
        } finally {
            // Single recycle point for every exit path (no double recycle).
            TextUtils.recycleMatcher(matcher);
        }
    }
}
