package org.archive.modules.extractor;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlURI;
import org.archive.util.UriUtils;

/* loaded from: input_file:org/archive/modules/extractor/ExtractorJson.class */
public class ExtractorJson extends ContentExtractor {
    public static final String JSON_URI = "^https?://[^/]+/.+\\.json\\b.*$";
    private static final Logger LOGGER = Logger.getLogger(ExtractorJson.class.getName());
    private JsonFactory factory = new JsonFactory();
    private ObjectMapper mapper = new ObjectMapper(this.factory);

    @Override // org.archive.modules.extractor.ContentExtractor
    protected boolean innerExtract(CrawlURI crawlURI) {
        try {
            ArrayList arrayList = new ArrayList();
            parse(this.mapper.readTree(crawlURI.getRecorder().getContentReplayInputStream()), arrayList);
            for (String str : arrayList) {
                try {
                    addRelativeToBase(crawlURI, getExtractorParameters().getMaxOutlinks(), str, LinkContext.INFERRED_MISC, Hop.INFERRED);
                    this.numberOfLinksExtracted.incrementAndGet();
                } catch (URIException e) {
                    logUriError(e, crawlURI.getUURI(), str);
                }
            }
            return false;
        } catch (Exception e2) {
            LOGGER.log(Level.INFO, crawlURI.getURI() + " : " + e2.getMessage());
            return false;
        }
    }

    @Override // org.archive.modules.extractor.ContentExtractor
    protected boolean shouldExtract(CrawlURI crawlURI) {
        String contentType = crawlURI.getContentType();
        if (contentType == null || contentType.indexOf("json") == -1) {
            return crawlURI.isSuccess() && crawlURI.toString().matches(JSON_URI);
        }
        return true;
    }

    protected List<String> parse(JsonNode jsonNode, List<String> list) {
        Iterator fields = jsonNode.fields();
        while (fields.hasNext()) {
            Map.Entry entry = (Map.Entry) fields.next();
            if (((JsonNode) entry.getValue()).textValue() != null && UriUtils.isVeryLikelyUri(((JsonNode) entry.getValue()).textValue())) {
                list.add(((JsonNode) entry.getValue()).textValue());
            } else if (((JsonNode) entry.getValue()).isObject()) {
                parse((JsonNode) entry.getValue(), list);
            } else if (((JsonNode) entry.getValue()).isArray()) {
                Iterator elements = ((JsonNode) entry.getValue()).elements();
                while (elements.hasNext()) {
                    parse((JsonNode) elements.next(), list);
                }
            }
        }
        return list;
    }
}
