package org.archive.modules.extractor;

import org.archive.modules.CrawlURI;

/* loaded from: input_file:org/archive/modules/extractor/ContentExtractor.class */
/**
 * Base class for extractors that work on the fetched content of a URI.
 *
 * <p>This class fixes the eligibility checks ({@link #shouldProcess}) and the
 * completion bookkeeping ({@link #extract}) as {@code final}; subclasses
 * supply only {@link #shouldExtract} and {@link #innerExtract}.
 */
public abstract class ContentExtractor extends Extractor {

    /**
     * Runs the subclass extraction and, when it reports {@code true}, calls
     * {@link CrawlURI#linkExtractorFinished()} on the URI.
     *
     * @param crawlURI the URI whose content is to be extracted
     */
    @Override
    protected final void extract(CrawlURI crawlURI) {
        final boolean finished = innerExtract(crawlURI);
        if (finished) {
            crawlURI.linkExtractorFinished();
        }
    }

    /**
     * Decides whether this extractor should run on the given URI.
     *
     * <p>The checks are applied in order, and the first failing one rejects
     * the URI:
     * <ol>
     *   <li>{@code isSuccess()} must be true;</li>
     *   <li>unless the extractor parameters enable independent extraction,
     *       the URI must not already have been link-extracted;</li>
     *   <li>the content length must be greater than zero;</li>
     *   <li>unless the extractor parameters enable extraction of 404s, the
     *       fetch status must not be 404;</li>
     *   <li>the subclass {@link #shouldExtract} must accept the URI.</li>
     * </ol>
     *
     * @param crawlURI the URI being considered
     * @return {@code true} only if every check above passes
     */
    @Override
    protected final boolean shouldProcess(CrawlURI crawlURI) {
        if (!crawlURI.isSuccess()) {
            return false;
        }
        // hasBeenLinkExtracted() is only consulted when independent extraction is off.
        if (!getExtractorParameters().getExtractIndependently() && crawlURI.hasBeenLinkExtracted()) {
            return false;
        }
        if (crawlURI.getContentLength() <= 0) {
            return false;
        }
        // The fetch status is only consulted when 404 extraction is off.
        if (!getExtractorParameters().getExtract404s() && crawlURI.getFetchStatus() == 404) {
            return false;
        }
        return shouldExtract(crawlURI);
    }

    /**
     * Subclass-specific eligibility test, consulted by {@link #shouldProcess}
     * only after all of its own checks have passed.
     *
     * @param crawlURI the URI being considered
     * @return {@code true} if this extractor should run on the URI
     */
    protected abstract boolean shouldExtract(CrawlURI crawlURI);

    /**
     * Performs the actual extraction; invoked from {@link #extract}.
     *
     * @param crawlURI the URI whose content is to be extracted
     * @return {@code true} to have {@link #extract} call
     *         {@link CrawlURI#linkExtractorFinished()} on the URI,
     *         {@code false} to skip that call
     */
    protected abstract boolean innerExtract(CrawlURI crawlURI);
}
