public class ExtractorJS extends org.archive.crawler.extractor.Extractor implements org.archive.crawler.datamodel.CoreAttributeConstants
Contributors: gojomo, szznax, svc
| Modifier and Type | Field and Description |
|---|---|
| protected static String[] | EXTRACTOR_URI_EXCEPTIONS |
| protected long | numberOfCURIsHandled |
| protected static long | numberOfLinksExtracted |
ATTR_DECIDE_RULES, ATTR_ENABLED, attrDecideRules
A_ANNOTATIONS, A_CONTENT_DIGEST, A_CONTENT_TYPE, A_CREDENTIAL_AVATARS_KEY, A_DELAY_FACTOR, A_DISTANCE_FROM_SEED, A_DNS_FETCH_TIME, A_DNS_SERVER_IP_LABEL, A_ETAG_HEADER, A_FETCH_BEGAN_TIME, A_FETCH_COMPLETED_TIME, A_FETCH_HISTORY, A_FORCE_RETIRE, A_FTP_CONTROL_CONVERSATION, A_FTP_FETCH_STATUS, A_HERITABLE_KEYS, A_HTML_BASE, A_HTTP_BIND_ADDRESS, A_HTTP_PROXY_HOST, A_HTTP_PROXY_PORT, A_HTTP_TRANSACTION, A_LAST_MODIFIED_HEADER, A_LOCALIZED_ERRORS, A_META_ROBOTS, A_MINIMUM_DELAY, A_MIRROR_PATH, A_PREREQUISITE_URI, A_REFERENCE_LENGTH, A_RETRY_DELAY, A_RRECORD_SET_LABEL, A_RUNTIME_EXCEPTION, A_SOURCE_TAG, A_STATUS, HEADER_TRUNC, LENGTH_TRUNC, TIMER_TRUNC, TRUNC_SUFFIX
| Constructor and Description |
|---|
| ExtractorJS(String name) |
| Modifier and Type | Method and Description |
|---|---|
| static long | considerStrings(org.archive.crawler.datamodel.CrawlURI curi, CharSequence cs, org.archive.crawler.framework.CrawlController controller, boolean handlingJSFile) |
| void | extract(org.archive.crawler.datamodel.CrawlURI curi) |
| String | report() |
checkForInterrupt, finalTasks, getController, getDecideRule, getDefaultNextProcessor, initialTasks, innerRejectProcess, isContentToProcess, isEnabled, isExpectedMimeType, isHttpTransactionContentToProcess, kickUpdate, process, rulesAccept, rulesAccept, setDefaultNextProcessor, spawn
addElementToDefinition, checkValue, earlyInitialize, getAbsoluteName, getAttribute, getAttribute, getAttribute, getAttributeInfo, getAttributeInfo, getAttributeInfoIterator, getAttributes, getDataContainerRecursive, getDataContainerRecursive, getDefaultValue, getDescription, getElementFromDefinition, getLegalValues, getLocalAttribute, getMBeanInfo, getMBeanInfo, getParent, getPreservedFields, getSettingsHandler, getUncheckedAttribute, getValue, globalSettings, invoke, isInitialized, isOverridden, iterator, removeElementFromDefinition, setAsOrder, setAttribute, setAttribute, setAttributes, setDescription, setPreservedFields, toString, unsetAttribute
protected long numberOfCURIsHandled
protected static long numberOfLinksExtracted
protected static final String[] EXTRACTOR_URI_EXCEPTIONS
public ExtractorJS(String name)
name
public void extract(org.archive.crawler.datamodel.CrawlURI curi)
extract in class org.archive.crawler.extractor.Extractor
public static long considerStrings(org.archive.crawler.datamodel.CrawlURI curi, CharSequence cs, org.archive.crawler.framework.CrawlController controller, boolean handlingJSFile)
Copyright © 2005–2016 The Royal Danish Library, the Danish State and University Library, the National Library of France and the Austrian National Library. All rights reserved.