|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
javax.management.Attribute
org.archive.crawler.settings.Type
org.archive.crawler.settings.ComplexType
org.archive.crawler.settings.ModuleType
org.archive.crawler.framework.Processor
org.archive.crawler.extractor.Extractor
dk.netarkivet.harvester.harvesting.extractor.ExtractorJS
public class ExtractorJS
Processes JavaScript files for strings that are likely to be crawlable URIs. Contributors: gojomo, szznax, svc.
Nested Class Summary |
---|
Nested classes/interfaces inherited from class org.archive.crawler.settings.ComplexType |
---|
org.archive.crawler.settings.ComplexType.MBeanAttributeInfoIterator |
Field Summary | |
---|---|
protected static java.lang.String[] |
EXTRACTOR_URI_EXCEPTIONS
|
(package private) static java.lang.String |
JAVASCRIPT_STRING_EXTRACTOR
|
protected long |
numberOfCURIsHandled
|
protected static long |
numberOfLinksExtracted
|
Fields inherited from class org.archive.crawler.framework.Processor |
---|
ATTR_DECIDE_RULES, ATTR_ENABLED, attrDecideRules |
Fields inherited from class org.archive.crawler.settings.ComplexType |
---|
definition, definitionMap |
Fields inherited from interface org.archive.crawler.datamodel.CoreAttributeConstants |
---|
A_ANNOTATIONS, A_CONTENT_DIGEST, A_CONTENT_TYPE, A_CREDENTIAL_AVATARS_KEY, A_DELAY_FACTOR, A_DISTANCE_FROM_SEED, A_DNS_FETCH_TIME, A_DNS_SERVER_IP_LABEL, A_ETAG_HEADER, A_FETCH_BEGAN_TIME, A_FETCH_COMPLETED_TIME, A_FETCH_HISTORY, A_FORCE_RETIRE, A_FTP_CONTROL_CONVERSATION, A_FTP_FETCH_STATUS, A_HERITABLE_KEYS, A_HTML_BASE, A_HTTP_BIND_ADDRESS, A_HTTP_PROXY_HOST, A_HTTP_PROXY_PORT, A_HTTP_TRANSACTION, A_LAST_MODIFIED_HEADER, A_LOCALIZED_ERRORS, A_META_ROBOTS, A_MINIMUM_DELAY, A_MIRROR_PATH, A_PREREQUISITE_URI, A_REFERENCE_LENGTH, A_RETRY_DELAY, A_RRECORD_SET_LABEL, A_RUNTIME_EXCEPTION, A_SOURCE_TAG, A_STATUS, HEADER_TRUNC, LENGTH_TRUNC, TIMER_TRUNC, TRUNC_SUFFIX |
Constructor Summary | |
---|---|
ExtractorJS(java.lang.String name)
|
Method Summary | |
---|---|
static long |
considerStrings(org.archive.crawler.datamodel.CrawlURI curi,
java.lang.CharSequence cs,
org.archive.crawler.framework.CrawlController controller,
boolean handlingJSFile)
|
void |
extract(org.archive.crawler.datamodel.CrawlURI curi)
|
java.lang.String |
report()
|
Methods inherited from class org.archive.crawler.extractor.Extractor |
---|
innerProcess |
Methods inherited from class org.archive.crawler.framework.Processor |
---|
checkForInterrupt, finalTasks, getController, getDecideRule, getDefaultNextProcessor, initialTasks, innerRejectProcess, isContentToProcess, isEnabled, isExpectedMimeType, isHttpTransactionContentToProcess, kickUpdate, process, rulesAccept, rulesAccept, setDefaultNextProcessor, spawn |
Methods inherited from class org.archive.crawler.settings.ModuleType |
---|
addElement, listUsedFiles |
Methods inherited from class org.archive.crawler.settings.ComplexType |
---|
addElementToDefinition, checkValue, earlyInitialize, getAbsoluteName, getAttribute, getAttribute, getAttribute, getAttributeInfo, getAttributeInfo, getAttributeInfoIterator, getAttributes, getDataContainerRecursive, getDataContainerRecursive, getDefaultValue, getDescription, getElementFromDefinition, getLegalValues, getLocalAttribute, getMBeanInfo, getMBeanInfo, getParent, getPreservedFields, getSettingsHandler, getUncheckedAttribute, getValue, globalSettings, invoke, isInitialized, isOverridden, iterator, removeElementFromDefinition, setAsOrder, setAttribute, setAttribute, setAttributes, setDescription, setPreservedFields, toString, unsetAttribute |
Methods inherited from class org.archive.crawler.settings.Type |
---|
addConstraint, equals, getConstraints, getLegalValueType, isExpertSetting, isOverrideable, isTransient, setExpertSetting, setLegalValueType, setOverrideable, setTransient |
Methods inherited from class javax.management.Attribute |
---|
getName, hashCode |
Methods inherited from class java.lang.Object |
---|
clone, finalize, getClass, notify, notifyAll, wait, wait, wait |
Field Detail |
---|
static final java.lang.String JAVASCRIPT_STRING_EXTRACTOR
protected long numberOfCURIsHandled
protected static long numberOfLinksExtracted
protected static final java.lang.String[] EXTRACTOR_URI_EXCEPTIONS
Constructor Detail |
---|
public ExtractorJS(java.lang.String name)
Parameters: name -
Method Detail |
---|
public void extract(org.archive.crawler.datamodel.CrawlURI curi)
extract
in class org.archive.crawler.extractor.Extractor
public static long considerStrings(org.archive.crawler.datamodel.CrawlURI curi, java.lang.CharSequence cs, org.archive.crawler.framework.CrawlController controller, boolean handlingJSFile)
public java.lang.String report()
report
in class org.archive.crawler.framework.Processor
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |