Class ExtendedDNSFetcher
- java.lang.Object
-
- org.archive.modules.Processor
-
- dk.netarkivet.harvester.harvesting.ExtendedDNSFetcher
-
- All Implemented Interfaces:
org.archive.checkpointing.Checkpointable
,org.archive.spring.HasKeyedProperties
,org.springframework.beans.factory.Aware
,org.springframework.beans.factory.BeanNameAware
,org.springframework.context.Lifecycle
public class ExtendedDNSFetcher extends org.archive.modules.Processor
Processor to resolve 'dns:' URIs. Based on FetchDNS (version taken from https://github.com/internetarchive/heritrix3/commit/aee83dfe26ea5a36a4eb3092380e1b0d7b242aab). Makes it possible to avoid lookups of bad hostnames, e.g. 'components' or 'www', that lack any valid domain information.
- Author:
- multiple
sample usage:
<bean id="fetchDns" class="dk.netarkivet.harvester.harvesting.ExtendedDNSFetcher">
-
-
Field Summary
Fields Modifier and Type Field Description protected String
digestAlgorithm
Which algorithm (for example MD5 or SHA-1) to use to perform an on-the-fly digest hash of retrieved content-bodies. protected org.archive.modules.net.ServerCache
serverCache
Used to do DNS lookups. protected InetAddress
serverInetAddr
-
Constructor Summary
Constructors Constructor Description ExtendedDNSFetcher()
-
Method Summary
All Methods Instance Methods Concrete Methods Modifier and Type Method Description boolean
getAcceptNonDnsResolves()
String
getDigestAlgorithm()
boolean
getDigestContent()
boolean
getDisableJavaDnsResolves()
protected byte[]
getDNSRecord(long fetchStart, org.xbill.DNS.Record[] rrecordSet)
protected org.xbill.DNS.ARecord
getFirstARecord(org.xbill.DNS.Record[] rrecordSet)
boolean
getPrevalidateHostname()
org.archive.modules.net.ServerCache
getServerCache()
protected void
innerProcess(org.archive.modules.CrawlURI curi)
protected boolean
isQuadAddress(org.archive.modules.CrawlURI curi, String dnsName, org.archive.modules.net.CrawlHost targetHost)
protected void
recordDNS(org.archive.modules.CrawlURI curi, org.xbill.DNS.Record[] rrecordSet)
void
setAcceptNonDnsResolves(boolean acceptNonDnsResolves)
void
setDigestAlgorithm(String digestAlgorithm)
void
setDigestContent(boolean digest)
void
setDisableJavaDnsResolves(boolean disableJavaDnsResolves)
void
setPrevalidateHostname(boolean prevalidateHostname)
void
setServerCache(org.archive.modules.net.ServerCache serverCache)
protected void
setUnresolvable(org.archive.modules.CrawlURI curi, org.archive.modules.net.CrawlHost host)
protected boolean
shouldProcess(org.archive.modules.CrawlURI curi)
protected void
storeDNSRecord(org.archive.modules.CrawlURI curi, String dnsName, org.archive.modules.net.CrawlHost targetHost, org.xbill.DNS.Record[] rrecordSet)
-
Methods inherited from class org.archive.modules.Processor
doCheckpoint, finishCheckpoint, flattenVia, fromCheckpointJson, getBeanName, getEnabled, getKeyedProperties, getRecordedSize, getShouldProcessRule, getURICount, hasHttpAuthenticationCredential, innerProcessResult, innerRejectProcess, isRunning, isSuccess, process, report, setBeanName, setEnabled, setRecoveryCheckpoint, setShouldProcessRule, start, startCheckpoint, stop, toCheckpointJson
-
-
-
-
Field Detail
-
serverInetAddr
protected InetAddress serverInetAddr
-
serverCache
protected org.archive.modules.net.ServerCache serverCache
Used to do DNS lookups.
-
digestAlgorithm
protected String digestAlgorithm
Which algorithm (for example MD5 or SHA-1) to use to perform an on-the-fly digest hash of retrieved content-bodies. The default is 'sha1'.
-
-
Method Detail
-
getAcceptNonDnsResolves
public boolean getAcceptNonDnsResolves()
-
setAcceptNonDnsResolves
public void setAcceptNonDnsResolves(boolean acceptNonDnsResolves)
-
getDisableJavaDnsResolves
public boolean getDisableJavaDnsResolves()
-
setDisableJavaDnsResolves
public void setDisableJavaDnsResolves(boolean disableJavaDnsResolves)
-
getServerCache
public org.archive.modules.net.ServerCache getServerCache()
-
setServerCache
@Autowired public void setServerCache(org.archive.modules.net.ServerCache serverCache)
-
getDigestContent
public boolean getDigestContent()
-
setDigestContent
public void setDigestContent(boolean digest)
-
getDigestAlgorithm
public String getDigestAlgorithm()
-
setDigestAlgorithm
public void setDigestAlgorithm(String digestAlgorithm)
-
getPrevalidateHostname
public boolean getPrevalidateHostname()
-
setPrevalidateHostname
public void setPrevalidateHostname(boolean prevalidateHostname)
-
shouldProcess
protected boolean shouldProcess(org.archive.modules.CrawlURI curi)
- Specified by:
shouldProcess
in class org.archive.modules.Processor
-
innerProcess
protected void innerProcess(org.archive.modules.CrawlURI curi)
- Specified by:
innerProcess
in class org.archive.modules.Processor
-
storeDNSRecord
protected void storeDNSRecord(org.archive.modules.CrawlURI curi, String dnsName, org.archive.modules.net.CrawlHost targetHost, org.xbill.DNS.Record[] rrecordSet)
-
isQuadAddress
protected boolean isQuadAddress(org.archive.modules.CrawlURI curi, String dnsName, org.archive.modules.net.CrawlHost targetHost)
-
recordDNS
protected void recordDNS(org.archive.modules.CrawlURI curi, org.xbill.DNS.Record[] rrecordSet) throws IOException
- Throws:
IOException
-
getDNSRecord
protected byte[] getDNSRecord(long fetchStart, org.xbill.DNS.Record[] rrecordSet) throws IOException
- Throws:
IOException
-
setUnresolvable
protected void setUnresolvable(org.archive.modules.CrawlURI curi, org.archive.modules.net.CrawlHost host)
-
getFirstARecord
protected org.xbill.DNS.ARecord getFirstARecord(org.xbill.DNS.Record[] rrecordSet)
-
-