public class NasWARCProcessor extends org.archive.modules.writer.WARCWriterProcessor
Modifier and Type | Field and Description |
---|---|
protected Map<String,String> |
metadataMap
Metadata items.
|
ANNOTATION_UNWRITTEN, compress, directory, frequentFlushes, maxFileSizeBytes, maxTotalBytesToWrite, maxWaitForIdleMs, poolMaxActive, prefix, serverCache, skipIdenticalDigests, startNewFilesOnCheckpoint, storePaths, template, writeBufferSize
Constructor and Description |
---|
NasWARCProcessor() |
Modifier and Type | Method and Description |
---|---|
Map<String,String> |
getFormItems() |
List<String> |
getMetadata() |
boolean |
getWriteMetadataOutlinks() |
void |
setMetadataItems(Map<String,String> metadataItems) |
void |
setWriteMetadataOutlinks(boolean writeMetadataOutlinks) |
protected URI |
writeMetadata(org.archive.io.warc.WARCWriter w,
String timestamp,
URI baseid,
org.archive.modules.CrawlURI curi,
org.archive.util.anvl.ANVLRecord namedFields)
Modifies the default writeMetadata method to control whether outlinks
are written to the metadata record or not.
|
addIfNotBlank, addStats, copyStats, fromCheckpointJson, getDefaultMaxFileSize, getDefaultStorePaths, getRecordID, getRecordIDGenerator, getStats, getWriteMetadata, getWriteRequests, innerProcessResult, qualifyRecordID, report, saveHeader, setRecordIDGenerator, setupPool, setWriteMetadata, setWriteRequests, setWriteRevisitForIdenticalDigests, setWriteRevisitForNotModified, toCheckpointJson, updateMetadataAfterWrite, write, writeDnsRecords, writeFtpControlConversation, writeFtpRecords, writeHttpRecords, writeRequest, writeResource, writeResponse, writeRevisit, writeRevisit, writeWhoisRecords
calcOutputDirs, checkBytesWritten, copyForwardWriteTagIfDupe, doCheckpoint, getCompress, getDirectory, getFrequentFlushes, getHostAddress, getMaxFileSizeBytes, getMaxTotalBytesToWrite, getMaxWaitForIdleMs, getMetadataProvider, getPool, getPoolMaxActive, getPrefix, getSerialNo, getServerCache, getSkipIdenticalDigests, getStartNewFilesOnCheckpoint, getStorePaths, getTemplate, getTotalBytesWritten, getWriteBufferSize, innerProcess, innerRejectProcess, setCompress, setDirectory, setFrequentFlushes, setMaxFileSizeBytes, setMaxTotalBytesToWrite, setMaxWaitForIdleMs, setMetadataProvider, setPool, setPoolMaxActive, setPrefix, setServerCache, setSkipIdenticalDigests, setStartNewFilesOnCheckpoint, setStorePaths, setTemplate, setTotalBytesWritten, setWriteBufferSize, shouldProcess, shouldWrite, start, stop
finishCheckpoint, flattenVia, getBeanName, getEnabled, getKeyedProperties, getRecordedSize, getShouldProcessRule, getURICount, hasHttpAuthenticationCredential, isRunning, isSuccess, process, setBeanName, setEnabled, setRecoveryCheckpoint, setShouldProcessRule, startCheckpoint
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
protected Map<String,String> metadataMap
public NasWARCProcessor()
public boolean getWriteMetadataOutlinks()
public void setWriteMetadataOutlinks(boolean writeMetadataOutlinks)
public Map<String,String> getFormItems()
public void setMetadataItems(Map<String,String> metadataItems)
public List<String> getMetadata()
getMetadata
in interface org.archive.io.WriterPoolSettings
getMetadata
in class org.archive.modules.writer.WARCWriterProcessor
protected URI writeMetadata(org.archive.io.warc.WARCWriter w, String timestamp, URI baseid, org.archive.modules.CrawlURI curi, org.archive.util.anvl.ANVLRecord namedFields) throws IOException
writeMetadata
in class org.archive.modules.writer.WARCWriterProcessor
IOException
Copyright © 2005–2016 The Royal Danish Library, the Danish State and University Library, the National Library of France and the Austrian National Library. All rights reserved.