dk.netarkivet.common.utils.warc
Class WARCWriterNAS

java.lang.Object
  extended by org.archive.io.WriterPoolMember
      extended by org.archive.io.warc.WARCWriter
          extended by dk.netarkivet.common.utils.warc.WARCWriterNAS
All Implemented Interfaces:
org.archive.io.ArchiveFileConstants, org.archive.io.warc.WARCConstants

public class WARCWriterNAS
extends org.archive.io.warc.WARCWriter


Field Summary
 
Fields inherited from class org.archive.io.warc.WARCWriter
CRLF_BYTES
 
Fields inherited from class org.archive.io.WriterPoolMember
DEFAULT_PREFIX, DEFAULT_SUFFIX, HOSTNAME_ADMINPORT_VARIABLE, HOSTNAME_VARIABLE, UTF8
 
Fields inherited from interface org.archive.io.warc.WARCConstants
COLON_SPACE, COMPRESSED_WARC_FILE_EXTENSION, CONTENT_DESCRIPTION, CONTENT_LENGTH, CONTENT_TYPE, CONTINUATION, CONTINUATION_INDEX, CONVERSION, CONVERSION_INDEX, DEFAULT_ENCODING, DEFAULT_MAX_WARC_FILE_SIZE, DOT_COMPRESSED_FILE_EXTENSION, DOT_COMPRESSED_WARC_FILE_EXTENSION, DOT_WARC_FILE_EXTENSION, FTP_CONTROL_CONVERSATION_MIMETYPE, HEADER_FIELD_KEYS, HEADER_FIELD_SEPARATOR, HEADER_KEY_BLOCK_DIGEST, HEADER_KEY_CONCURRENT_TO, HEADER_KEY_DATE, HEADER_KEY_ETAG, HEADER_KEY_FILENAME, HEADER_KEY_ID, HEADER_KEY_IP, HEADER_KEY_LAST_MODIFIED, HEADER_KEY_PAYLOAD_DIGEST, HEADER_KEY_PROFILE, HEADER_KEY_TRUNCATED, HEADER_KEY_TYPE, HEADER_KEY_URI, HEADER_LINE_ENCODING, HTTP_REQUEST_MIMETYPE, HTTP_RESPONSE_MIMETYPE, MAX_LINE_LENGTH, MAX_WARC_HEADER_LINE_LENGTH, METADATA, METADATA_INDEX, NAMED_FIELD_CHECKSUM_LABEL, NAMED_FIELD_DESCRIPTION, NAMED_FIELD_FILEDESC, NAMED_FIELD_IP_LABEL, NAMED_FIELD_RELATED_LABEL, NAMED_FIELD_TRUNCATED, NAMED_FIELD_TRUNCATED_VALUE_HEAD, NAMED_FIELD_TRUNCATED_VALUE_LENGTH, NAMED_FIELD_TRUNCATED_VALUE_TIME, NAMED_FIELD_TRUNCATED_VALUE_UNSPECIFIED, NAMED_FIELD_WARCFILENAME, PLACEHOLDER_RECORD_LENGTH_STRING, PROFILE_REVISIT_IDENTICAL_DIGEST, PROFILE_REVISIT_NOT_MODIFIED, REQUEST, REQUEST_INDEX, RESOURCE, RESOURCE_INDEX, RESPONSE, RESPONSE_INDEX, REVISIT, REVISIT_INDEX, TRUNCATED_VALUE_UNSPECIFIED, TYPE, TYPES, TYPES_LIST, WARC_010_ID, WARC_010_MAGIC, WARC_FILE_EXTENSION, WARC_HEADER_ENCODING, WARC_ID, WARC_MAGIC, WARC_VERSION, WARCINFO, WARCINFO_INDEX, WSP
 
Fields inherited from interface org.archive.io.ArchiveFileConstants
ABSOLUTE_OFFSET_KEY, CDX, CDX_FILE, CDX_LINE_BUFFER_SIZE, COMPRESSED_FILE_EXTENSION, CRLF, DATE_FIELD_KEY, DEFAULT_DIGEST_METHOD, DUMP, GZIP_DUMP, HEADER, INVALID_SUFFIX, LENGTH_FIELD_KEY, MIMETYPE_FIELD_KEY, NOHEAD, OCCUPIED_SUFFIX, READER_IDENTIFIER_FIELD_KEY, RECORD_IDENTIFIER_FIELD_KEY, SINGLE_SPACE, TYPE_FIELD_KEY, URL_FIELD_KEY, VERSION_FIELD_KEY
 
Constructor Summary
WARCWriterNAS(java.util.concurrent.atomic.AtomicInteger serialNo, java.util.List<java.io.File> dirs, java.lang.String prefix, java.lang.String suffix, boolean cmprs, long maxSize, java.util.List<java.lang.String> warcinfoData)
          Constructor.
WARCWriterNAS(java.util.concurrent.atomic.AtomicInteger serialNo, java.io.OutputStream out, java.io.File f, boolean cmprs, java.lang.String a14DigitDate, java.util.List<java.lang.String> warcinfoData)
          Constructor.
 
Method Summary
protected  void writeRecord(java.lang.String type, java.lang.String url, java.lang.String create14DigitDate, java.lang.String mimetype, java.net.URI recordId, org.archive.util.anvl.ANVLRecord xtraHeaders, java.io.InputStream contentStream, long contentLength, boolean enforceLength)
           
 
Methods inherited from class org.archive.io.warc.WARCWriter
baseCharacterCheck, checkHeaderLineMimetypeParameter, checkHeaderValue, createFile, createRecordHeader, generateRecordId, generateRecordId, getRecordID, writeMetadataRecord, writeRecord, writeRequestRecord, writeResourceRecord, writeResourceRecord, writeResponseRecord, writeRevisitRecord, writeWarcinfoRecord, writeWarcinfoRecord, writeWarcinfoRecord, writeWarcinfoRecord
 
Methods inherited from class org.archive.io.WriterPoolMember
checkSize, checkWriteable, close, copyFrom, createFile, flush, getBaseFilename, getCreateTimestamp, getFile, getNextDirectory, getOutputStream, getPosition, getTimestampSerialNo, getTimestampSerialNo, isCompressed, postWriteRecordTasks, preWriteRecordTasks, readFullyFrom, readToLimitFrom, write, write, write
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

WARCWriterNAS

public WARCWriterNAS(java.util.concurrent.atomic.AtomicInteger serialNo,
                     java.io.OutputStream out,
                     java.io.File f,
                     boolean cmprs,
                     java.lang.String a14DigitDate,
                     java.util.List<java.lang.String> warcinfoData)
              throws java.io.IOException
Constructor. Takes a stream. Use with caution: there is no upper-bound check on size, so the writer will just keep writing. Only pass streams that are bounded.

Parameters:
serialNo - used to generate unique file name sequences
out - Where to write.
f - File the out is connected to.
cmprs - Compress the content written.
a14DigitDate - If null, we'll write current time.
warcinfoData - File metadata for warcinfo record.
Throws:
java.io.IOException

WARCWriterNAS

public WARCWriterNAS(java.util.concurrent.atomic.AtomicInteger serialNo,
                     java.util.List<java.io.File> dirs,
                     java.lang.String prefix,
                     java.lang.String suffix,
                     boolean cmprs,
                     long maxSize,
                     java.util.List<java.lang.String> warcinfoData)
Constructor.

Parameters:
serialNo - used to generate unique file name sequences
dirs - Where to drop files.
prefix - File prefix to use.
cmprs - Compress the records written.
maxSize - Maximum size for WARC files written.
suffix - File tail to use. If null, unused.
warcinfoData - File metadata for warcinfo record.
Method Detail

writeRecord

protected void writeRecord(java.lang.String type,
                           java.lang.String url,
                           java.lang.String create14DigitDate,
                           java.lang.String mimetype,
                           java.net.URI recordId,
                           org.archive.util.anvl.ANVLRecord xtraHeaders,
                           java.io.InputStream contentStream,
                           long contentLength,
                           boolean enforceLength)
                    throws java.io.IOException
Overrides:
writeRecord in class org.archive.io.warc.WARCWriter
Throws:
java.io.IOException