001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.harvesting.metadata; 025 026import java.io.ByteArrayInputStream; 027import java.io.File; 028import java.io.FileInputStream; 029import java.io.FileNotFoundException; 030import java.io.IOException; 031import java.io.InputStream; 032import java.util.Date; 033 034import org.apache.commons.io.IOUtils; 035import org.jwat.arc.ArcFileNaming; 036import org.jwat.arc.ArcFileNamingSingleFile; 037import org.jwat.arc.ArcFileWriter; 038import org.jwat.arc.ArcFileWriterConfig; 039import org.jwat.arc.ArcHeader; 040import org.jwat.arc.ArcRecord; 041import org.jwat.arc.ArcRecordBase; 042import org.jwat.arc.ArcVersion; 043import org.jwat.arc.ArcVersionBlock; 044import org.jwat.arc.ArcVersionHeader; 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048import dk.netarkivet.common.exceptions.IOFailure; 049import dk.netarkivet.common.utils.SystemUtils; 050 051/** 052 * MetadataFileWriter that writes to ARC files. 053 */ 054public class MetadataFileWriterArc extends MetadataFileWriter { 055 056 private static final Logger log = LoggerFactory.getLogger(MetadataFileWriterArc.class); 057 058 /** Writer to this jobs metadatafile. This is closed when the metadata is marked as ready. */ 059 private ArcFileWriter writer = null; 060 061 /** 062 * Create a <code>MetadataFileWriter</code> for ARC output. 063 * 064 * @param metadataARCFile The metadata ARC <code>File</code> 065 * @return <code>MetadataFileWriter</code> for writing metadata files in ARC 066 */ 067 public static MetadataFileWriter createWriter(File metadataARCFile) { 068 MetadataFileWriterArc mtfw = new MetadataFileWriterArc(); 069 ArcFileNaming naming = new ArcFileNamingSingleFile(metadataARCFile); 070 ArcFileWriterConfig config = new ArcFileWriterConfig(metadataARCFile.getParentFile(), compressRecords(), Long.MAX_VALUE, true); 071 mtfw.writer = ArcFileWriter.getArcWriterInstance(naming, config); 072 mtfw.open(); 073 return mtfw; 074 } 075 076 protected void open() { 077 ArcVersionHeader versionHeader; 078 ArcRecordBase record; 079 byte[] versionHeaderBytes; 080 try { 081 writer.open(); 082 versionHeader = ArcVersionHeader.create(ArcVersion.VERSION_1, "InternetArchive"); 083 versionHeader.rebuild(); 084 versionHeaderBytes = versionHeader.getHeader(); 085 record = ArcVersionBlock.createRecord(writer.writer); 086 record.header.recordFieldVersion = 1; 087 record.header.urlStr = "filedesc://" + writer.getFile().getName(); 088 record.header.ipAddressStr = "0.0.0.0"; 089 record.header.archiveDate = new Date(); 090 record.header.contentTypeStr = "text/plain"; 091 record.header.archiveLength = new Long(versionHeaderBytes.length); 092 writer.writer.writeHeader(record); 093 writer.writer.writePayload(versionHeaderBytes); 094 writer.writer.closeRecord(); 095 } catch (IOException e) { 096 throw new IOFailure("Error opening MetadataFileWriterArc", e); 097 } 098 } 099 100 @Override 101 public void close() { 102 if (writer != null) { 103 try { 104 writer.close(); 105 } catch (IOException e) { 106 throw new IOFailure("Error closing MetadataFileWriterArc", e); 107 } 108 writer = null; 109 } 110 } 111 112 @Override 113 public File getFile() { 114 return writer.getFile(); 115 } 116 117 @Override 118 public void writeFileTo(File file, String uri, String mime) { 119 writeTo(file, uri, mime); 120 } 121 122 /** 123 * Writes a File to an ArcWriter, if available, otherwise logs the failure to the class-logger. 124 * 125 * @param fileToArchive the File to archive 126 * @param URL the URL with which it is stored in the arcfile 127 * @param mimetype The mimetype of the File-contents 128 * @return true, if file exists, and is written to the arcfile. 129 */ 130 @Override 131 public boolean writeTo(File fileToArchive, String URL, String mimetype) { 132 if (!fileToArchive.isFile()) { 133 throw new IOFailure("Not a file: " + fileToArchive.getPath()); 134 } 135 log.info("Writing file '{}' to ARC file: {}", fileToArchive, fileToArchive.length()); 136 InputStream in = null; 137 try { 138 ArcRecordBase record = ArcRecord.createRecord(writer.writer); 139 ArcHeader header = record.header; 140 header.urlStr = URL; 141 header.archiveDate = new Date(fileToArchive.lastModified()); 142 header.ipAddressStr = SystemUtils.getLocalIP(); 143 header.contentTypeStr = mimetype; 144 header.archiveLength = fileToArchive.length(); 145 in = new FileInputStream(fileToArchive); 146 writer.writer.writeHeader(record); 147 writer.writer.streamPayload(in); 148 writer.writer.closeRecord(); 149 } catch (FileNotFoundException e) { 150 throw new IOFailure("Unable to open file: " + fileToArchive.getPath(), e); 151 } catch (IOException e) { 152 throw new IOFailure("Epic IO fail while writing to ARC file: " + fileToArchive.getPath(), e); 153 } finally { 154 IOUtils.closeQuietly(in); 155 } 156 return true; 157 } 158 159 /* Copied from the ArcWriter. (Before change to JWAT) */ 160 @Override 161 public void write(String uri, String contentType, String hostIP, long fetchBeginTimeStamp, byte[] payload) 162 throws IOException { 163 ByteArrayInputStream in = null; 164 try { 165 ArcRecordBase record = ArcRecord.createRecord(writer.writer); 166 ArcHeader header = record.header; 167 header.urlStr = uri; 168 header.archiveDate = new Date(fetchBeginTimeStamp); 169 header.ipAddressStr = hostIP; 170 header.archiveLength = new Long(payload.length); 171 header.contentTypeStr = contentType; 172 in = new ByteArrayInputStream(payload); 173 writer.writer.writeHeader(record); 174 writer.writer.streamPayload(in); 175 writer.writer.closeRecord(); 176 } catch (IOException e) { 177 throw new IOFailure("Epic IO fail while writing payload to ARC file.", e); 178 } finally { 179 IOUtils.closeQuietly(in); 180 } 181 } 182 183}