001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.harvesting.metadata;
025
026import java.io.ByteArrayInputStream;
027import java.io.File;
028import java.io.FileInputStream;
029import java.io.FileNotFoundException;
030import java.io.IOException;
031import java.io.InputStream;
032import java.util.Date;
033
034import org.apache.commons.io.IOUtils;
035import org.jwat.arc.ArcFileNaming;
036import org.jwat.arc.ArcFileNamingSingleFile;
037import org.jwat.arc.ArcFileWriter;
038import org.jwat.arc.ArcFileWriterConfig;
039import org.jwat.arc.ArcHeader;
040import org.jwat.arc.ArcRecord;
041import org.jwat.arc.ArcRecordBase;
042import org.jwat.arc.ArcVersion;
043import org.jwat.arc.ArcVersionBlock;
044import org.jwat.arc.ArcVersionHeader;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048import dk.netarkivet.common.exceptions.IOFailure;
049import dk.netarkivet.common.utils.SystemUtils;
050
051/**
052 * MetadataFileWriter that writes to ARC files.
053 */
054public class MetadataFileWriterArc extends MetadataFileWriter {
055
056    private static final Logger log = LoggerFactory.getLogger(MetadataFileWriterArc.class);
057
058    /** Writer to this jobs metadatafile. This is closed when the metadata is marked as ready. */
059    private ArcFileWriter writer = null;
060
061    /**
062     * Create a <code>MetadataFileWriter</code> for ARC output.
063     *
064     * @param metadataARCFile The metadata ARC <code>File</code>
065     * @return <code>MetadataFileWriter</code> for writing metadata files in ARC
066     */
067    public static MetadataFileWriter createWriter(File metadataARCFile) {
068        MetadataFileWriterArc mtfw = new MetadataFileWriterArc();
069        ArcFileNaming naming = new ArcFileNamingSingleFile(metadataARCFile);
070        ArcFileWriterConfig config = new ArcFileWriterConfig(metadataARCFile.getParentFile(), compressRecords(), Long.MAX_VALUE, true);
071        mtfw.writer = ArcFileWriter.getArcWriterInstance(naming, config);
072        mtfw.open();
073        return mtfw;
074    }
075
076    protected void open() {
077        ArcVersionHeader versionHeader;
078        ArcRecordBase record;
079        byte[] versionHeaderBytes;
080        try {
081            writer.open();
082            versionHeader = ArcVersionHeader.create(ArcVersion.VERSION_1, "InternetArchive");
083            versionHeader.rebuild();
084            versionHeaderBytes = versionHeader.getHeader();
085            record = ArcVersionBlock.createRecord(writer.writer);
086            record.header.recordFieldVersion = 1;
087            record.header.urlStr = "filedesc://" + writer.getFile().getName();
088            record.header.ipAddressStr = "0.0.0.0";
089            record.header.archiveDate = new Date();
090            record.header.contentTypeStr = "text/plain";
091            record.header.archiveLength = new Long(versionHeaderBytes.length);
092            writer.writer.writeHeader(record);
093            writer.writer.writePayload(versionHeaderBytes);
094            writer.writer.closeRecord();
095        } catch (IOException e) {
096            throw new IOFailure("Error opening MetadataFileWriterArc", e);
097        }
098    }
099
100    @Override
101    public void close() {
102        if (writer != null) {
103            try {
104                writer.close();
105            } catch (IOException e) {
106                throw new IOFailure("Error closing MetadataFileWriterArc", e);
107            }
108            writer = null;
109        }
110    }
111
112    @Override
113    public File getFile() {
114        return writer.getFile();
115    }
116
117    @Override
118    public void writeFileTo(File file, String uri, String mime) {
119        writeTo(file, uri, mime);
120    }
121
122    /**
123     * Writes a File to an ArcWriter, if available, otherwise logs the failure to the class-logger.
124     *
125     * @param fileToArchive the File to archive
126     * @param URL the URL with which it is stored in the arcfile
127     * @param mimetype The mimetype of the File-contents
128     * @return true, if file exists, and is written to the arcfile.
129     */
130    @Override
131    public boolean writeTo(File fileToArchive, String URL, String mimetype) {
132        if (!fileToArchive.isFile()) {
133            throw new IOFailure("Not a file: " + fileToArchive.getPath());
134        }
135        log.info("Writing file '{}' to ARC file: {}", fileToArchive, fileToArchive.length());
136        InputStream in = null;
137        try {
138            ArcRecordBase record = ArcRecord.createRecord(writer.writer);
139            ArcHeader header = record.header;
140            header.urlStr = URL;
141            header.archiveDate = new Date(fileToArchive.lastModified());
142            header.ipAddressStr = SystemUtils.getLocalIP();
143            header.contentTypeStr = mimetype;
144            header.archiveLength = fileToArchive.length();
145            in = new FileInputStream(fileToArchive);
146            writer.writer.writeHeader(record);
147            writer.writer.streamPayload(in);
148            writer.writer.closeRecord();
149        } catch (FileNotFoundException e) {
150            throw new IOFailure("Unable to open file: " + fileToArchive.getPath(), e);
151        } catch (IOException e) {
152            throw new IOFailure("Epic IO fail while writing to ARC file: " + fileToArchive.getPath(), e);
153        } finally {
154            IOUtils.closeQuietly(in);
155        }
156        return true;
157    }
158
159    /* Copied from the ArcWriter. (Before change to JWAT) */
160    @Override
161    public void write(String uri, String contentType, String hostIP, long fetchBeginTimeStamp, byte[] payload)
162            throws IOException {
163        ByteArrayInputStream in = null;
164        try {
165            ArcRecordBase record = ArcRecord.createRecord(writer.writer);
166            ArcHeader header = record.header;
167            header.urlStr = uri;
168            header.archiveDate = new Date(fetchBeginTimeStamp);
169            header.ipAddressStr = hostIP;
170            header.archiveLength = new Long(payload.length);
171            header.contentTypeStr = contentType;
172            in = new ByteArrayInputStream(payload);
173            writer.writer.writeHeader(record);
174            writer.writer.streamPayload(in);
175            writer.writer.closeRecord();
176        } catch (IOException e) {
177            throw new IOFailure("Epic IO fail while writing payload to ARC file.", e);
178        } finally {
179            IOUtils.closeQuietly(in);
180        }
181    }
182
183}