001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.distribute.arcrepository;
024
025import java.io.ByteArrayInputStream;
026import java.io.File;
027import java.io.FileOutputStream;
028import java.io.FilterInputStream;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.OutputStream;
032import java.io.Serializable;
033
034import org.archive.io.ArchiveReader;
035import org.archive.io.ArchiveReaderFactory;
036import org.archive.io.ArchiveRecord;
037import org.archive.io.arc.ARCRecord;
038import org.archive.io.warc.WARCRecord;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import dk.netarkivet.common.CommonSettings;
043import dk.netarkivet.common.distribute.RemoteFile;
044import dk.netarkivet.common.distribute.RemoteFileFactory;
045import dk.netarkivet.common.exceptions.ArgumentNotValid;
046import dk.netarkivet.common.exceptions.IOFailure;
047import dk.netarkivet.common.exceptions.IllegalState;
048import dk.netarkivet.common.utils.FileUtils;
049import dk.netarkivet.common.utils.Settings;
050import dk.netarkivet.common.utils.arc.ARCUtils;
051import dk.netarkivet.common.utils.warc.WARCUtils;
052
053/**
054 * Class to hold the result of a lookup operation in the bitarchive: The metadata information associated with the record
055 * The actual byte content The name of the file the data were retrieved from If length of record exceeds value of
056 * Settings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE The record is stored in a RemoteFile.
057 */
058@SuppressWarnings({"serial"})
059public class BitarchiveRecord implements Serializable {
060
061    /** the log. */
062    private static final transient Logger log = LoggerFactory.getLogger(BitarchiveRecord.class);
063
064    /** The file the data were retrieved from. */
065    private String fileName;
066
067    /** The actual data. */
068    private byte[] objectBuffer;
069
070    /** The offset of the ArchiveRecord contained. */
071    private long offset;
072
073    /** The length of the ArchiveRecord contained. */
074    private long length;
075
076    /** The actual data as a remote file. */
077    private RemoteFile objectAsRemoteFile;
078
079    /** Is the data stored in a RemoteFile. */
080    private boolean isStoredAsRemoteFile = false;
081
082    /** Set after deleting RemoteFile. */
083    private boolean hasRemoteFileBeenDeleted = false;
084
085    /** How large the ARCRecord can before saving as RemoteFile. */
086    private final long LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER = Settings
087            .getLong(CommonSettings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE);
088
089    /**
090     * Creates a BitarchiveRecord from the a ArchiveRecord, which can be either a ARCRecord or WARCRecord. Note that
091     * record metadata is not included with the BitarchiveRecord, only the payload of the record.
092     * <p>
093     * If the length of the record is higher than Settings .BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE the data is
094     * stored in a RemoteFile, otherwise the data is stored in a byte array.
095     *
096     * @param record the ArchiveRecord that the data should come from. We do not close the ArchiveRecord.
097     * @param filename The filename of the ArchiveFile
098     */
099    public BitarchiveRecord(ArchiveRecord record, String filename) {
100        ArgumentNotValid.checkNotNull(record, "ArchiveRecord record");
101        ArgumentNotValid.checkNotNull(filename, "String filename");
102        this.fileName = filename;
103        this.offset = record.getHeader().getOffset();
104        if (record instanceof ARCRecord) {
105            length = record.getHeader().getLength();
106        } else if (record instanceof WARCRecord) {
107            // The length of the payload of the warc-record is not getLength(),
108            // but getLength minus getContentBegin(), which is the number of
109            // bytes used for the record-header!
110            length = record.getHeader().getLength() - record.getHeader().getContentBegin();
111        } else {
112            throw new ArgumentNotValid("Unknown type of ArchiveRecord");
113        }
114        if (length > LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER) {
115            // copy arc-data to local file and create a RemoteFile based on this
116            log.info("Record exceeds limit of {} bytes. Length is {} bytes, Storing as instance of {}",
117                    LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER, length, Settings.get(CommonSettings.REMOTE_FILE_CLASS));
118            if (RemoteFileFactory.isExtendedRemoteFile()) {
119                objectAsRemoteFile = RemoteFileFactory.getExtendedInstance(record);
120                isStoredAsRemoteFile = true;
121            } else {
122                File localTmpFile = null;
123                try {
124                    localTmpFile = File.createTempFile("BitarchiveRecord-" + fileName, ".tmp", FileUtils.getTempDir());
125                    record.dump(new FileOutputStream(localTmpFile));
126                    objectAsRemoteFile = RemoteFileFactory.getMovefileInstance(localTmpFile);
127                    isStoredAsRemoteFile = true;
128                } catch (IOException e) {
129                    throw new IOFailure("Unable to store record(" + fileName + "," + offset + ") as remotefile", e);
130                }
131            }
132        } else { // Store data in objectbuffer
133            try {
134                if (record instanceof ARCRecord) {
135                    objectBuffer = ARCUtils.readARCRecord((ARCRecord) record);
136                } else if (record instanceof WARCRecord) {
137                    objectBuffer = WARCUtils.readWARCRecord((WARCRecord) record);
138                }
139                log.debug("Bytes stored in objectBuffer: {}", objectBuffer.length);
140            } catch (IOException e) {
141                throw new ExceptionInInitializerError(e);
142            }
143        }
144    }
145
146    /**
147     * Returns the file that this information was loaded from.
148     *
149     * @return the file that this ARC record comes from.
150     */
151    public String getFile() {
152        return fileName;
153    }
154
155    /**
156     * Returns the length of the ARCRecord contained.
157     *
158     * @return the length of the ARCRecord contained
159     */
160    public long getLength() {
161        return length;
162    }
163
164    /**
165     * Retrieve the data in the record. If data is in RemoteFile, this operation deletes the RemoteFile.
166     *
167     * @return the data from the ARCRecord as an InputStream.
168     * @throws IllegalState if remotefile already deleted
169     */
170    public InputStream getData() {
171        InputStream result = null;
172        if (isStoredAsRemoteFile) {
173            if (hasRemoteFileBeenDeleted) {
174                throw new IllegalState("RemoteFile has already been deleted");
175            }
176            log.info("Reading {} bytes from RemoteFile", length);
177            InputStream rfInputStream = objectAsRemoteFile.getInputStream();
178            result = new FilterInputStream(rfInputStream) {
179                public void close() throws IOException {
180                    super.close();
181                    objectAsRemoteFile.cleanup();
182                    hasRemoteFileBeenDeleted = true;
183                }
184            };
185        } else {
186            log.debug("Reading {} bytes from objectBuffer", length);
187            result = new ByteArrayInputStream(objectBuffer);
188        }
189        return result;
190    }
191
192    /**
193     * Deliver the data in the record to a given OutputStream. If data is in RemoteFile, this operation deletes the
194     * RemoteFile
195     *
196     * @param out deliver the data to this outputstream
197     * @throws IOFailure if any IOException occurs reading or writing the data
198     * @throws IllegalState if remotefile already deleted
199     */
200    public void getData(OutputStream out) {
201        ArgumentNotValid.checkNotNull(out, "OutputStream out");
202        if (isStoredAsRemoteFile) {
203            if (hasRemoteFileBeenDeleted) {
204                throw new IllegalState("RemoteFile has already been deleted");
205            }
206            try {
207                log.debug("Reading {} bytes from RemoteFile", length);
208                objectAsRemoteFile.appendTo(out);
209            } finally {
210                log.trace("Deleting the RemoteFile '{}'.", objectAsRemoteFile.getName());
211                objectAsRemoteFile.cleanup();
212                hasRemoteFileBeenDeleted = true;
213            }
214        } else {
215            try {
216                log.debug("Reading {} bytes from objectBuffer", length);
217                out.write(objectBuffer, 0, objectBuffer.length);
218            } catch (IOException e) {
219                throw new IOFailure("Unable to write data from " + "objectBuffer to the outputstream", e);
220            }
221        }
222    }
223
224
225    public static BitarchiveRecord getBitarchiveRecord(String filename, File f, long index) {
226        try (ArchiveReader reader = ArchiveReaderFactory.get(f, index); ArchiveRecord record = reader.get();){
227            return new BitarchiveRecord(record, filename);
228        } catch (IOException e) {
229            throw new IOFailure("Error reading record from '" + filename + "' in file '"+f.getAbsolutePath()+"' offset " + index, e);
230        }
231    }
232
233}