001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.distribute.arcrepository;
024
025import java.io.ByteArrayInputStream;
026import java.io.File;
027import java.io.FileOutputStream;
028import java.io.FilterInputStream;
029import java.io.IOException;
030import java.io.InputStream;
031import java.io.OutputStream;
032import java.io.Serializable;
033
034import org.archive.io.ArchiveRecord;
035import org.archive.io.arc.ARCRecord;
036import org.archive.io.warc.WARCRecord;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040import dk.netarkivet.common.CommonSettings;
041import dk.netarkivet.common.distribute.RemoteFile;
042import dk.netarkivet.common.distribute.RemoteFileFactory;
043import dk.netarkivet.common.exceptions.ArgumentNotValid;
044import dk.netarkivet.common.exceptions.IOFailure;
045import dk.netarkivet.common.exceptions.IllegalState;
046import dk.netarkivet.common.utils.FileUtils;
047import dk.netarkivet.common.utils.Settings;
048import dk.netarkivet.common.utils.arc.ARCUtils;
049import dk.netarkivet.common.utils.warc.WARCUtils;
050
051/**
052 * Class to hold the result of a lookup operation in the bitarchive: The metadata information associated with the record
053 * The actual byte content The name of the file the data were retrieved from If length of record exceeds value of
054 * Settings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE The record is stored in a RemoteFile.
055 */
056@SuppressWarnings({"serial"})
057public class BitarchiveRecord implements Serializable {
058
059    /** the log. */
060    private static final transient Logger log = LoggerFactory.getLogger(BitarchiveRecord.class);
061
062    /** The file the data were retrieved from. */
063    private String fileName;
064
065    /** The actual data. */
066    private byte[] objectBuffer;
067
068    /** The offset of the ArchiveRecord contained. */
069    private long offset;
070
071    /** The length of the ArchiveRecord contained. */
072    private long length;
073
074    /** The actual data as a remote file. */
075    private RemoteFile objectAsRemoteFile;
076
077    /** Is the data stored in a RemoteFile. */
078    private boolean isStoredAsRemoteFile = false;
079
080    /** Set after deleting RemoteFile. */
081    private boolean hasRemoteFileBeenDeleted = false;
082
083    /** How large the ARCRecord can before saving as RemoteFile. */
084    private final long LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER = Settings
085            .getLong(CommonSettings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE);
086
087    /**
088     * Creates a BitarchiveRecord from the a ArchiveRecord, which can be either a ARCRecord or WARCRecord. Note that
089     * record metadata is not included with the BitarchiveRecord, only the payload of the record.
090     * <p>
091     * If the length of the record is higher than Settings .BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE the data is
092     * stored in a RemoteFile, otherwise the data is stored in a byte array.
093     *
094     * @param record the ArchiveRecord that the data should come from. We do not close the ArchiveRecord.
095     * @param filename The filename of the ArchiveFile
096     */
097    public BitarchiveRecord(ArchiveRecord record, String filename) {
098        ArgumentNotValid.checkNotNull(record, "ArchiveRecord record");
099        ArgumentNotValid.checkNotNull(filename, "String filename");
100        this.fileName = filename;
101        this.offset = record.getHeader().getOffset();
102        if (record instanceof ARCRecord) {
103            length = record.getHeader().getLength();
104        } else if (record instanceof WARCRecord) {
105            // The length of the payload of the warc-record is not getLength(),
106            // but getLength minus getContentBegin(), which is the number of
107            // bytes used for the record-header!
108            length = record.getHeader().getLength() - record.getHeader().getContentBegin();
109        } else {
110            throw new ArgumentNotValid("Unknown type of ArchiveRecord");
111        }
112        if (length > LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER) {
113            // copy arc-data to local file and create a RemoteFile based on this
114            log.info("Record exceeds limit of {} bytes. Length is {} bytes, Storing as instance of {}",
115                    LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER, length, Settings.get(CommonSettings.REMOTE_FILE_CLASS));
116            if (RemoteFileFactory.isExtendedRemoteFile()) {
117                objectAsRemoteFile = RemoteFileFactory.getExtendedInstance(record);
118                isStoredAsRemoteFile = true;
119            } else {
120                File localTmpFile = null;
121                try {
122                    localTmpFile = File.createTempFile("BitarchiveRecord-" + fileName, ".tmp", FileUtils.getTempDir());
123                    record.dump(new FileOutputStream(localTmpFile));
124                    objectAsRemoteFile = RemoteFileFactory.getMovefileInstance(localTmpFile);
125                    isStoredAsRemoteFile = true;
126                } catch (IOException e) {
127                    throw new IOFailure("Unable to store record(" + fileName + "," + offset + ") as remotefile", e);
128                }
129            }
130        } else { // Store data in objectbuffer
131            try {
132                if (record instanceof ARCRecord) {
133                    objectBuffer = ARCUtils.readARCRecord((ARCRecord) record);
134                } else if (record instanceof WARCRecord) {
135                    objectBuffer = WARCUtils.readWARCRecord((WARCRecord) record);
136                }
137                log.debug("Bytes stored in objectBuffer: {}", objectBuffer.length);
138            } catch (IOException e) {
139                throw new ExceptionInInitializerError(e);
140            }
141        }
142    }
143
144    /**
145     * Returns the file that this information was loaded from.
146     *
147     * @return the file that this ARC record comes from.
148     */
149    public String getFile() {
150        return fileName;
151    }
152
153    /**
154     * Returns the length of the ARCRecord contained.
155     *
156     * @return the length of the ARCRecord contained
157     */
158    public long getLength() {
159        return length;
160    }
161
162    /**
163     * Retrieve the data in the record. If data is in RemoteFile, this operation deletes the RemoteFile.
164     *
165     * @return the data from the ARCRecord as an InputStream.
166     * @throws IllegalState if remotefile already deleted
167     */
168    public InputStream getData() {
169        InputStream result = null;
170        if (isStoredAsRemoteFile) {
171            if (hasRemoteFileBeenDeleted) {
172                throw new IllegalState("RemoteFile has already been deleted");
173            }
174            log.info("Reading {} bytes from RemoteFile", length);
175            InputStream rfInputStream = objectAsRemoteFile.getInputStream();
176            result = new FilterInputStream(rfInputStream) {
177                public void close() throws IOException {
178                    super.close();
179                    objectAsRemoteFile.cleanup();
180                    hasRemoteFileBeenDeleted = true;
181                }
182            };
183        } else {
184            log.debug("Reading {} bytes from objectBuffer", length);
185            result = new ByteArrayInputStream(objectBuffer);
186        }
187        return result;
188    }
189
190    /**
191     * Deliver the data in the record to a given OutputStream. If data is in RemoteFile, this operation deletes the
192     * RemoteFile
193     *
194     * @param out deliver the data to this outputstream
195     * @throws IOFailure if any IOException occurs reading or writing the data
196     * @throws IllegalState if remotefile already deleted
197     */
198    public void getData(OutputStream out) {
199        ArgumentNotValid.checkNotNull(out, "OutputStream out");
200        if (isStoredAsRemoteFile) {
201            if (hasRemoteFileBeenDeleted) {
202                throw new IllegalState("RemoteFile has already been deleted");
203            }
204            try {
205                log.debug("Reading {} bytes from RemoteFile", length);
206                objectAsRemoteFile.appendTo(out);
207            } finally {
208                log.trace("Deleting the RemoteFile '{}'.", objectAsRemoteFile.getName());
209                objectAsRemoteFile.cleanup();
210                hasRemoteFileBeenDeleted = true;
211            }
212        } else {
213            try {
214                log.debug("Reading {} bytes from objectBuffer", length);
215                out.write(objectBuffer, 0, objectBuffer.length);
216            } catch (IOException e) {
217                throw new IOFailure("Unable to write data from " + "objectBuffer to the outputstream", e);
218            }
219        }
220    }
221
222}