001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.archive.bitarchive;
024
025import java.io.File;
026import java.io.FileOutputStream;
027import java.io.IOException;
028import java.io.OutputStream;
029import java.util.Date;
030
031import org.archive.io.ArchiveReader;
032import org.archive.io.ArchiveReaderFactory;
033import org.archive.io.ArchiveRecord;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037import dk.netarkivet.common.distribute.RemoteFile;
038import dk.netarkivet.common.distribute.RemoteFileFactory;
039import dk.netarkivet.common.distribute.arcrepository.BatchStatus;
040import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IOFailure;
043import dk.netarkivet.common.exceptions.PermissionDenied;
044import dk.netarkivet.common.exceptions.UnknownID;
045import dk.netarkivet.common.utils.FileUtils;
046import dk.netarkivet.common.utils.batch.BatchLocalFiles;
047import dk.netarkivet.common.utils.batch.FileBatchJob;
048
049/**
050 * The central class in the bit archive. Implements the API: upload(), get(), correct(), batch(). A bit archive is
051 * expected to not know about any other bit archives, and is not considered responsible for making MD5 checksums.
052 */
053public class Bitarchive {
054
055    /** Administrative data for the current bitarchive. */
056    private BitarchiveAdmin admin;
057
058    /** Logging output place. */
059    protected static final Logger log = LoggerFactory.getLogger(Bitarchive.class);
060
061    /** The instance of the bitarchive. */
062    private static Bitarchive instance;
063
064    /**
065     * Create a new Bitarchive with files stored on local disk in one or more directories. This can reopen an existing
066     * bit archive or create a Bitarchive from scratch, with no files on disk.
067     *
068     * @throws PermissionDenied if creating directory fails.
069     */
070    private Bitarchive() throws PermissionDenied {
071        log.debug("Starting bit archive");
072        admin = BitarchiveAdmin.getInstance();
073    }
074
075    /**
076     * Release all resources allocated by the bitarchive Ensures that all admin data and log data are flushed.
077     */
078    public void close() {
079        admin.close();
080        instance = null;
081    }
082
083    /**
084     * Get an ARC or WARC record out of the archive. Returns null if the archive file is not found in this bitarchive.
085     *
086     * @param arcfile The name of an Archive file.
087     * @param index Index of the Archive record in the file
088     * @return A BitarchiveRecord object for the record in question. This record contains the data from the file.
089     * @throws ArgumentNotValid If arcfile is null/empty, or if index is out of bounds
090     * @throws IOFailure If there were problems reading the arcfile.
091     * @throws UnknownID Does it really, and when ?
092     */
093    public BitarchiveRecord get(String arcfile, long index) throws ArgumentNotValid, UnknownID, IOFailure {
094        /*
095         * TODO Change return type into RemoteFile. This should only cause changes in GetFileMessage.
096         */
097        log.info("GET: {}:{}", arcfile, index);
098        ArgumentNotValid.checkNotNullOrEmpty(arcfile, "arcfile");
099        BitarchiveARCFile barc = admin.lookup(arcfile);
100        if (barc == null) {
101            log.debug("Get request for file not on this machine: {}", arcfile);
102            return null;
103        }
104        ArchiveReader arcReader = null;
105        ArchiveRecord arc = null;
106        try {
107            if ((barc.getSize() <= index) || (index < 0)) {
108                log.warn("GET: index out of bounds: {}:{} > {}", arcfile, index, barc.getSize());
109                throw new ArgumentNotValid("GET: index out of bounds: " + arcfile + ":" + index + " > "
110                        + barc.getSize());
111            }
112            File in = barc.getFilePath();
113            arcReader = ArchiveReaderFactory.get(in);
114            arc = arcReader.get(index);
115            BitarchiveRecord result = new BitarchiveRecord(arc, arcfile);
116
117            // release resources locked
118            log.info("GET: Got {} bytes of data from {}:{}", result.getLength(), arcfile, index);
119            // try {
120            // Thread.sleep(1000);
121            // } catch (InterruptedException e) {
122            //
123            // }
124            return result;
125        } catch (IOException e) {
126            log.warn("Could not get data from {} at: {}; Stored at: {}", arcfile, index, barc.getFilePath());
127            throw new IOFailure("Could not get data from " + arcfile + " at: " + index + "; Stored at: "
128                    + barc.getFilePath(), e);
129        } catch (IndexOutOfBoundsException e) {
130            log.warn("Could not get data from {} at: {}; Stored at: {}", arcfile, index, barc.getFilePath());
131            throw new IOFailure("Could not get data from " + arcfile + " at: " + index + "; Stored at: "
132                    + barc.getFilePath(), e);
133        } finally {
134            try {
135                if (arc != null) {
136                    arc.close();
137                }
138                if (arcReader != null) {
139                    arcReader.close();
140                }
141            } catch (IOException e) {
142                log.warn("Could not close ARCReader or ARCRecord!", e);
143            }
144        }
145    }
146
147    /**
148     * Upload an ARC file to this archive.
149     *
150     * @param arcfile A file to add to the archive.
151     * @param fileName the arcfiles filename. The file will be identified in the archive by this filename
152     * @throws PermissionDenied if arcfile already exists in the archive
153     * @throws IOFailure if an IO failure occurs (e.g. running out of disk space)
154     * @throws ArgumentNotValid if arcfile is null or the filename is null or empty.
155     */
156    public void upload(RemoteFile arcfile, String fileName) throws PermissionDenied, ArgumentNotValid, IOFailure {
157        log.info("Upload: {}", arcfile);
158        // Verify input parameters
159        ArgumentNotValid.checkNotNull(arcfile, "arcfile");
160        ArgumentNotValid.checkNotNullOrEmpty(fileName, "fileName");
161
162        // Check if file already exists in the archive
163        if (admin.lookup(fileName) != null) {
164            log.warn("Upload: file already exists: '{}' while uploading '{}'.", fileName, arcfile);
165            throw new PermissionDenied("Upload: file already exists: '" + fileName + "' while uploading '" + arcfile
166                    + "'.");
167        }
168
169        // Everything seems ok, initiate copy of file into archive
170        copyRemoteFileToArchive(arcfile, fileName);
171        log.info("Upload: completed uploading {}", fileName);
172    }
173
174    /**
175     * Run a batch job on all ARC entries in the archive.
176     * <p>
177     * This currently runs synchronously, and returns only after finish() has been called.
178     *
179     * @param bitarchiveAppId A String representing the bitarchive AppId.
180     * @param job An object that implements the ARCBatchJob interface. The initialize() method will be called before
181     * processing and the finish() method will be called afterwards. The process() method will be called with each ARC
182     * entry.
183     * @return A localBatchStatus
184     * @throws ArgumentNotValid if job or file is null.
185     * @throws IOFailure if there was problems writing to the RemoteFile
186     */
187    public BatchStatus batch(String bitarchiveAppId, final FileBatchJob job) throws ArgumentNotValid, IOFailure {
188        ArgumentNotValid.checkNotNullOrEmpty(bitarchiveAppId, "String bitarchiveAppId");
189        ArgumentNotValid.checkNotNull(job, "FileBatchJob job");
190        log.info("Starting batch job on bitarchive application with id '{}': '{}', on filename-pattern: '{}'",
191                bitarchiveAppId, job.getClass().getName(), job.getFilenamePattern());
192        BatchStatus returnStatus;
193
194        File tmpFile = null;
195        try {
196            tmpFile = File.createTempFile("BatchOutput", "", FileUtils.getTempDir());
197            final OutputStream os = new FileOutputStream(tmpFile);
198
199            try {
200                // Run the batch job
201                log.debug("Batch: Job {} started at {}", job, new Date());
202                File[] processFiles = admin.getFilesMatching(job.getFilenamePattern());
203
204                final BatchLocalFiles localBatchRunner = new BatchLocalFiles(processFiles);
205                localBatchRunner.run(job, os);
206                log.debug("Batch: Job {} finished at {}", job, new Date());
207            } finally { // Make sure the OutputStream is closed no matter what.
208                // This allows us to delete the file on Windows
209                // in case of error.
210                try {
211                    os.close();
212                } catch (IOException e) {
213                    // We're cleaning up, failing to close won't stop us
214                    log.warn("Failed to close outputstream in batch");
215                }
216            }
217            // write output from batch job back to remote file
218            returnStatus = new BatchStatus(bitarchiveAppId, job.getFilesFailed(), job.getNoOfFilesProcessed(),
219                    RemoteFileFactory.getMovefileInstance(tmpFile), job.getExceptions());
220        } catch (IOException e) {
221            log.error("Failed to create temporary file for batch {}", job, e);
222            throw new IOFailure("Failed to create temporary file for batch " + job, e);
223        }
224        log.info(
225                "Finished batch job on bitarchive application with id '{}': '{}', on filename-pattern: '{}' + with result: {}",
226                bitarchiveAppId, job.getClass().getName(), job.getFilenamePattern(), returnStatus);
227        return returnStatus;
228    }
229
230    /**
231     * Copies a remote file into the bitarchive storage and returns the storage position of the file.
232     *
233     * @param arcfile The source file.
234     * @param fileName the source files filename.
235     * @return the storage position of the file.
236     * @throws IOFailure if an error occurs while copying into the archive.
237     */
238    private File copyRemoteFileToArchive(RemoteFile arcfile, String fileName) throws IOFailure {
239        File tempDestination = admin.getTemporaryPath(fileName, arcfile.getSize());
240        File destination = null;
241        try {
242            // The file is first copied to a temporary destination on the same
243            // mount. The reason for this is to eliminate that there are files
244            // in the file-directory that are currupted because of upload
245            // errors. For example if the there is a break down after only half
246            // the file is uploaded. It also means that we do not need to clean
247            // up in the file directory, in case of failure - only the temporary
248            // destination needs clean up.
249            arcfile.copyTo(tempDestination);
250            // Note that the move operation is a constant time operation within
251            // the same mount
252            destination = admin.moveToStorage(tempDestination);
253        } catch (Throwable e) {
254            // destination is known to be null here, so don't worry about it.
255            if (tempDestination.exists()) {
256                tempDestination.delete();
257            }
258            throw new IOFailure("Can't copy file into archive: " + fileName, e);
259        }
260        return destination;
261    }
262
263    /**
264     * Get a file for a given arcFileID.
265     *
266     * @param arcFileID name of the file to be retrieved.
267     * @return The file requested or null if not found
268     * @throws ArgumentNotValid If arcFileID was null or empty.
269     */
270    public File getFile(String arcFileID) throws ArgumentNotValid {
271        log.info("Get file '{}'", arcFileID);
272        ArgumentNotValid.checkNotNullOrEmpty(arcFileID, "arcFileID");
273        BitarchiveARCFile barc = admin.lookup(arcFileID);
274        if (barc == null) { // the file with ID: arcFileID was not found
275            log.debug("File '{}' not found on this machine", arcFileID);
276            return null;
277        }
278
279        File path = barc.getFilePath();
280        log.info("Getting file '{}'", path);
281        return path;
282    }
283
284    /**
285     * Get the one instance of the bitarchive.
286     *
287     * @return An instance of the Bitarchive class.
288     * @throws PermissionDenied If the storage area used for files is not accessible.
289     */
290    public static Bitarchive getInstance() throws PermissionDenied {
291        if (instance == null) {
292            instance = new Bitarchive();
293        }
294        return instance;
295    }
296
297}