001/* 002 * #%L 003 * Netarchivesuite - archive 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.archive.bitarchive; 024 025import java.io.File; 026import java.io.FileOutputStream; 027import java.io.IOException; 028import java.io.OutputStream; 029import java.util.Date; 030 031import org.archive.io.ArchiveReader; 032import org.archive.io.ArchiveReaderFactory; 033import org.archive.io.ArchiveRecord; 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036 037import dk.netarkivet.common.distribute.RemoteFile; 038import dk.netarkivet.common.distribute.RemoteFileFactory; 039import dk.netarkivet.common.distribute.arcrepository.BatchStatus; 040import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord; 041import dk.netarkivet.common.exceptions.ArgumentNotValid; 042import dk.netarkivet.common.exceptions.IOFailure; 043import dk.netarkivet.common.exceptions.PermissionDenied; 044import dk.netarkivet.common.exceptions.UnknownID; 045import dk.netarkivet.common.utils.FileUtils; 046import dk.netarkivet.common.utils.batch.BatchLocalFiles; 047import dk.netarkivet.common.utils.batch.FileBatchJob; 048 049/** 050 * The central class in the bit archive. Implements the API: upload(), get(), correct(), batch(). A bit archive is 051 * expected to not know about any other bit archives, and is not considered responsible for making MD5 checksums. 052 */ 053public class Bitarchive { 054 055 /** Administrative data for the current bitarchive. */ 056 private BitarchiveAdmin admin; 057 058 /** Logging output place. */ 059 protected static final Logger log = LoggerFactory.getLogger(Bitarchive.class); 060 061 /** The instance of the bitarchive. */ 062 private static Bitarchive instance; 063 064 /** 065 * Create a new Bitarchive with files stored on local disk in one or more directories. This can reopen an existing 066 * bit archive or create a Bitarchive from scratch, with no files on disk. 067 * 068 * @throws PermissionDenied if creating directory fails. 069 */ 070 private Bitarchive() throws PermissionDenied { 071 log.debug("Starting bit archive"); 072 admin = BitarchiveAdmin.getInstance(); 073 } 074 075 /** 076 * Release all resources allocated by the bitarchive Ensures that all admin data and log data are flushed. 077 */ 078 public void close() { 079 admin.close(); 080 instance = null; 081 } 082 083 /** 084 * Get an ARC or WARC record out of the archive. Returns null if the archive file is not found in this bitarchive. 085 * 086 * @param arcfile The name of an Archive file. 087 * @param index Index of the Archive record in the file 088 * @return A BitarchiveRecord object for the record in question. This record contains the data from the file. 089 * @throws ArgumentNotValid If arcfile is null/empty, or if index is out of bounds 090 * @throws IOFailure If there were problems reading the arcfile. 091 * @throws UnknownID Does it really, and when ? 092 */ 093 public BitarchiveRecord get(String arcfile, long index) throws ArgumentNotValid, UnknownID, IOFailure { 094 /* 095 * TODO Change return type into RemoteFile. This should only cause changes in GetFileMessage. 096 */ 097 log.info("GET: {}:{}", arcfile, index); 098 ArgumentNotValid.checkNotNullOrEmpty(arcfile, "arcfile"); 099 BitarchiveARCFile barc = admin.lookup(arcfile); 100 if (barc == null) { 101 log.debug("Get request for file not on this machine: {}", arcfile); 102 return null; 103 } 104 ArchiveReader arcReader = null; 105 ArchiveRecord arc = null; 106 try { 107 if ((barc.getSize() <= index) || (index < 0)) { 108 log.warn("GET: index out of bounds: {}:{} > {}", arcfile, index, barc.getSize()); 109 throw new ArgumentNotValid("GET: index out of bounds: " + arcfile + ":" + index + " > " 110 + barc.getSize()); 111 } 112 File in = barc.getFilePath(); 113 arcReader = ArchiveReaderFactory.get(in); 114 arc = arcReader.get(index); 115 BitarchiveRecord result = new BitarchiveRecord(arc, arcfile); 116 117 // release resources locked 118 log.info("GET: Got {} bytes of data from {}:{}", result.getLength(), arcfile, index); 119 // try { 120 // Thread.sleep(1000); 121 // } catch (InterruptedException e) { 122 // 123 // } 124 return result; 125 } catch (IOException e) { 126 log.warn("Could not get data from {} at: {}; Stored at: {}", arcfile, index, barc.getFilePath()); 127 throw new IOFailure("Could not get data from " + arcfile + " at: " + index + "; Stored at: " 128 + barc.getFilePath(), e); 129 } catch (IndexOutOfBoundsException e) { 130 log.warn("Could not get data from {} at: {}; Stored at: {}", arcfile, index, barc.getFilePath()); 131 throw new IOFailure("Could not get data from " + arcfile + " at: " + index + "; Stored at: " 132 + barc.getFilePath(), e); 133 } finally { 134 try { 135 if (arc != null) { 136 arc.close(); 137 } 138 if (arcReader != null) { 139 arcReader.close(); 140 } 141 } catch (IOException e) { 142 log.warn("Could not close ARCReader or ARCRecord!", e); 143 } 144 } 145 } 146 147 /** 148 * Upload an ARC file to this archive. 149 * 150 * @param arcfile A file to add to the archive. 151 * @param fileName the arcfiles filename. The file will be identified in the archive by this filename 152 * @throws PermissionDenied if arcfile already exists in the archive 153 * @throws IOFailure if an IO failure occurs (e.g. running out of disk space) 154 * @throws ArgumentNotValid if arcfile is null or the filename is null or empty. 155 */ 156 public void upload(RemoteFile arcfile, String fileName) throws PermissionDenied, ArgumentNotValid, IOFailure { 157 log.info("Upload: {}", arcfile); 158 // Verify input parameters 159 ArgumentNotValid.checkNotNull(arcfile, "arcfile"); 160 ArgumentNotValid.checkNotNullOrEmpty(fileName, "fileName"); 161 162 // Check if file already exists in the archive 163 if (admin.lookup(fileName) != null) { 164 log.warn("Upload: file already exists: '{}' while uploading '{}'.", fileName, arcfile); 165 throw new PermissionDenied("Upload: file already exists: '" + fileName + "' while uploading '" + arcfile 166 + "'."); 167 } 168 169 // Everything seems ok, initiate copy of file into archive 170 copyRemoteFileToArchive(arcfile, fileName); 171 log.info("Upload: completed uploading {}", fileName); 172 } 173 174 /** 175 * Run a batch job on all ARC entries in the archive. 176 * <p> 177 * This currently runs synchronously, and returns only after finish() has been called. 178 * 179 * @param bitarchiveAppId A String representing the bitarchive AppId. 180 * @param job An object that implements the ARCBatchJob interface. The initialize() method will be called before 181 * processing and the finish() method will be called afterwards. The process() method will be called with each ARC 182 * entry. 183 * @return A localBatchStatus 184 * @throws ArgumentNotValid if job or file is null. 185 * @throws IOFailure if there was problems writing to the RemoteFile 186 */ 187 public BatchStatus batch(String bitarchiveAppId, final FileBatchJob job) throws ArgumentNotValid, IOFailure { 188 ArgumentNotValid.checkNotNullOrEmpty(bitarchiveAppId, "String bitarchiveAppId"); 189 ArgumentNotValid.checkNotNull(job, "FileBatchJob job"); 190 log.info("Starting batch job on bitarchive application with id '{}': '{}', on filename-pattern: '{}'", 191 bitarchiveAppId, job.getClass().getName(), job.getFilenamePattern()); 192 BatchStatus returnStatus; 193 194 File tmpFile = null; 195 try { 196 tmpFile = File.createTempFile("BatchOutput", "", FileUtils.getTempDir()); 197 final OutputStream os = new FileOutputStream(tmpFile); 198 199 try { 200 // Run the batch job 201 log.debug("Batch: Job {} started at {}", job, new Date()); 202 File[] processFiles = admin.getFilesMatching(job.getFilenamePattern()); 203 204 final BatchLocalFiles localBatchRunner = new BatchLocalFiles(processFiles); 205 localBatchRunner.run(job, os); 206 log.debug("Batch: Job {} finished at {}", job, new Date()); 207 } finally { // Make sure the OutputStream is closed no matter what. 208 // This allows us to delete the file on Windows 209 // in case of error. 210 try { 211 os.close(); 212 } catch (IOException e) { 213 // We're cleaning up, failing to close won't stop us 214 log.warn("Failed to close outputstream in batch"); 215 } 216 } 217 // write output from batch job back to remote file 218 returnStatus = new BatchStatus(bitarchiveAppId, job.getFilesFailed(), job.getNoOfFilesProcessed(), 219 RemoteFileFactory.getMovefileInstance(tmpFile), job.getExceptions()); 220 } catch (IOException e) { 221 log.error("Failed to create temporary file for batch {}", job, e); 222 throw new IOFailure("Failed to create temporary file for batch " + job, e); 223 } 224 log.info( 225 "Finished batch job on bitarchive application with id '{}': '{}', on filename-pattern: '{}' + with result: {}", 226 bitarchiveAppId, job.getClass().getName(), job.getFilenamePattern(), returnStatus); 227 return returnStatus; 228 } 229 230 /** 231 * Copies a remote file into the bitarchive storage and returns the storage position of the file. 232 * 233 * @param arcfile The source file. 234 * @param fileName the source files filename. 235 * @return the storage position of the file. 236 * @throws IOFailure if an error occurs while copying into the archive. 237 */ 238 private File copyRemoteFileToArchive(RemoteFile arcfile, String fileName) throws IOFailure { 239 File tempDestination = admin.getTemporaryPath(fileName, arcfile.getSize()); 240 File destination = null; 241 try { 242 // The file is first copied to a temporary destination on the same 243 // mount. The reason for this is to eliminate that there are files 244 // in the file-directory that are currupted because of upload 245 // errors. For example if the there is a break down after only half 246 // the file is uploaded. It also means that we do not need to clean 247 // up in the file directory, in case of failure - only the temporary 248 // destination needs clean up. 249 arcfile.copyTo(tempDestination); 250 // Note that the move operation is a constant time operation within 251 // the same mount 252 destination = admin.moveToStorage(tempDestination); 253 } catch (Throwable e) { 254 // destination is known to be null here, so don't worry about it. 255 if (tempDestination.exists()) { 256 tempDestination.delete(); 257 } 258 throw new IOFailure("Can't copy file into archive: " + fileName, e); 259 } 260 return destination; 261 } 262 263 /** 264 * Get a file for a given arcFileID. 265 * 266 * @param arcFileID name of the file to be retrieved. 267 * @return The file requested or null if not found 268 * @throws ArgumentNotValid If arcFileID was null or empty. 269 */ 270 public File getFile(String arcFileID) throws ArgumentNotValid { 271 log.info("Get file '{}'", arcFileID); 272 ArgumentNotValid.checkNotNullOrEmpty(arcFileID, "arcFileID"); 273 BitarchiveARCFile barc = admin.lookup(arcFileID); 274 if (barc == null) { // the file with ID: arcFileID was not found 275 log.debug("File '{}' not found on this machine", arcFileID); 276 return null; 277 } 278 279 File path = barc.getFilePath(); 280 log.info("Getting file '{}'", path); 281 return path; 282 } 283 284 /** 285 * Get the one instance of the bitarchive. 286 * 287 * @return An instance of the Bitarchive class. 288 * @throws PermissionDenied If the storage area used for files is not accessible. 289 */ 290 public static Bitarchive getInstance() throws PermissionDenied { 291 if (instance == null) { 292 instance = new Bitarchive(); 293 } 294 return instance; 295 } 296 297}