001/* 002 * #%L 003 * Netarchivesuite - common 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.common.distribute.arcrepository; 025 026import java.io.File; 027import java.io.FileOutputStream; 028import java.io.FilenameFilter; 029import java.io.IOException; 030import java.io.OutputStream; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.List; 034import java.util.regex.Pattern; 035 036import org.archive.io.ArchiveReader; 037import org.archive.io.ArchiveReaderFactory; 038import org.archive.io.ArchiveRecord; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042import dk.netarkivet.common.distribute.FileRemoteFile; 043import dk.netarkivet.common.exceptions.ArgumentNotValid; 044import dk.netarkivet.common.exceptions.IOFailure; 045import dk.netarkivet.common.exceptions.IllegalState; 046import dk.netarkivet.common.exceptions.PermissionDenied; 047import dk.netarkivet.common.utils.ChecksumCalculator; 048import dk.netarkivet.common.utils.FileUtils; 049import dk.netarkivet.common.utils.Settings; 050import dk.netarkivet.common.utils.batch.BatchLocalFiles; 051import dk.netarkivet.common.utils.batch.ChecksumJob; 052import dk.netarkivet.common.utils.batch.FileBatchJob; 053 054/** 055 * A simple implementation of ArcRepositoryClient that just has a number of local directories where it stores its files. 056 * This class doesn't implement credentials checking or checksum storing! 057 */ 058public class LocalArcRepositoryClient implements ArcRepositoryClient { 059 060 /** The logger for this class. */ 061 private static final Logger log = LoggerFactory.getLogger(LocalArcRepositoryClient.class); 062 063 /** The default place in classpath where the settings file can be found. */ 064 private static String defaultSettingsClasspath = "dk/netarkivet/common/distribute/arcrepository/" 065 + "LocalArcRepositoryClientSettings.xml"; 066 067 /* 068 * The static initialiser is called when the class is loaded. It will add default values for all settings defined in 069 * this class, by loading them from a settings.xml file in classpath. 070 */ 071 static { 072 Settings.addDefaultClasspathSettings(defaultSettingsClasspath); 073 } 074 075 /** List of the directories that we store files in. Non-absolute dirs are relative to the current directory. */ 076 private final List<File> storageDirs = new ArrayList<File>(1); 077 078 /** Store the file in the directories designated by this setting. */ 079 private static final String FILE_DIRS = "settings.common.arcrepositoryClient.fileDir"; 080 /** The credentials used to correct data in the archive. */ 081 private static final String CREDENTIALS_SETTING = "settings.archive.bitarchive.thisCredentials"; 082 083 /** Create a new LocalArcRepositoryClient based on current settings. */ 084 public LocalArcRepositoryClient() { 085 List<String> fileDirs = Arrays.asList(Settings.getAll(FILE_DIRS)); 086 for (String fileName : fileDirs) { 087 File f = new File(fileName); 088 FileUtils.createDir(f); 089 log.info("directory '{}' is part of this local archive repository", f.getAbsolutePath()); 090 storageDirs.add(f); 091 } 092 } 093 094 @Override 095 public void close() { 096 } 097 098 /** 099 * Store the given file in the ArcRepository. After storing, the file is deleted. 100 * 101 * @param file A file to be stored. Must exist. 102 * @throws IOFailure thrown if store is unsuccessful, or failed to clean up files after the store operation. 103 * @throws IllegalState if file already exists. 104 * @throws ArgumentNotValid if file parameter is null or file is not an existing file. 105 */ 106 @Override 107 public void store(File file) throws IOFailure, ArgumentNotValid { 108 ArgumentNotValid.checkNotNull(file, "File file"); 109 ArgumentNotValid.checkTrue(file.exists(), "File '" + file + "' does not exist"); 110 if (findFile(file.getName()) != null) { 111 throw new IllegalState("A file with the name '" + file.getName() + " is already stored"); 112 } 113 for (File dir : storageDirs) { 114 if (dir.canWrite() && FileUtils.getBytesFree(dir) > file.length()) { 115 FileUtils.moveFile(file, new File(dir, file.getName())); 116 return; 117 } 118 } 119 throw new IOFailure("Not enough room for '" + file + "' in any of the dirs " + storageDirs); 120 } 121 122 /** 123 * Gets a single ARC record out of the ArcRepository. 124 * 125 * @param arcfile The name of a file containing the desired record. 126 * @param index The offset of the desired record in the file 127 * @return a BitarchiveRecord-object, or null if request times out or object is not found. 128 * @throws ArgumentNotValid on null or empty filenames, or if index is negative. 129 * @throws IOFailure If the get operation failed. 130 */ 131 @Override 132 public BitarchiveRecord get(String arcfile, long index) throws ArgumentNotValid { 133 ArgumentNotValid.checkNotNullOrEmpty(arcfile, "String arcfile"); 134 ArgumentNotValid.checkNotNegative(index, "long index"); 135 File f = findFile(arcfile); 136 if (f == null) { 137 log.warn("File '{}' does not exist. Null BitarchiveRecord returned", arcfile); 138 return null; 139 } 140 ArchiveReader reader = null; 141 ArchiveRecord record = null; 142 try { 143 reader = ArchiveReaderFactory.get(f, index); 144 record = reader.get(); 145 return new BitarchiveRecord(record, arcfile); 146 } catch (IOException e) { 147 throw new IOFailure("Error reading record from '" + arcfile + "' offset " + index, e); 148 } finally { 149 if (record != null) { 150 try { 151 record.close(); 152 } catch (IOException e) { 153 log.warn("Error closing ARC record '{}'", record, e); 154 } 155 } 156 if (reader != null) { 157 try { 158 reader.close(); 159 } catch (IOException e) { 160 log.warn("Error closing ARC reader '{}'", reader, e); 161 } 162 } 163 } 164 } 165 166 /** 167 * Retrieves a file from an ArcRepository and places it in a local file. 168 * 169 * @param arcfilename Name of the arcfile to retrieve. 170 * @param replica The bitarchive to retrieve the data from. (Note argument is ignored) 171 * @param toFile Filename of a place where the file fetched can be put. 172 * @throws ArgumentNotValid if arcfilename is null or empty, or if toFile is null 173 * @throws IOFailure if there are problems reading or writing file, or the file with the given arcfilename could not 174 * be found. 175 */ 176 @Override 177 public void getFile(String arcfilename, Replica replica, File toFile) { 178 ArgumentNotValid.checkNotNullOrEmpty(arcfilename, "String arcfilename"); 179 ArgumentNotValid.checkNotNull(toFile, "File toFile"); 180 File f = findFile(arcfilename); 181 if (f != null) { 182 FileUtils.copyFile(f, toFile); 183 } else { 184 throw new IOFailure("File '" + arcfilename + "' does not exist"); 185 } 186 } 187 188 /** 189 * Runs a batch job on each file in the ArcRepository. 190 * 191 * @param job An object that implements the FileBatchJob interface. The initialize() method will be called before 192 * processing and the finish() method will be called afterwards. The process() method will be called with each File 193 * entry. An optional function postProcess() allows handling the combined results of the batchjob, e.g. summing the 194 * results, sorting, etc. 195 * @param replicaId The archive to execute the job on. 196 * @param args The arguments for the batchjob. This can be null. 197 * @return The status of the batch job after it ended. 198 * @throws ArgumentNotValid If the job is null or the replicaId is either null or the empty string. 199 * @throws IOFailure If a problem occurs during processing the batchjob. 200 */ 201 @Override 202 public BatchStatus batch(final FileBatchJob job, String replicaId, String... args) throws ArgumentNotValid, 203 IOFailure { 204 ArgumentNotValid.checkNotNull(job, "FileBatchJob job"); 205 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 206 OutputStream os = null; 207 File resultFile; 208 try { 209 resultFile = File.createTempFile("batch", replicaId, FileUtils.getTempDir()); 210 os = new FileOutputStream(resultFile); 211 List<File> files = new ArrayList<File>(); 212 final FilenameFilter filenameFilter = new FilenameFilter() { 213 public boolean accept(File dir, String name) { 214 Pattern filenamePattern = job.getFilenamePattern(); 215 return new File(dir, name).isFile() 216 && (filenamePattern == null || filenamePattern.matcher(name).matches()); 217 } 218 }; 219 for (File dir : storageDirs) { 220 File[] filesInDir = dir.listFiles(filenameFilter); 221 if (filesInDir != null) { 222 files.addAll(Arrays.asList(filesInDir)); 223 } 224 } 225 BatchLocalFiles batcher = new BatchLocalFiles(files.toArray(new File[files.size()])); 226 batcher.run(job, os); 227 } catch (IOException e) { 228 throw new IOFailure("Cannot perform batch '" + job + "'", e); 229 } finally { 230 if (os != null) { 231 try { 232 os.close(); 233 } catch (IOException e) { 234 log.warn("Error closing batch output stream '{}'", os, e); 235 } 236 } 237 } 238 return new BatchStatus(replicaId, job.getFilesFailed(), job.getNoOfFilesProcessed(), new FileRemoteFile( 239 resultFile), job.getExceptions()); 240 } 241 242 /** 243 * Updates the administrative data in the ArcRepository for a given file and replica. This implementation does 244 * nothing. 245 * 246 * @param fileName The name of a file stored in the ArcRepository. 247 * @param bitarchiveId The id of the replica that the administrative data for fileName is wrong for. 248 * @param newval What the administrative data will be updated to. 249 */ 250 @Override 251 public void updateAdminData(String fileName, String bitarchiveId, ReplicaStoreState newval) { 252 } 253 254 /** 255 * Updates the checksum kept in the ArcRepository for a given file. It is the responsibility of the ArcRepository 256 * implementation to ensure that this checksum matches that of the underlying files. This implementation does 257 * nothing. 258 * 259 * @param filename The name of a file stored in the ArcRepository. 260 * @param checksum The new checksum. 261 */ 262 @Override 263 public void updateAdminChecksum(String filename, String checksum) { 264 } 265 266 /** 267 * Remove a file from one part of the ArcRepository, retrieving a copy for security purposes. This is typically used 268 * when repairing a file that has been corrupted. 269 * 270 * @param fileName The name of the file to remove. 271 * @param bitarchiveId The id of the replica from which to remove the file. Not used in this implementation, may be 272 * null. 273 * @param checksum The checksum of the file to be removed. 274 * @param credentials A string that shows that the user is allowed to perform this operation. 275 * @return A local copy of the file removed. 276 * @throws ArgumentNotValid On null or empty parameters for fileName, checksum or credentials. 277 * @throws IOFailure On IO trouble. 278 * @throws PermissionDenied On wrong MD5 sum or wrong credentials. 279 */ 280 @Override 281 public File removeAndGetFile(String fileName, String bitarchiveId, String checksum, String credentials) { 282 // Ignores bitarchiveName, checksum, and credentials for now 283 ArgumentNotValid.checkNotNullOrEmpty(fileName, "String fileName"); 284 ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum"); 285 ArgumentNotValid.checkNotNullOrEmpty(credentials, "String credentials"); 286 File file = findFile(fileName); 287 if (file == null) { 288 throw new IOFailure("Cannot find file '" + fileName + "'"); 289 } 290 if (!ChecksumCalculator.calculateMd5(file).equals(checksum)) { 291 throw new PermissionDenied("Wrong checksum for removing file '" + fileName + "'"); 292 } 293 if (!credentials.equals(Settings.get(CREDENTIALS_SETTING))) { 294 throw new PermissionDenied("Wrong credentials for removing file '" + fileName + "'"); 295 } 296 File copiedTo = null; 297 try { 298 copiedTo = File.createTempFile("removeAndGetFile", fileName); 299 } catch (IOException e) { 300 throw new IOFailure("Cannot make temp file to copy '" + fileName + "' into", e); 301 } 302 FileUtils.moveFile(file, copiedTo); 303 return copiedTo; 304 } 305 306 /** 307 * Returns a File object for a filename if it exists in the archive. 308 * 309 * @param filename Name of file to find. 310 * @return A File object for the filename if the file exists, otherwise null. 311 */ 312 private File findFile(String filename) { 313 for (File dir : storageDirs) { 314 final File file = new File(dir, filename); 315 if (file.isFile()) { 316 return file; 317 } 318 } 319 return null; 320 } 321 322 /** 323 * Method for retrieving the checksums of all the files of the replica. 324 * 325 * @param replicaId Inherited dummy argument. 326 * @return A file containing the names and checksum of all the files in the system. 327 * @throws ArgumentNotValid If the replicaId is either null or the empty string. 328 * @throws IOFailure If an unexpected IOException is caught. 329 */ 330 @Override 331 public File getAllChecksums(String replicaId) throws IOFailure, ArgumentNotValid { 332 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 333 334 try { 335 List<String> checksums = new ArrayList<String>(); 336 // go through the different storageDirs and find files and checksums. 337 for (File dir : storageDirs) { 338 // go through all file and calculate the checksum 339 for (File entry : dir.listFiles()) { 340 String checksum = ChecksumCalculator.calculateMd5(entry); 341 String filename = entry.getName(); 342 343 checksums.add(ChecksumJob.makeLine(filename, checksum)); 344 } 345 } 346 347 // create a file with the results. 348 File res = File.createTempFile("all", "checksums", FileUtils.getTempDir()); 349 FileUtils.writeCollectionToFile(res, checksums); 350 return res; 351 } catch (IOException e) { 352 throw new IOFailure("Received unexpected IOFailure: ", e); 353 } 354 } 355 356 /** 357 * Method for retrieving all the filenames of the replica. 358 * 359 * @param replicaId Inherited dummy argument. 360 * @return A file containing the names of all the files. 361 * @throws ArgumentNotValid If the replicaId is either null or empty. 362 * @throws IOFailure If an IOException is caught. 363 */ 364 @Override 365 public File getAllFilenames(String replicaId) throws IOFailure, ArgumentNotValid { 366 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 367 368 List<String> filenames = new ArrayList<String>(); 369 // go through the different storageDirs and put the name of the files 370 // into the resulting list of filenames. 371 for (File dir : storageDirs) { 372 for (String name : dir.list()) { 373 filenames.add(name); 374 } 375 } 376 377 try { 378 File res = File.createTempFile("all", "filenames", FileUtils.getTempDir()); 379 FileUtils.writeCollectionToFile(res, filenames); 380 return res; 381 } catch (IOException e) { 382 throw new IOFailure("Received unexpected IOFailure: ", e); 383 } 384 } 385 386 /** 387 * Method for correcting a bad entry. Calls 'removeAndGetFile' followed by 'store'. 388 * 389 * @param replicaId Inherited dummy argument. 390 * @param checksum The checksum of the bad entry. 391 * @param file The new file to replace the bad entry. 392 * @param credentials The 'password' to allow changing the archive. 393 * @return The bad entry file. 394 * @throws ArgumentNotValid If one of the arguments are null, or if a string is empty. 395 * @throws PermissionDenied If the credentials or checksum are invalid. 396 */ 397 @Override 398 public File correct(String replicaId, String checksum, File file, String credentials) throws ArgumentNotValid, 399 PermissionDenied { 400 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 401 ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum"); 402 ArgumentNotValid.checkNotNull(file, "File file"); 403 ArgumentNotValid.checkNotNullOrEmpty(credentials, "String credentials"); 404 405 // remove bad file. 406 File res = removeAndGetFile(file.getName(), replicaId, checksum, credentials); 407 // store good new file. 408 store(file); 409 // return bad file. 410 return res; 411 } 412 413 /** 414 * Method for finding the checksum of a file. 415 * 416 * @param replicaId Inherited dummy variable. 417 * @param filename The name of the file to calculate the checksum. 418 * @return The checksum of the file, or the empty string if the file was not found or an error occurred. 419 * @throws ArgumentNotValid If the replicaId or the filename is either null or the empty string. 420 */ 421 @Override 422 public String getChecksum(String replicaId, String filename) throws ArgumentNotValid { 423 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 424 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 425 return ChecksumCalculator.calculateMd5(findFile(filename)); 426 } 427 428}