/*
 * #%L
 * Netarchivesuite - archive
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.archive.bitarchive;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.archive.ArchiveSettings;
import dk.netarkivet.archive.Constants;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.PermissionDenied;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.ApplicationUtils;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;

/**
 * This class handles file lookup and encapsulates the actual placement of files.
 * <p>
 * Every configured base directory holds three sub-directories, named by {@link Constants#FILE_DIRECTORY_NAME},
 * {@link Constants#TEMPORARY_DIRECTORY_NAME} and {@link Constants#ATTIC_DIRECTORY_NAME}. The names of the files in
 * the file directory are cached per base directory and refreshed when the directory's modification time changes.
 */
public final class BitarchiveAdmin {

    /** The class logger. */
    private static final Logger log = LoggerFactory.getLogger(BitarchiveAdmin.class);

    /**
     * Map containing the archive directories and their files. The file must be the CanonicalFile (use
     * getCanonicalFile() before access).
     */
    private Map<File, List<String>> archivedFiles = Collections
            .synchronizedMap(new LinkedHashMap<File, List<String>>());

    /**
     * Map containing the time for the latest update of the filelist for each archive directory. The file must be the
     * CanonicalFile (use getCanonicalFile() before access).
     */
    private Map<File, Long> archiveTime = Collections.synchronizedMap(new HashMap<File, Long>());

    /** Singleton instance. */
    private static BitarchiveAdmin instance;

    /** How much space we must have available *in a single dir* before we will listen for new uploads. */
    private final long minSpaceLeft;

    /** How much space we require available *in every dir* after we have accepted an upload. */
    private final long minSpaceRequired;

    /**
     * Creates a new BitarchiveAdmin object for an existing bit archive. Reads the directories to use from settings.
     *
     * @throws ArgumentNotValid If the settings for minSpaceLeft is non-positive or the setting for minSpaceRequired is
     * negative.
     * @throws PermissionDenied If any of the directories cannot be created or are not writeable.
     * @throws IOFailure If it is not possible to retrieve the canonical file for the directories.
     */
    private BitarchiveAdmin() throws ArgumentNotValid, PermissionDenied, IOFailure {
        String[] filedirnames = Settings.getAll(ArchiveSettings.BITARCHIVE_SERVER_FILEDIR);

        // minSpaceLeft must be strictly greater than zero.
        minSpaceLeft = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_LEFT);
        if (minSpaceLeft <= 0L) {
            log.warn("Wrong setting of minSpaceLeft read from Settings: {}", minSpaceLeft);
            throw new ArgumentNotValid("Wrong setting of minSpaceLeft read from Settings: " + minSpaceLeft);
        }

        // minSpaceRequired must be at least zero. Note that it is minSpaceRequired itself that is validated and
        // reported here; checking minSpaceLeft a second time could never fail after the check above.
        minSpaceRequired = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_REQUIRED);
        if (minSpaceRequired < 0L) {
            log.warn("Wrong setting of minSpaceRequired read from Settings: {}", minSpaceRequired);
            throw new ArgumentNotValid("Wrong setting of minSpaceRequired read from Settings: " + minSpaceRequired);
        }

        log.info("Requiring at least {} bytes free.", minSpaceRequired);
        log.info("Listening if at least {} bytes free.", minSpaceLeft);

        try {
            for (String filedirname : filedirnames) {
                File basedir = new File(filedirname).getCanonicalFile();

                // Ensure that 'filedir' exists. If it doesn't, it is created
                File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
                ApplicationUtils.dirMustExist(filedir);

                // Ensure that 'tempdir' exists. If it doesn't, it is created
                File tempdir = new File(basedir, Constants.TEMPORARY_DIRECTORY_NAME);
                ApplicationUtils.dirMustExist(tempdir);

                // Ensure that 'atticdir' exists. If it doesn't, it is created
                File atticdir = new File(basedir, Constants.ATTIC_DIRECTORY_NAME);
                ApplicationUtils.dirMustExist(atticdir);

                // Initialise the variables archivedFiles and archiveTime. The entries must exist before
                // updateFileList is called, since it rejects directories that are not keys of both maps.
                archivedFiles.put(basedir, new ArrayList<String>());
                archiveTime.put(basedir, 0L);
                updateFileList(basedir);

                final long bytesUsedInDir = calculateBytesUsed(basedir);
                log.info(
                        "Using bit archive directorys {'{}', '{}', '{}'} under base directory: '{}' with {} bytes of content and {} bytes free. Current number of files archived: {}",
                        Constants.FILE_DIRECTORY_NAME, Constants.TEMPORARY_DIRECTORY_NAME,
                        Constants.ATTIC_DIRECTORY_NAME, basedir, bytesUsedInDir, FileUtils.getBytesFree(basedir),
                        archivedFiles.get(basedir).size());
            }
        } catch (IOException e) {
            throw new IOFailure("Could not retrieve Canonical files.", e);
        }
    }

    /**
     * Checks whether the filelist is up to date. If the modified timestamp for a directory is larger than the last
     * recorded timestamp, then the stored filelist is updated with the latest changes.
     */
    public synchronized void verifyFilelistUpToDate() {
        for (File basedir : archivedFiles.keySet()) {
            File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
            long lastModified = filedir.lastModified();
            if (archiveTime.get(basedir) < lastModified) {
                // Update the list and the time.
                updateFileList(basedir);
            }
        }
    }

    /**
     * Method for updating the filelist for a given basedir.
     *
     * @param basedir The basedir to update the filelist for.
     * @throws ArgumentNotValid If basedir is null or if it not a proper directory.
     * @throws UnknownID If the basedir cannot be found both the archivedFiles map or the archiveTime map, or if its
     * file directory is not an existing, writable directory.
     * @throws IOFailure If it is not possible to retrieve the canonical file for the basedir, or if the content of
     * its file directory cannot be listed.
     */
    public void updateFileList(File basedir) throws ArgumentNotValid, UnknownID, IOFailure {
        ArgumentNotValid.checkNotNull(basedir, "File basedir");
        // ensure that it is the CanonicalFile for the directory.
        try {
            basedir = basedir.getCanonicalFile();
        } catch (IOException e) {
            throw new IOFailure("Could not retrieve canonical path for file '" + basedir, e);
        }
        if (!basedir.isDirectory()) {
            throw new ArgumentNotValid("The directory '" + basedir.getPath() + " is not a proper directory.");
        }
        if (!archivedFiles.containsKey(basedir) || !archiveTime.containsKey(basedir)) {
            throw new UnknownID("The directory '" + basedir + "' is not known "
                    + "by the settings. Known directories are: " + archivedFiles.keySet());
        }

        log.debug("Updating the filelist for '{}'.", basedir);
        File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
        if (!checkArchiveDir(filedir)) {
            throw new UnknownID("The directory '" + filedir + "' is not an " + " archive directory.");
        }

        // File.list() returns null if an I/O error occurs, even for an existing directory.
        String[] dirContent = filedir.list();
        if (dirContent == null) {
            throw new IOFailure("Could not list the content of directory '" + filedir + "'.");
        }

        List<String> filenames = new ArrayList<String>(dirContent.length);
        for (String file : dirContent) {
            // ensure that only files are handled
            if ((new File(filedir, file)).isFile()) {
                filenames.add(file);
            } else {
                log.warn("The file '{}' in directory {} is not a proper file.", file, filedir.getPath());
            }
        }
        archivedFiles.put(basedir, filenames);
        archiveTime.put(basedir, filedir.lastModified());
    }

    /**
     * Returns true if we have at least one dir with the required amount of space left.
     *
     * @return true if we have at least one dir with the required amount of space left, otherwise false.
     */
    public boolean hasEnoughSpace() {
        for (File dir : archivedFiles.keySet()) {
            if (checkArchiveDir(dir) && FileUtils.getBytesFree(dir) > minSpaceLeft) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns a temporary place for the file to be stored.
     * <p>
     * The first known directory is chosen which is usable, has more than minSpaceLeft bytes free, and would still
     * have more than minSpaceRequired bytes free after storing requestedSize bytes.
     *
     * @param arcFileName The simple name (i.e. no dirs) of the ARC file.
     * @param requestedSize How large the file is in bytes.
     * @return The path where the arcFile should go.
     * @throws ArgumentNotValid If arcFileName is null or empty, or requestedSize is negative.
     * @throws IOFailure if there is no more room left to store this file of size=requestedSize
     */
    public File getTemporaryPath(String arcFileName, long requestedSize) throws ArgumentNotValid, IOFailure {
        ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFile");
        ArgumentNotValid.checkNotNegative(requestedSize, "requestedSize");

        for (File dir : archivedFiles.keySet()) {
            long bytesFreeInDir = FileUtils.getBytesFree(dir);
            // TODO If it turns out that it has not enough space for
            // this file, it should resend the Upload message
            // This should probably be handled in the
            // method BitarchiveServer.visit(UploadMessage msg)
            // This is bug 1586.

            if (checkArchiveDir(dir) && (bytesFreeInDir > minSpaceLeft)
                    && (bytesFreeInDir - requestedSize > minSpaceRequired)) {
                File filedir = new File(dir, Constants.TEMPORARY_DIRECTORY_NAME);
                return new File(filedir, arcFileName);
            } else {
                log.debug("Not enough space on dir '{}' for file '{}' of size {} bytes. Only {} left", dir.getPath(),
                        arcFileName, requestedSize, bytesFreeInDir);
            }
        }
        log.warn("No space left in dirs: {}, to store file '{}' of size {}", archivedFiles.keySet(), arcFileName,
                requestedSize);
        throw new IOFailure("No space left in dirs: " + archivedFiles.keySet() + ", to store file '" + arcFileName
                + "' of size " + requestedSize);
    }

    /**
     * Moves a file from temporary storage to file storage.
     * <p>
     * Note: It is checked, if tempLocation resides in directory TEMPORARY_DIRECTORY_NAME and whether the parent of
     * tempLocation is a Bitarchive directory.
     *
     * @param tempLocation The temporary location where the file was stored. This must be a path returned from
     * getTemporaryPath
     * @return The location where the file is now stored
     * @throws IOFailure if tempLocation is not created from getTemporaryPath or file cannot be moved to Storage
     * location.
     * @throws ArgumentNotValid If the tempLocation file is null.
     * @throws UnknownID If the base directory of tempLocation has no entry in the map of known directories and
     * files.
     */
    public File moveToStorage(File tempLocation) throws IOFailure, ArgumentNotValid {
        ArgumentNotValid.checkNotNull(tempLocation, "tempLocation");
        try {
            tempLocation = tempLocation.getCanonicalFile();
        } catch (IOException e) {
            throw new IOFailure("Could not retrieve the canonical file for '" + tempLocation + "'.", e);
        }
        String arcFileName = tempLocation.getName();

        // Check, that File tempLocation resides in directory TEMPORARY_DIRECTORY_NAME.
        File arcFilePath = tempLocation.getParentFile();
        if (arcFilePath == null || !arcFilePath.getName().equals(Constants.TEMPORARY_DIRECTORY_NAME)) {
            throw new IOFailure("Location '" + tempLocation + "' is not in " + "tempdir '"
                    + Constants.TEMPORARY_DIRECTORY_NAME + "'");
        }

        // Check, that arcFilePath (now known to be TEMPORARY_DIRECTORY_NAME) resides in a recognised Bitarchive
        // Directory.
        File basedir = arcFilePath.getParentFile();
        if (basedir == null || !isBitarchiveDirectory(basedir)) {
            throw new IOFailure("Location '" + tempLocation + "' is not in " + "recognised archive directory.");
        }

        // Move File tempLocation to new location: storageFile
        File storagePath = new File(basedir, Constants.FILE_DIRECTORY_NAME);
        File storageFile = new File(storagePath, arcFileName);
        if (!tempLocation.renameTo(storageFile)) {
            throw new IOFailure("Could not move '" + tempLocation.getPath() + "' to '" + storageFile.getPath() + "'");
        }

        // Update the filelist for the directory with this new file.
        final File canonicalFile;
        try {
            canonicalFile = basedir.getCanonicalFile();
        } catch (IOException e) {
            throw new IOFailure("Could not find canonical file for " + basedir.getAbsolutePath(), e);
        }
        final List<String> fileList = archivedFiles.get(canonicalFile);
        if (fileList == null) {
            throw new UnknownID("The directory " + basedir.getAbsolutePath() + " was not found in the map of known directories and files.");
        }
        fileList.add(arcFileName);
        // Record the new modification time so that the cached list is not considered stale because of this move.
        archiveTime.put(canonicalFile, storagePath.lastModified());
        return storageFile;
    }

    /**
     * Checks whether a directory is one of the known bitarchive directories.
     *
     * @param theDir The dir to check
     * @return true If it is a valid archive directory; otherwise returns false.
     * @throws IOFailure if the canonical file for theDir cannot be retrieved
     * @throws ArgumentNotValid if theDir is null
     */
    protected boolean isBitarchiveDirectory(File theDir) throws ArgumentNotValid, IOFailure {
        ArgumentNotValid.checkNotNull(theDir, "File theDir");
        try {
            return archivedFiles.containsKey(theDir.getCanonicalFile());
        } catch (IOException e) {
            throw new IOFailure("Could not retrieve the canonical file for '" + theDir + "'.", e);
        }
    }

    /**
     * Check that the given file is a directory appropriate for use. A File is appropriate to use as archivedir, if
     * the file is an existing directory, and is writable by this java process. A warning is logged for every failed
     * check.
     *
     * @param file A file
     * @return true, if 'file' is an existing directory and is writable.
     * @throws ArgumentNotValid if 'file' is null.
     */
    private boolean checkArchiveDir(File file) throws ArgumentNotValid {
        ArgumentNotValid.checkNotNull(file, "file");
        if (!file.exists()) {
            log.warn("Directory '{}' does not exist", file);
            return false;
        }
        if (!file.isDirectory()) {
            log.warn("Directory '{}' is not a directory after all", file);
            return false;
        }
        if (!file.canWrite()) {
            log.warn("Directory '{}' is not writable", file);
            return false;
        }
        return true;
    }

    /**
     * Return array with references to all files in the archive.
     *
     * @return array with references to all files in the archive
     */
    public File[] getFiles() {
        // Ensure that the filelist is up to date.
        verifyFilelistUpToDate();
        List<File> files = new ArrayList<File>();
        for (File archivePath : archivedFiles.keySet()) {
            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
            if (checkArchiveDir(archiveDir)) {
                List<String> filesHere = archivedFiles.get(archivePath);
                for (String filename : filesHere) {
                    files.add(new File(archiveDir, filename));
                }
            }
        }
        return files.toArray(new File[files.size()]);
    }

    /**
     * Return an array of all files in this archive that match a given regular expression on the filename.
     *
     * @param regexp A precompiled regular expression matching whole filenames. This will probably be given to a
     * FilenameFilter
     * @return An array of all the files in this bitarchive that exactly match the regular expression on the filename
     * (sans paths).
     */
    public File[] getFilesMatching(final Pattern regexp) {
        ArgumentNotValid.checkNotNull(regexp, "Pattern regexp");
        // Ensure that the filelist is up to date.
        verifyFilelistUpToDate();
        List<File> files = new ArrayList<File>();
        for (File archivePath : archivedFiles.keySet()) {
            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
            if (checkArchiveDir(archiveDir)) {
                for (String filename : archivedFiles.get(archivePath)) {
                    if (regexp.matcher(filename).matches()) {
                        files.add(new File(archiveDir, filename));
                    }
                }
            }
        }
        return files.toArray(new File[files.size()]);
    }

    /**
     * Return the path that a given arc file can be found in.
     *
     * @param arcFileName Name of an arc file (with no path)
     * @return A BitarchiveARCFile for the given file, or null if the file does not exist.
     */
    public BitarchiveARCFile lookup(String arcFileName) {
        ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFileName");
        verifyFilelistUpToDate();
        for (File archivePath : archivedFiles.keySet()) {
            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
            if (checkArchiveDir(archiveDir)) {
                File archiveFile = new File(archiveDir, arcFileName);
                if (archiveFile.exists()) {
                    return new BitarchiveARCFile(arcFileName, archiveFile);
                }
            }
        }
        // the arcfile named "arcFileName" does not exist in this bitarchive.
        log.trace("The arcfile named '{}' does not exist in this bitarchve", arcFileName);
        return null;
    }

    /**
     * Calculate how many bytes are used by all files in a directory.
     *
     * @param filedir An existing directory with a FILE_DIRECTORY_NAME subdir, a TEMPORARY_DIRECTORY_NAME subdir and
     * an ATTIC_DIRECTORY_NAME subdir.
     * @return Number of bytes used by all files in the directory (not including overhead from partially used blocks).
     */
    private long calculateBytesUsed(File filedir) {
        return sumFileSizes(filedir, Constants.FILE_DIRECTORY_NAME)
                + sumFileSizes(filedir, Constants.TEMPORARY_DIRECTORY_NAME)
                + sumFileSizes(filedir, Constants.ATTIC_DIRECTORY_NAME);
    }

    /**
     * Sum the lengths of the regular files found directly in a named sub-directory. Entries that are not regular
     * files are logged and skipped, and a sub-directory that cannot be listed is logged and counts as zero bytes.
     *
     * @param basedir The directory containing the sub-directory.
     * @param subdirName The name of the sub-directory whose files are measured.
     * @return The total length in bytes of the regular files in the sub-directory.
     */
    private long sumFileSizes(File basedir, String subdirName) {
        File[] files = new File(basedir, subdirName).listFiles();
        // listFiles() returns null when the path is not a directory or cannot be read.
        if (files == null) {
            log.warn("filedir does not contain a directory named: {}", subdirName);
            return 0L;
        }
        long used = 0L;
        for (File file : files) {
            if (file.isFile()) {
                used += file.length();
            } else {
                log.warn("Non-file '{}' found in archive", file.getAbsolutePath());
            }
        }
        return used;
    }

    /**
     * Get the one and only instance of the bitarchive admin.
     *
     * @return A BitarchiveAdmin object
     */
    public static synchronized BitarchiveAdmin getInstance() {
        if (instance == null) {
            instance = new BitarchiveAdmin();
        }
        return instance;
    }

    /**
     * Close down the bitarchive admin. Currently has no data to store. The cached file lists are cleared and the
     * singleton reference is reset, so the next call to getInstance() creates a new object.
     */
    public void close() {
        archivedFiles.clear();
        archiveTime.clear();
        instance = null;
    }

    /**
     * Return the path used to store files that are removed by RemoveAndGetFileMessage.
     *
     * @param existingFile a File object for an existing file in the bitarchive
     * @return The full path of the file in the attic dir
     * @throws ArgumentNotValid If existingFile is null, or if it does not reside in a sub-directory of a known
     * bitarchive directory.
     * @throws IOFailure If the canonical file for existingFile cannot be retrieved.
     */
    public File getAtticPath(File existingFile) {
        ArgumentNotValid.checkNotNull(existingFile, "File existingFile");
        // Find where the file resides so we can use a dir in the same place.
        try {
            existingFile = existingFile.getCanonicalFile();
        } catch (IOException e) {
            throw new IOFailure("Could not retrieve canonical file for '" + existingFile + "'.", e);
        }
        String arcFileName = existingFile.getName();

        // The bitarchive base directory is two levels up. Either level may be missing (null) for a path close to
        // the filesystem root; such a file cannot belong to a bitarchive directory.
        File containingDir = existingFile.getParentFile();
        File parentDir = (containingDir == null) ? null : containingDir.getParentFile();
        if (parentDir == null || !isBitarchiveDirectory(parentDir)) {
            log.warn("Attempt to get attic path for non-archived file '{}'", existingFile);
            throw new ArgumentNotValid("File should belong to a bitarchive dir," + " but " + existingFile + " doesn't");
        }
        // Ensure that 'atticdir' exists. If it doesn't, it is created
        File atticdir = new File(parentDir, Constants.ATTIC_DIRECTORY_NAME);
        ApplicationUtils.dirMustExist(atticdir);
        return new File(atticdir, arcFileName);
    }

}