001/* 002 * #%L 003 * Netarchivesuite - archive 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.archive.bitarchive; 025 026import java.io.File; 027import java.io.IOException; 028import java.util.ArrayList; 029import java.util.Collections; 030import java.util.HashMap; 031import java.util.LinkedHashMap; 032import java.util.List; 033import java.util.Map; 034import java.util.regex.Pattern; 035 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038 039import dk.netarkivet.archive.ArchiveSettings; 040import dk.netarkivet.archive.Constants; 041import dk.netarkivet.common.exceptions.ArgumentNotValid; 042import dk.netarkivet.common.exceptions.IOFailure; 043import dk.netarkivet.common.exceptions.PermissionDenied; 044import dk.netarkivet.common.exceptions.UnknownID; 045import dk.netarkivet.common.utils.ApplicationUtils; 046import dk.netarkivet.common.utils.FileUtils; 047import dk.netarkivet.common.utils.Settings; 048 049/** 050 * This class handles file lookup and encapsulates the actual placement of files. 051 */ 052public final class BitarchiveAdmin { 053 054 /** The class logger. */ 055 private static final Logger log = LoggerFactory.getLogger(BitarchiveAdmin.class); 056 057 /** 058 * Map containing the archive directories and their files. The file must be the CanonicalFile (use 059 * getCanonicalFile() before access). 060 */ 061 private Map<File, List<String>> archivedFiles = Collections 062 .synchronizedMap(new LinkedHashMap<File, List<String>>()); 063 064 /** 065 * Map containing the time for the latest update of the filelist for each archive directory. The file must be the 066 * CanonicalFile (use getCanonicalFile() before access). 067 */ 068 private Map<File, Long> archiveTime = Collections.synchronizedMap(new HashMap<File, Long>()); 069 070 /** Singleton instance. */ 071 private static BitarchiveAdmin instance; 072 073 /** How much space we must have available *in a single dir* before we will listen for new uploads. */ 074 private final long minSpaceLeft; 075 076 /** How much space we require available *in every dir* after we have accepted an upload. */ 077 private final long minSpaceRequired; 078 079 /** Are readOnly Directories allowed. */ 080 private final boolean readOnlyAllowed; 081 082 /** 083 * Creates a new BitarchiveAdmin object for an existing bit archive. Reads the directories to use from settings. 084 * 085 * @throws ArgumentNotValid If the settings for minSpaceLeft is non-positive or the setting for minSpaceRequired is 086 * negative. 087 * @throws PermissionDenied If any of the directories cannot be created or are not writeable. 088 * @throws IOFailure If it is not possible to retrieve the canonical file for the directories. 089 */ 090 private BitarchiveAdmin() throws ArgumentNotValid, PermissionDenied, IOFailure { 091 String[] filedirnames = Settings.getAll(ArchiveSettings.BITARCHIVE_SERVER_FILEDIR); 092 minSpaceLeft = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_LEFT); 093 readOnlyAllowed = Settings.getBoolean(ArchiveSettings.BITARCHIVE_READ_ONLY_ALLOWED); 094 095 log.info("readOnlyAllowed is: {}", readOnlyAllowed); 096 097 // Check, if value of minSpaceLeft is greater than zero 098 if (minSpaceLeft <= 0L) { 099 log.warn("Wrong setting of minSpaceLeft read from Settings: {}", minSpaceLeft); 100 throw new ArgumentNotValid("Wrong setting of minSpaceLeft read from Settings: " + minSpaceLeft); 101 } 102 103 minSpaceRequired = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_REQUIRED); 104 // Check, if value of minSpaceRequired is at least zero 105 if (minSpaceLeft < 0L) { 106 log.warn("Wrong setting of minSpaceRequired read from Settings: {}", minSpaceLeft); 107 throw new ArgumentNotValid("Wrong setting of minSpaceRequired read from Settings: " + minSpaceLeft); 108 } 109 110 log.info("Requiring at least {} bytes free.", minSpaceRequired); 111 log.info("Listening if at least {} bytes free.", minSpaceLeft); 112 113 try { 114 for (String filedirname : filedirnames) { 115 File basedir = new File(filedirname).getCanonicalFile(); 116 File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME); 117 // Ensure that 'filedir' exists. If it doesn't, it is created 118 ApplicationUtils.dirMustExist(filedir); 119 120 File tempdir = new File(basedir, Constants.TEMPORARY_DIRECTORY_NAME); 121 // Ensure that 'tempdir' exists. If it doesn't, it is created 122 ApplicationUtils.dirMustExist(tempdir); 123 124 File atticdir = new File(basedir, Constants.ATTIC_DIRECTORY_NAME); 125 // Ensure that 'atticdir' exists. If it doesn't, it is created 126 ApplicationUtils.dirMustExist(atticdir); 127 128 // initialise the variables archivedFiles and archiveTime 129 archivedFiles.put(basedir, new ArrayList<String>()); 130 archiveTime.put(basedir, 0L); 131 updateFileList(basedir); 132 133 final Long bytesUsedInDir = calculateBytesUsed(basedir); 134 log.info( 135 "Using bit archive directorys {'{}', '{}', '{}'} under base directory: '{}' with {} bytes of content and {} bytes free. Current number of files archived: {}", 136 Constants.FILE_DIRECTORY_NAME, Constants.TEMPORARY_DIRECTORY_NAME, 137 Constants.ATTIC_DIRECTORY_NAME, basedir, bytesUsedInDir, FileUtils.getBytesFree(basedir), 138 archivedFiles.get(basedir).size()); 139 } 140 } catch (IOException e) { 141 throw new IOFailure("Could not retrieve Canonical files.", e); 142 } 143 } 144 145 /** 146 * Checks whether the filelist is up to date. If the modified timestamp for the a directory is larger than the last 147 * recorded timestamp, then the stored filelist is updated with the latest changes. 148 */ 149 public synchronized void verifyFilelistUpToDate() { 150 for (File basedir : archivedFiles.keySet()) { 151 File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME); 152 long lastModified = filedir.lastModified(); 153 if (archiveTime.get(basedir) < lastModified) { 154 // Update the list and the time. 155 updateFileList(basedir); 156 } 157 } 158 } 159 160 /** 161 * Method for updating the filelist for a given basedir. 162 * 163 * @param basedir The basedir to update the filelist for. 164 * @throws ArgumentNotValid If basedir is null or if it not a proper directory. 165 * @throws UnknownID If the basedir cannot be found both the archivedFiles map or the archiveTime map. 166 * @throws IOFailure If it is not possible to retrieve the canonical file for the basedir. 167 */ 168 public void updateFileList(File basedir) throws ArgumentNotValid, UnknownID, IOFailure { 169 ArgumentNotValid.checkNotNull(basedir, "File basedir"); 170 // ensure that it is the CanonicalFile for the directory. 171 try { 172 basedir = basedir.getCanonicalFile(); 173 } catch (IOException e) { 174 throw new IOFailure("Could not retrieve canonical path for file '" + basedir, e); 175 } 176 if (!basedir.isDirectory()) { 177 throw new ArgumentNotValid("The directory '" + basedir.getPath() + " is not a proper directory."); 178 } 179 if (!archivedFiles.containsKey(basedir) || !archiveTime.containsKey(basedir)) { 180 throw new UnknownID("The directory '" + basedir + "' is not known " 181 + "by the settings. Known directories are: " + archivedFiles.keySet()); 182 } 183 184 log.debug("Updating the filelist for '{}'.", basedir); 185 File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME); 186 if (!checkArchiveDir(filedir)) { 187 throw new UnknownID("The directory '" + filedir + "' is not an " + " archive directory."); 188 } 189 190 String[] dirContent = filedir.list(); 191 List<String> filenames = new ArrayList<String>(dirContent.length); 192 for (String file : dirContent) { 193 // ensure that only files are handled 194 if ((new File(filedir, file)).isFile()) { 195 filenames.add(file); 196 } else { 197 log.warn("The file '{}' in directory {} is not a proper file.", file, filedir.getPath()); 198 } 199 } 200 archivedFiles.put(basedir, filenames); 201 archiveTime.put(basedir, filedir.lastModified()); 202 } 203 204 /** 205 * Returns true if we have at least one dir with the required amount of space left. 206 * 207 * @return true if we have at least one dir with the required amount of space left, otherwise false. 208 */ 209 public boolean hasEnoughSpace() { 210 for (File dir : archivedFiles.keySet()) { 211 if (checkArchiveDir(dir) && FileUtils.getBytesFree(dir) > minSpaceLeft) { 212 return true; 213 } 214 } 215 return false; 216 } 217 218 /** 219 * Returns a temporary place for the the file to be stored. 220 * 221 * @param arcFileName The simple name (i.e. no dirs) of the ARC file. 222 * @param requestedSize How large the file is in bytes. 223 * @return The path where the arcFile should go. 224 * @throws ArgumentNotValid If arcFileName is null or empty, or requestedSize is negative. 225 * @throws IOFailure if there is no more room left to store this file of size=requestedSize 226 */ 227 public File getTemporaryPath(String arcFileName, long requestedSize) throws ArgumentNotValid, IOFailure { 228 ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFile"); 229 ArgumentNotValid.checkNotNegative(requestedSize, "requestedSize"); 230 231 for (File dir : archivedFiles.keySet()) { 232 long bytesFreeInDir = FileUtils.getBytesFree(dir); 233 // TODO If it turns out that it has not enough space for 234 // this file, it should resend the Upload message 235 // This should probably be handled in the 236 // method BitarchiveServer.visit(UploadMessage msg) 237 // This is bug 1586. 238 239 if (checkArchiveDir(dir) && (bytesFreeInDir > minSpaceLeft) 240 && (bytesFreeInDir - requestedSize > minSpaceRequired)) { 241 File filedir = new File(dir, Constants.TEMPORARY_DIRECTORY_NAME); 242 return new File(filedir, arcFileName); 243 } else { 244 log.debug("Not enough space on dir '{}' for file '{}' of size {} bytes. Only {} left", dir.getPath(), 245 arcFileName, requestedSize, bytesFreeInDir); 246 } 247 } 248 log.warn("No space left in dirs: {}, to store file '{}' of size {}", archivedFiles.keySet(), arcFileName, 249 requestedSize); 250 throw new IOFailure("No space left in dirs: " + archivedFiles.keySet() + ", to store file '" + arcFileName 251 + "' of size " + requestedSize); 252 } 253 254 /** 255 * Moves a file from temporary storage to file storage. 256 * <p> 257 * Note: It is checked, if tempLocation resides in directory TEMPORARY_DIRECTORY_NAME and whether the parent of 258 * tempLocation is a Bitarchive directory. 259 * 260 * @param tempLocation The temporary location where the file was stored. This must be a path returned from 261 * getTemporaryPath 262 * @return The location where the file is now stored 263 * @throws IOFailure if tempLocation is not created from getTemporaryPath or file cannot be moved to Storage 264 * location. 265 * @throws ArgumentNotValid If the tempLocation file is null. 266 */ 267 public File moveToStorage(File tempLocation) throws IOFailure, ArgumentNotValid { 268 ArgumentNotValid.checkNotNull(tempLocation, "tempLocation"); 269 try { 270 tempLocation = tempLocation.getCanonicalFile(); 271 } catch (IOException e) { 272 throw new IOFailure("Could not retrieve the canonical file for '" + tempLocation + "'.", e); 273 } 274 String arcFileName = tempLocation.getName(); 275 276 /** 277 * Check, that File tempLocation resides in directory TEMPORARY_DIRECTORY_NAME. 278 */ 279 File arcFilePath = tempLocation.getParentFile(); 280 if (arcFilePath == null || !arcFilePath.getName().equals(Constants.TEMPORARY_DIRECTORY_NAME)) { 281 throw new IOFailure("Location '" + tempLocation + "' is not in " + "tempdir '" 282 + Constants.TEMPORARY_DIRECTORY_NAME + "'"); 283 } 284 /** 285 * Check, that arcFilePath (now known to be TEMPORARY_DIRECTORY_NAME) resides in a recognised Bitarchive 286 * Directory. 287 */ 288 File basedir = arcFilePath.getParentFile(); 289 if (basedir == null || !isBitarchiveDirectory(basedir)) { 290 throw new IOFailure("Location '" + tempLocation + "' is not in " + "recognised archive directory."); 291 } 292 /** 293 * Move File tempLocation to new location: storageFile 294 */ 295 File storagePath = new File(basedir, Constants.FILE_DIRECTORY_NAME); 296 File storageFile = new File(storagePath, arcFileName); 297 if (!tempLocation.renameTo(storageFile)) { 298 throw new IOFailure("Could not move '" + tempLocation.getPath() + "' to '" + storageFile.getPath() + "'"); 299 } 300 // Update the filelist for the directory with this new file. 301 final File canonicalFile; 302 try { 303 canonicalFile = basedir.getCanonicalFile(); 304 } catch (IOException e) { 305 throw new IOFailure("Could not find canonical file for " + basedir.getAbsolutePath(), e); 306 } 307 final List<String> fileList = archivedFiles.get(canonicalFile); 308 if (fileList == null) { 309 throw new UnknownID("The directory " + basedir.getAbsolutePath() + " was not found in the map of known directories and files."); 310 } 311 fileList.add(arcFileName); 312 archiveTime.put(canonicalFile, storagePath.lastModified()); 313 return storageFile; 314 } 315 316 /** 317 * Checks whether a directory is one of the known bitarchive directories. 318 * 319 * @param theDir The dir to check 320 * @return true If it is a valid archive directory; otherwise returns false. 321 * @throws IOFailure if theDir or one of the valid archive directories does not exist 322 * @throws ArgumentNotValid if theDir is null 323 */ 324 protected boolean isBitarchiveDirectory(File theDir) throws ArgumentNotValid, IOFailure { 325 ArgumentNotValid.checkNotNull(theDir, "File theDir"); 326 try { 327 return archivedFiles.containsKey(theDir.getCanonicalFile()); 328 } catch (IOException e) { 329 throw new IOFailure("Could not retrieve the canonical file for '" + theDir + "'.", e); 330 } 331 } 332 333 /** 334 * Check that the given file is a directory appropriate for use. A File is appropiate to use as archivedir, if the 335 * file is an existing directory, and is writable by this java process. 336 * 337 * @param file A file 338 * @return true, if 'file' is an existing directory and is writable. 339 * @throws ArgumentNotValid if 'file' is null. 340 */ 341 private boolean checkArchiveDir(File file) throws ArgumentNotValid { 342 ArgumentNotValid.checkNotNull(file, "file"); 343 344 if (readOnlyAllowed) { 345 log.info("checkArchiveDir skipped for Directory '{}'. Assuming directory is ok due to readOnlyAllowed-Setting set to true", file); 346 return true; 347 } 348 349 if (!file.exists()) { 350 log.warn("Directory '{}' does not exist", file); 351 return false; 352 } 353 if (!file.isDirectory()) { 354 log.warn("Directory '{}' is not a directory after all", file); 355 return false; 356 } 357 if (!file.canWrite()) { 358 log.warn("Directory '{}' is not writable", file); 359 return false; 360 } 361 return true; 362 } 363 364 /** 365 * Return array with references to all files in the archive. 366 * 367 * @return array with references to all files in the archive 368 */ 369 public File[] getFiles() { 370 // Ensure that the filelist is up to date. 371 verifyFilelistUpToDate(); 372 List<File> files = new ArrayList<File>(); 373 for (File archivePath : archivedFiles.keySet()) { 374 File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME); 375 if (checkArchiveDir(archiveDir)) { 376 List<String> filesHere = archivedFiles.get(archivePath); 377 for (String filename : filesHere) { 378 files.add(new File(archiveDir, filename)); 379 } 380 } 381 } 382 return files.toArray(new File[files.size()]); 383 } 384 385 /** 386 * Return an array of all files in this archive that match a given regular expression on the filename. 387 * 388 * @param regexp A precompiled regular expression matching whole filenames. This will probably be given to a 389 * FilenameFilter 390 * @return An array of all the files in this bitarchive that exactly match the regular expression on the filename 391 * (sans paths). 392 */ 393 public File[] getFilesMatching(final Pattern regexp) { 394 ArgumentNotValid.checkNotNull(regexp, "Pattern regexp"); 395 // Ensure that the filelist is up to date. 396 verifyFilelistUpToDate(); 397 List<File> files = new ArrayList<File>(); 398 for (File archivePath : archivedFiles.keySet()) { 399 File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME); 400 if (checkArchiveDir(archiveDir)) { 401 for (String filename : archivedFiles.get(archivePath)) { 402 if (regexp.matcher(filename).matches()) { 403 files.add(new File(archiveDir, filename)); 404 } 405 } 406 } 407 } 408 return files.toArray(new File[files.size()]); 409 } 410 411 /** 412 * Return the path that a given arc file can be found in. 413 * 414 * @param arcFileName Name of an arc file (with no path) 415 * @return A BitarchiveARCFile for the given file, or null if the file does not exist. 416 */ 417 public BitarchiveARCFile lookup(String arcFileName) { 418 ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFileName"); 419 verifyFilelistUpToDate(); 420 for (File archivePath : archivedFiles.keySet()) { 421 File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME); 422 if (checkArchiveDir(archiveDir)) { 423 File archiveFile = new File(archiveDir, arcFileName); 424 if (archiveFile.exists()) { 425 return new BitarchiveARCFile(arcFileName, archiveFile); 426 } 427 } 428 } 429 // the arcfile named "arcFileName" does not exist in this bitarchive. 430 log.trace("The arcfile named '{}' does not exist in this bitarchve", arcFileName); 431 return null; 432 } 433 434 /** 435 * Calculate how many bytes are used by all files in a directory. 436 * 437 * @param filedir An existing directory with a FILE_DIRECTORY_NAME subdir and a TEMPORARY_DIRECTORY_NAME subdir. 438 * @return Number of bytes used by all files in the directory (not including overhead from partially used blocks). 439 */ 440 private long calculateBytesUsed(File filedir) { 441 long used = 0; 442 File[] files = new File(filedir, Constants.FILE_DIRECTORY_NAME).listFiles(); 443 // Check, that listFiles method returns valid information 444 if (files != null) { 445 for (File datafiles : files) { 446 if (datafiles.isFile()) { 447 // Add size of file f to amount of bytes used. 448 used += datafiles.length(); 449 } else { 450 log.warn("Non-file '{}' found in archive", datafiles.getAbsolutePath()); 451 } 452 } 453 } else { 454 log.warn("filedir does not contain a directory named: {}", Constants.FILE_DIRECTORY_NAME); 455 } 456 File[] tempfiles = new File(filedir, Constants.TEMPORARY_DIRECTORY_NAME).listFiles(); 457 // Check, that listFiles() method returns valid information 458 if (tempfiles != null) { 459 for (File tempfile : tempfiles) { 460 if (tempfile.isFile()) { 461 // Add size of file f to amount of bytes used. 462 used += tempfile.length(); 463 } else { 464 log.warn("Non-file '{}' found in archive", tempfile.getAbsolutePath()); 465 } 466 } 467 } else { 468 log.warn("filedir does not contain a directory named: {}", Constants.TEMPORARY_DIRECTORY_NAME); 469 } 470 File[] atticfiles = new File(filedir, Constants.ATTIC_DIRECTORY_NAME).listFiles(); 471 // Check, that listFiles() method returns valid information 472 if (atticfiles != null) { 473 for (File atticfile : atticfiles) { 474 if (atticfile.isFile()) { 475 // Add size of file tempfiles[i] to amount of bytes used. 476 used += atticfile.length(); 477 } else { 478 log.warn("Non-file '{}' found in archive", atticfile.getAbsolutePath()); 479 } 480 } 481 } else { 482 log.warn("filedir does not contain a directory named: {}", Constants.ATTIC_DIRECTORY_NAME); 483 } 484 return used; 485 } 486 487 /** 488 * Get the one and only instance of the bitarchive admin. 489 * 490 * @return A BitarchiveAdmin object 491 */ 492 public static synchronized BitarchiveAdmin getInstance() { 493 if (instance == null) { 494 instance = new BitarchiveAdmin(); 495 } 496 return instance; 497 } 498 499 /** 500 * Close down the bitarchive admin. Currently has no data to store. 501 */ 502 public void close() { 503 archivedFiles.clear(); 504 archiveTime.clear(); 505 instance = null; 506 } 507 508 /** 509 * Return the path used to store files that are removed by RemoveAndGetFileMessage. 510 * 511 * @param existingFile a File object for an existing file in the bitarchive 512 * @return The full path of the file in the attic dir 513 */ 514 public File getAtticPath(File existingFile) { 515 ArgumentNotValid.checkNotNull(existingFile, "File existingFile"); 516 // Find where the file resides so we can use a dir in the same place. 517 try { 518 existingFile = existingFile.getCanonicalFile(); 519 } catch (IOException e) { 520 throw new IOFailure("Could not retrieve canonical file for '" + existingFile + "'.", e); 521 } 522 String arcFileName = existingFile.getName(); 523 File parentDir = existingFile.getParentFile().getParentFile(); 524 if (!isBitarchiveDirectory(parentDir)) { 525 log.warn("Attempt to get attic path for non-archived file '{}'", existingFile); 526 throw new ArgumentNotValid("File should belong to a bitarchive dir," + " but " + existingFile + " doesn't"); 527 } 528 // Ensure that 'atticdir' exists. If it doesn't, it is created 529 File atticdir = new File(parentDir, Constants.ATTIC_DIRECTORY_NAME); 530 ApplicationUtils.dirMustExist(atticdir); 531 return new File(atticdir, arcFileName); 532 } 533 534}