001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.archive.bitarchive;
025
026import java.io.File;
027import java.io.IOException;
028import java.util.ArrayList;
029import java.util.Collections;
030import java.util.HashMap;
031import java.util.LinkedHashMap;
032import java.util.List;
033import java.util.Map;
034import java.util.regex.Pattern;
035
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039import dk.netarkivet.archive.ArchiveSettings;
040import dk.netarkivet.archive.Constants;
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IOFailure;
043import dk.netarkivet.common.exceptions.PermissionDenied;
044import dk.netarkivet.common.exceptions.UnknownID;
045import dk.netarkivet.common.utils.ApplicationUtils;
046import dk.netarkivet.common.utils.FileUtils;
047import dk.netarkivet.common.utils.Settings;
048
049/**
050 * This class handles file lookup and encapsulates the actual placement of files.
051 */
052public final class BitarchiveAdmin {
053
054    /** The class logger. */
055    private static final Logger log = LoggerFactory.getLogger(BitarchiveAdmin.class);
056
057    /**
058     * Map containing the archive directories and their files. The file must be the CanonicalFile (use
059     * getCanonicalFile() before access).
060     */
061    private Map<File, List<String>> archivedFiles = Collections
062            .synchronizedMap(new LinkedHashMap<File, List<String>>());
063
064    /**
065     * Map containing the time for the latest update of the filelist for each archive directory. The file must be the
066     * CanonicalFile (use getCanonicalFile() before access).
067     */
068    private Map<File, Long> archiveTime = Collections.synchronizedMap(new HashMap<File, Long>());
069
070    /** Singleton instance. */
071    private static BitarchiveAdmin instance;
072
073    /** How much space we must have available *in a single dir* before we will listen for new uploads. */
074    private final long minSpaceLeft;
075
076    /** How much space we require available *in every dir* after we have accepted an upload. */
077    private final long minSpaceRequired;
078
079    /**
080     * Creates a new BitarchiveAdmin object for an existing bit archive. Reads the directories to use from settings.
081     *
082     * @throws ArgumentNotValid If the settings for minSpaceLeft is non-positive or the setting for minSpaceRequired is
083     * negative.
084     * @throws PermissionDenied If any of the directories cannot be created or are not writeable.
085     * @throws IOFailure If it is not possible to retrieve the canonical file for the directories.
086     */
087    private BitarchiveAdmin() throws ArgumentNotValid, PermissionDenied, IOFailure {
088        String[] filedirnames = Settings.getAll(ArchiveSettings.BITARCHIVE_SERVER_FILEDIR);
089        minSpaceLeft = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_LEFT);
090        // Check, if value of minSpaceLeft is greater than zero
091        if (minSpaceLeft <= 0L) {
092            log.warn("Wrong setting of minSpaceLeft read from Settings: {}", minSpaceLeft);
093            throw new ArgumentNotValid("Wrong setting of minSpaceLeft read from Settings: " + minSpaceLeft);
094        }
095
096        minSpaceRequired = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_REQUIRED);
097        // Check, if value of minSpaceRequired is at least zero
098        if (minSpaceLeft < 0L) {
099            log.warn("Wrong setting of minSpaceRequired read from Settings: {}", minSpaceLeft);
100            throw new ArgumentNotValid("Wrong setting of minSpaceRequired read from Settings: " + minSpaceLeft);
101        }
102
103        log.info("Requiring at least {} bytes free.", minSpaceRequired);
104        log.info("Listening if at least {} bytes free.", minSpaceLeft);
105
106        try {
107            for (String filedirname : filedirnames) {
108                File basedir = new File(filedirname).getCanonicalFile();
109                File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
110
111                // Ensure that 'filedir' exists. If it doesn't, it is created
112                ApplicationUtils.dirMustExist(filedir);
113                File tempdir = new File(basedir, Constants.TEMPORARY_DIRECTORY_NAME);
114
115                // Ensure that 'tempdir' exists. If it doesn't, it is created
116                ApplicationUtils.dirMustExist(tempdir);
117
118                File atticdir = new File(basedir, Constants.ATTIC_DIRECTORY_NAME);
119
120                // Ensure that 'atticdir' exists. If it doesn't, it is created
121                ApplicationUtils.dirMustExist(atticdir);
122
123                // initialise the variables archivedFiles and archiveTime
124                archivedFiles.put(basedir, new ArrayList<String>());
125                archiveTime.put(basedir, 0L);
126                updateFileList(basedir);
127
128                final Long bytesUsedInDir = calculateBytesUsed(basedir);
129                log.info(
130                        "Using bit archive directorys {'{}', '{}', '{}'} under base directory: '{}' with {} bytes of content and {} bytes free. Current number of files archived: {}",
131                        Constants.FILE_DIRECTORY_NAME, Constants.TEMPORARY_DIRECTORY_NAME,
132                        Constants.ATTIC_DIRECTORY_NAME, basedir, bytesUsedInDir, FileUtils.getBytesFree(basedir),
133                        archivedFiles.get(basedir).size());
134            }
135        } catch (IOException e) {
136            throw new IOFailure("Could not retrieve Canonical files.", e);
137        }
138    }
139
140    /**
141     * Checks whether the filelist is up to date. If the modified timestamp for the a directory is larger than the last
142     * recorded timestamp, then the stored filelist is updated with the latest changes.
143     */
144    public synchronized void verifyFilelistUpToDate() {
145        for (File basedir : archivedFiles.keySet()) {
146            File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
147            long lastModified = filedir.lastModified();
148            if (archiveTime.get(basedir) < lastModified) {
149                // Update the list and the time.
150                updateFileList(basedir);
151            }
152        }
153    }
154
155    /**
156     * Method for updating the filelist for a given basedir.
157     *
158     * @param basedir The basedir to update the filelist for.
159     * @throws ArgumentNotValid If basedir is null or if it not a proper directory.
160     * @throws UnknownID If the basedir cannot be found both the archivedFiles map or the archiveTime map.
161     * @throws IOFailure If it is not possible to retrieve the canonical file for the basedir.
162     */
163    public void updateFileList(File basedir) throws ArgumentNotValid, UnknownID, IOFailure {
164        ArgumentNotValid.checkNotNull(basedir, "File basedir");
165        // ensure that it is the CanonicalFile for the directory.
166        try {
167            basedir = basedir.getCanonicalFile();
168        } catch (IOException e) {
169            throw new IOFailure("Could not retrieve canonical path for file '" + basedir, e);
170        }
171        if (!basedir.isDirectory()) {
172            throw new ArgumentNotValid("The directory '" + basedir.getPath() + " is not a proper directory.");
173        }
174        if (!archivedFiles.containsKey(basedir) || !archiveTime.containsKey(basedir)) {
175            throw new UnknownID("The directory '" + basedir + "' is not known "
176                    + "by the settings. Known directories are: " + archivedFiles.keySet());
177        }
178
179        log.debug("Updating the filelist for '{}'.", basedir);
180        File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
181        if (!checkArchiveDir(filedir)) {
182            throw new UnknownID("The directory '" + filedir + "' is not an " + " archive directory.");
183        }
184
185        String[] dirContent = filedir.list();
186        List<String> filenames = new ArrayList<String>(dirContent.length);
187        for (String file : dirContent) {
188            // ensure that only files are handled
189            if ((new File(filedir, file)).isFile()) {
190                filenames.add(file);
191            } else {
192                log.warn("The file '{}' in directory {} is not a proper file.", file, filedir.getPath());
193            }
194        }
195        archivedFiles.put(basedir, filenames);
196        archiveTime.put(basedir, filedir.lastModified());
197    }
198
199    /**
200     * Returns true if we have at least one dir with the required amount of space left.
201     *
202     * @return true if we have at least one dir with the required amount of space left, otherwise false.
203     */
204    public boolean hasEnoughSpace() {
205        for (File dir : archivedFiles.keySet()) {
206            if (checkArchiveDir(dir) && FileUtils.getBytesFree(dir) > minSpaceLeft) {
207                return true;
208            }
209        }
210        return false;
211    }
212
213    /**
214     * Returns a temporary place for the the file to be stored.
215     *
216     * @param arcFileName The simple name (i.e. no dirs) of the ARC file.
217     * @param requestedSize How large the file is in bytes.
218     * @return The path where the arcFile should go.
219     * @throws ArgumentNotValid If arcFileName is null or empty, or requestedSize is negative.
220     * @throws IOFailure if there is no more room left to store this file of size=requestedSize
221     */
222    public File getTemporaryPath(String arcFileName, long requestedSize) throws ArgumentNotValid, IOFailure {
223        ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFile");
224        ArgumentNotValid.checkNotNegative(requestedSize, "requestedSize");
225
226        for (File dir : archivedFiles.keySet()) {
227            long bytesFreeInDir = FileUtils.getBytesFree(dir);
228            // TODO If it turns out that it has not enough space for
229            // this file, it should resend the Upload message
230            // This should probably be handled in the
231            // method BitarchiveServer.visit(UploadMessage msg)
232            // This is bug 1586.
233
234            if (checkArchiveDir(dir) && (bytesFreeInDir > minSpaceLeft)
235                    && (bytesFreeInDir - requestedSize > minSpaceRequired)) {
236                File filedir = new File(dir, Constants.TEMPORARY_DIRECTORY_NAME);
237                return new File(filedir, arcFileName);
238            } else {
239                log.debug("Not enough space on dir '{}' for file '{}' of size {} bytes. Only {} left", dir.getPath(),
240                        arcFileName, requestedSize, bytesFreeInDir);
241            }
242        }
243        log.warn("No space left in dirs: {}, to store file '{}' of size {}", archivedFiles.keySet(), arcFileName,
244                requestedSize);
245        throw new IOFailure("No space left in dirs: " + archivedFiles.keySet() + ", to store file '" + arcFileName
246                + "' of size " + requestedSize);
247    }
248
249    /**
250     * Moves a file from temporary storage to file storage.
251     * <p>
252     * Note: It is checked, if tempLocation resides in directory TEMPORARY_DIRECTORY_NAME and whether the parent of
253     * tempLocation is a Bitarchive directory.
254     *
255     * @param tempLocation The temporary location where the file was stored. This must be a path returned from
256     * getTemporaryPath
257     * @return The location where the file is now stored
258     * @throws IOFailure if tempLocation is not created from getTemporaryPath or file cannot be moved to Storage
259     * location.
260     * @throws ArgumentNotValid If the tempLocation file is null.
261     */
262    public File moveToStorage(File tempLocation) throws IOFailure, ArgumentNotValid {
263        ArgumentNotValid.checkNotNull(tempLocation, "tempLocation");
264        try {
265            tempLocation = tempLocation.getCanonicalFile();
266        } catch (IOException e) {
267            throw new IOFailure("Could not retrieve the canonical file for '" + tempLocation + "'.", e);
268        }
269        String arcFileName = tempLocation.getName();
270
271        /**
272         * Check, that File tempLocation resides in directory TEMPORARY_DIRECTORY_NAME.
273         */
274        File arcFilePath = tempLocation.getParentFile();
275        if (arcFilePath == null || !arcFilePath.getName().equals(Constants.TEMPORARY_DIRECTORY_NAME)) {
276            throw new IOFailure("Location '" + tempLocation + "' is not in " + "tempdir '"
277                    + Constants.TEMPORARY_DIRECTORY_NAME + "'");
278        }
279        /**
280         * Check, that arcFilePath (now known to be TEMPORARY_DIRECTORY_NAME) resides in a recognised Bitarchive
281         * Directory.
282         */
283        File basedir = arcFilePath.getParentFile();
284        if (basedir == null || !isBitarchiveDirectory(basedir)) {
285            throw new IOFailure("Location '" + tempLocation + "' is not in " + "recognised archive directory.");
286        }
287        /**
288         * Move File tempLocation to new location: storageFile
289         */
290        File storagePath = new File(basedir, Constants.FILE_DIRECTORY_NAME);
291        File storageFile = new File(storagePath, arcFileName);
292        if (!tempLocation.renameTo(storageFile)) {
293            throw new IOFailure("Could not move '" + tempLocation.getPath() + "' to '" + storageFile.getPath() + "'");
294        }
295        // Update the filelist for the directory with this new file.
296        final File canonicalFile;
297        try {
298            canonicalFile = basedir.getCanonicalFile();
299        } catch (IOException e) {
300            throw new IOFailure("Could not find canonical file for " + basedir.getAbsolutePath(), e);
301        }
302        final List<String> fileList = archivedFiles.get(canonicalFile);
303        if (fileList == null) {
304            throw new UnknownID("The directory " + basedir.getAbsolutePath() + " was not found in the map of known directories and files.");
305        }
306        fileList.add(arcFileName);
307        archiveTime.put(canonicalFile, storagePath.lastModified());
308        return storageFile;
309    }
310
311    /**
312     * Checks whether a directory is one of the known bitarchive directories.
313     *
314     * @param theDir The dir to check
315     * @return true If it is a valid archive directory; otherwise returns false.
316     * @throws IOFailure if theDir or one of the valid archive directories does not exist
317     * @throws ArgumentNotValid if theDir is null
318     */
319    protected boolean isBitarchiveDirectory(File theDir) throws ArgumentNotValid, IOFailure {
320        ArgumentNotValid.checkNotNull(theDir, "File theDir");
321        try {
322            return archivedFiles.containsKey(theDir.getCanonicalFile());
323        } catch (IOException e) {
324            throw new IOFailure("Could not retrieve the canonical file for '" + theDir + "'.", e);
325        }
326    }
327
328    /**
329     * Check that the given file is a directory appropriate for use. A File is appropiate to use as archivedir, if the
330     * file is an existing directory, and is writable by this java process.
331     *
332     * @param file A file
333     * @return true, if 'file' is an existing directory and is writable.
334     * @throws ArgumentNotValid if 'file' is null.
335     */
336    private boolean checkArchiveDir(File file) throws ArgumentNotValid {
337        ArgumentNotValid.checkNotNull(file, "file");
338        if (!file.exists()) {
339            log.warn("Directory '{}' does not exist", file);
340            return false;
341        }
342        if (!file.isDirectory()) {
343            log.warn("Directory '{}' is not a directory after all", file);
344            return false;
345        }
346        if (!file.canWrite()) {
347            log.warn("Directory '{}' is not writable", file);
348            return false;
349        }
350        return true;
351    }
352
353    /**
354     * Return array with references to all files in the archive.
355     *
356     * @return array with references to all files in the archive
357     */
358    public File[] getFiles() {
359        // Ensure that the filelist is up to date.
360        verifyFilelistUpToDate();
361        List<File> files = new ArrayList<File>();
362        for (File archivePath : archivedFiles.keySet()) {
363            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
364            if (checkArchiveDir(archiveDir)) {
365                List<String> filesHere = archivedFiles.get(archivePath);
366                for (String filename : filesHere) {
367                    files.add(new File(archiveDir, filename));
368                }
369            }
370        }
371        return files.toArray(new File[files.size()]);
372    }
373
374    /**
375     * Return an array of all files in this archive that match a given regular expression on the filename.
376     *
377     * @param regexp A precompiled regular expression matching whole filenames. This will probably be given to a
378     * FilenameFilter
379     * @return An array of all the files in this bitarchive that exactly match the regular expression on the filename
380     * (sans paths).
381     */
382    public File[] getFilesMatching(final Pattern regexp) {
383        ArgumentNotValid.checkNotNull(regexp, "Pattern regexp");
384        // Ensure that the filelist is up to date.
385        verifyFilelistUpToDate();
386        List<File> files = new ArrayList<File>();
387        for (File archivePath : archivedFiles.keySet()) {
388            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
389            if (checkArchiveDir(archiveDir)) {
390                for (String filename : archivedFiles.get(archivePath)) {
391                    if (regexp.matcher(filename).matches()) {
392                        files.add(new File(archiveDir, filename));
393                    }
394                }
395            }
396        }
397        return files.toArray(new File[files.size()]);
398    }
399
400    /**
401     * Return the path that a given arc file can be found in.
402     *
403     * @param arcFileName Name of an arc file (with no path)
404     * @return A BitarchiveARCFile for the given file, or null if the file does not exist.
405     */
406    public BitarchiveARCFile lookup(String arcFileName) {
407        ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFileName");
408        verifyFilelistUpToDate();
409        for (File archivePath : archivedFiles.keySet()) {
410            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
411            if (checkArchiveDir(archiveDir)) {
412                File archiveFile = new File(archiveDir, arcFileName);
413                if (archiveFile.exists()) {
414                    return new BitarchiveARCFile(arcFileName, archiveFile);
415                }
416            }
417        }
418        // the arcfile named "arcFileName" does not exist in this bitarchive.
419        log.trace("The arcfile named '{}' does not exist in this bitarchve", arcFileName);
420        return null;
421    }
422
423    /**
424     * Calculate how many bytes are used by all files in a directory.
425     *
426     * @param filedir An existing directory with a FILE_DIRECTORY_NAME subdir and a TEMPORARY_DIRECTORY_NAME subdir.
427     * @return Number of bytes used by all files in the directory (not including overhead from partially used blocks).
428     */
429    private long calculateBytesUsed(File filedir) {
430        long used = 0;
431        File[] files = new File(filedir, Constants.FILE_DIRECTORY_NAME).listFiles();
432        // Check, that listFiles method returns valid information
433        if (files != null) {
434            for (File datafiles : files) {
435                if (datafiles.isFile()) {
436                    // Add size of file f to amount of bytes used.
437                    used += datafiles.length();
438                } else {
439                    log.warn("Non-file '{}' found in archive", datafiles.getAbsolutePath());
440                }
441            }
442        } else {
443            log.warn("filedir does not contain a directory named: {}", Constants.FILE_DIRECTORY_NAME);
444        }
445        File[] tempfiles = new File(filedir, Constants.TEMPORARY_DIRECTORY_NAME).listFiles();
446        // Check, that listFiles() method returns valid information
447        if (tempfiles != null) {
448            for (File tempfile : tempfiles) {
449                if (tempfile.isFile()) {
450                    // Add size of file f to amount of bytes used.
451                    used += tempfile.length();
452                } else {
453                    log.warn("Non-file '{}' found in archive", tempfile.getAbsolutePath());
454                }
455            }
456        } else {
457            log.warn("filedir does not contain a directory named: {}", Constants.TEMPORARY_DIRECTORY_NAME);
458        }
459        File[] atticfiles = new File(filedir, Constants.ATTIC_DIRECTORY_NAME).listFiles();
460        // Check, that listFiles() method returns valid information
461        if (atticfiles != null) {
462            for (File atticfile : atticfiles) {
463                if (atticfile.isFile()) {
464                    // Add size of file tempfiles[i] to amount of bytes used.
465                    used += atticfile.length();
466                } else {
467                    log.warn("Non-file '{}' found in archive", atticfile.getAbsolutePath());
468                }
469            }
470        } else {
471            log.warn("filedir does not contain a directory named: {}", Constants.ATTIC_DIRECTORY_NAME);
472        }
473        return used;
474    }
475
476    /**
477     * Get the one and only instance of the bitarchive admin.
478     *
479     * @return A BitarchiveAdmin object
480     */
481    public static synchronized BitarchiveAdmin getInstance() {
482        if (instance == null) {
483            instance = new BitarchiveAdmin();
484        }
485        return instance;
486    }
487
488    /**
489     * Close down the bitarchive admin. Currently has no data to store.
490     */
491    public void close() {
492        archivedFiles.clear();
493        archiveTime.clear();
494        instance = null;
495    }
496
497    /**
498     * Return the path used to store files that are removed by RemoveAndGetFileMessage.
499     *
500     * @param existingFile a File object for an existing file in the bitarchive
501     * @return The full path of the file in the attic dir
502     */
503    public File getAtticPath(File existingFile) {
504        ArgumentNotValid.checkNotNull(existingFile, "File existingFile");
505        // Find where the file resides so we can use a dir in the same place.
506        try {
507            existingFile = existingFile.getCanonicalFile();
508        } catch (IOException e) {
509            throw new IOFailure("Could not retrieve canonical file for '" + existingFile + "'.", e);
510        }
511        String arcFileName = existingFile.getName();
512        File parentDir = existingFile.getParentFile().getParentFile();
513        if (!isBitarchiveDirectory(parentDir)) {
514            log.warn("Attempt to get attic path for non-archived file '{}'", existingFile);
515            throw new ArgumentNotValid("File should belong to a bitarchive dir," + " but " + existingFile + " doesn't");
516        }
517        // Ensure that 'atticdir' exists. If it doesn't, it is created
518        File atticdir = new File(parentDir, Constants.ATTIC_DIRECTORY_NAME);
519        ApplicationUtils.dirMustExist(atticdir);
520        return new File(atticdir, arcFileName);
521    }
522
523}