001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.archive.bitarchive;
025
026import java.io.File;
027import java.io.IOException;
028import java.util.ArrayList;
029import java.util.Collections;
030import java.util.HashMap;
031import java.util.LinkedHashMap;
032import java.util.List;
033import java.util.Map;
034import java.util.regex.Pattern;
035
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039import dk.netarkivet.archive.ArchiveSettings;
040import dk.netarkivet.archive.Constants;
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IOFailure;
043import dk.netarkivet.common.exceptions.PermissionDenied;
044import dk.netarkivet.common.exceptions.UnknownID;
045import dk.netarkivet.common.utils.ApplicationUtils;
046import dk.netarkivet.common.utils.FileUtils;
047import dk.netarkivet.common.utils.Settings;
048
049/**
050 * This class handles file lookup and encapsulates the actual placement of files.
051 */
052public final class BitarchiveAdmin {
053
    /** The class logger. */
    private static final Logger log = LoggerFactory.getLogger(BitarchiveAdmin.class);

    /**
     * Map from each archive base directory to the names of the files known to be in its file directory. The key
     * must be the CanonicalFile (use getCanonicalFile() before access). The map is a synchronized wrapper around a
     * LinkedHashMap, so the directories are iterated in the order they were first inserted.
     */
    private Map<File, List<String>> archivedFiles = Collections
            .synchronizedMap(new LinkedHashMap<File, List<String>>());

    /**
     * Map from each archive base directory to the last-modified timestamp of its file directory, as recorded when
     * the filelist for that directory was last updated. The key must be the CanonicalFile (use getCanonicalFile()
     * before access).
     */
    private Map<File, Long> archiveTime = Collections.synchronizedMap(new HashMap<File, Long>());

    /** Singleton instance. Created on demand by getInstance() and reset to null by close(). */
    private static BitarchiveAdmin instance;

    /** How much space we must have available *in a single dir* before we will listen for new uploads. */
    private final long minSpaceLeft;

    /** How much space we require available *in every dir* after we have accepted an upload. */
    private final long minSpaceRequired;

    /**
     * Whether read-only directories are allowed. When true, checkArchiveDir() accepts every directory without
     * testing that it exists or is writable.
     */
    private final boolean readOnlyAllowed;
081
082    /**
083     * Creates a new BitarchiveAdmin object for an existing bit archive. Reads the directories to use from settings.
084     *
085     * @throws ArgumentNotValid If the settings for minSpaceLeft is non-positive or the setting for minSpaceRequired is
086     * negative.
087     * @throws PermissionDenied If any of the directories cannot be created or are not writeable.
088     * @throws IOFailure If it is not possible to retrieve the canonical file for the directories.
089     */
090    private BitarchiveAdmin() throws ArgumentNotValid, PermissionDenied, IOFailure {
091        String[] filedirnames = Settings.getAll(ArchiveSettings.BITARCHIVE_SERVER_FILEDIR);
092        minSpaceLeft = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_LEFT);
093        readOnlyAllowed = Settings.getBoolean(ArchiveSettings.BITARCHIVE_READ_ONLY_ALLOWED);
094
095        log.info("readOnlyAllowed is: {}", readOnlyAllowed);
096
097        // Check, if value of minSpaceLeft is greater than zero
098        if (minSpaceLeft <= 0L) {
099            log.warn("Wrong setting of minSpaceLeft read from Settings: {}", minSpaceLeft);
100            throw new ArgumentNotValid("Wrong setting of minSpaceLeft read from Settings: " + minSpaceLeft);
101        }
102
103        minSpaceRequired = Settings.getLong(ArchiveSettings.BITARCHIVE_MIN_SPACE_REQUIRED);
104        // Check, if value of minSpaceRequired is at least zero
105        if (minSpaceLeft < 0L) {
106            log.warn("Wrong setting of minSpaceRequired read from Settings: {}", minSpaceLeft);
107            throw new ArgumentNotValid("Wrong setting of minSpaceRequired read from Settings: " + minSpaceLeft);
108        }
109
110        log.info("Requiring at least {} bytes free.", minSpaceRequired);
111        log.info("Listening if at least {} bytes free.", minSpaceLeft);
112
113        try {
114            for (String filedirname : filedirnames) {
115                File basedir = new File(filedirname).getCanonicalFile();
116                File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
117                // Ensure that 'filedir' exists. If it doesn't, it is created
118                ApplicationUtils.dirMustExist(filedir);
119
120                File tempdir = new File(basedir, Constants.TEMPORARY_DIRECTORY_NAME);
121                // Ensure that 'tempdir' exists. If it doesn't, it is created
122                ApplicationUtils.dirMustExist(tempdir);
123
124                File atticdir = new File(basedir, Constants.ATTIC_DIRECTORY_NAME);
125                // Ensure that 'atticdir' exists. If it doesn't, it is created
126                ApplicationUtils.dirMustExist(atticdir);
127
128                // initialise the variables archivedFiles and archiveTime
129                archivedFiles.put(basedir, new ArrayList<String>());
130                archiveTime.put(basedir, 0L);
131                updateFileList(basedir);
132
133                final Long bytesUsedInDir = calculateBytesUsed(basedir);
134                log.info(
135                        "Using bit archive directorys {'{}', '{}', '{}'} under base directory: '{}' with {} bytes of content and {} bytes free. Current number of files archived: {}",
136                        Constants.FILE_DIRECTORY_NAME, Constants.TEMPORARY_DIRECTORY_NAME,
137                        Constants.ATTIC_DIRECTORY_NAME, basedir, bytesUsedInDir, FileUtils.getBytesFree(basedir),
138                        archivedFiles.get(basedir).size());
139            }
140        } catch (IOException e) {
141            throw new IOFailure("Could not retrieve Canonical files.", e);
142        }
143    }
144
145    /**
146     * Checks whether the filelist is up to date. If the modified timestamp for the a directory is larger than the last
147     * recorded timestamp, then the stored filelist is updated with the latest changes.
148     */
149    public synchronized void verifyFilelistUpToDate() {
150        for (File basedir : archivedFiles.keySet()) {
151            File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
152            long lastModified = filedir.lastModified();
153            if (archiveTime.get(basedir) < lastModified) {
154                // Update the list and the time.
155                updateFileList(basedir);
156            }
157        }
158    }
159
160    /**
161     * Method for updating the filelist for a given basedir.
162     *
163     * @param basedir The basedir to update the filelist for.
164     * @throws ArgumentNotValid If basedir is null or if it not a proper directory.
165     * @throws UnknownID If the basedir cannot be found both the archivedFiles map or the archiveTime map.
166     * @throws IOFailure If it is not possible to retrieve the canonical file for the basedir.
167     */
168    public void updateFileList(File basedir) throws ArgumentNotValid, UnknownID, IOFailure {
169        ArgumentNotValid.checkNotNull(basedir, "File basedir");
170        // ensure that it is the CanonicalFile for the directory.
171        try {
172            basedir = basedir.getCanonicalFile();
173        } catch (IOException e) {
174            throw new IOFailure("Could not retrieve canonical path for file '" + basedir, e);
175        }
176        if (!basedir.isDirectory()) {
177            throw new ArgumentNotValid("The directory '" + basedir.getPath() + " is not a proper directory.");
178        }
179        if (!archivedFiles.containsKey(basedir) || !archiveTime.containsKey(basedir)) {
180            throw new UnknownID("The directory '" + basedir + "' is not known "
181                    + "by the settings. Known directories are: " + archivedFiles.keySet());
182        }
183
184        log.debug("Updating the filelist for '{}'.", basedir);
185        File filedir = new File(basedir, Constants.FILE_DIRECTORY_NAME);
186        if (!checkArchiveDir(filedir)) {
187            throw new UnknownID("The directory '" + filedir + "' is not an " + " archive directory.");
188        }
189
190        String[] dirContent = filedir.list();
191        List<String> filenames = new ArrayList<String>(dirContent.length);
192        for (String file : dirContent) {
193            // ensure that only files are handled
194            if ((new File(filedir, file)).isFile()) {
195                filenames.add(file);
196            } else {
197                log.warn("The file '{}' in directory {} is not a proper file.", file, filedir.getPath());
198            }
199        }
200        archivedFiles.put(basedir, filenames);
201        archiveTime.put(basedir, filedir.lastModified());
202    }
203
204    /**
205     * Returns true if we have at least one dir with the required amount of space left.
206     *
207     * @return true if we have at least one dir with the required amount of space left, otherwise false.
208     */
209    public boolean hasEnoughSpace() {
210        for (File dir : archivedFiles.keySet()) {
211            if (checkArchiveDir(dir) && FileUtils.getBytesFree(dir) > minSpaceLeft) {
212                return true;
213            }
214        }
215        return false;
216    }
217
218    /**
219     * Returns a temporary place for the the file to be stored.
220     *
221     * @param arcFileName The simple name (i.e. no dirs) of the ARC file.
222     * @param requestedSize How large the file is in bytes.
223     * @return The path where the arcFile should go.
224     * @throws ArgumentNotValid If arcFileName is null or empty, or requestedSize is negative.
225     * @throws IOFailure if there is no more room left to store this file of size=requestedSize
226     */
227    public File getTemporaryPath(String arcFileName, long requestedSize) throws ArgumentNotValid, IOFailure {
228        ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFile");
229        ArgumentNotValid.checkNotNegative(requestedSize, "requestedSize");
230
231        for (File dir : archivedFiles.keySet()) {
232            long bytesFreeInDir = FileUtils.getBytesFree(dir);
233            // TODO If it turns out that it has not enough space for
234            // this file, it should resend the Upload message
235            // This should probably be handled in the
236            // method BitarchiveServer.visit(UploadMessage msg)
237            // This is bug 1586.
238
239            if (checkArchiveDir(dir) && (bytesFreeInDir > minSpaceLeft)
240                    && (bytesFreeInDir - requestedSize > minSpaceRequired)) {
241                File filedir = new File(dir, Constants.TEMPORARY_DIRECTORY_NAME);
242                return new File(filedir, arcFileName);
243            } else {
244                log.debug("Not enough space on dir '{}' for file '{}' of size {} bytes. Only {} left", dir.getPath(),
245                        arcFileName, requestedSize, bytesFreeInDir);
246            }
247        }
248        log.warn("No space left in dirs: {}, to store file '{}' of size {}", archivedFiles.keySet(), arcFileName,
249                requestedSize);
250        throw new IOFailure("No space left in dirs: " + archivedFiles.keySet() + ", to store file '" + arcFileName
251                + "' of size " + requestedSize);
252    }
253
254    /**
255     * Moves a file from temporary storage to file storage.
256     * <p>
257     * Note: It is checked, if tempLocation resides in directory TEMPORARY_DIRECTORY_NAME and whether the parent of
258     * tempLocation is a Bitarchive directory.
259     *
260     * @param tempLocation The temporary location where the file was stored. This must be a path returned from
261     * getTemporaryPath
262     * @return The location where the file is now stored
263     * @throws IOFailure if tempLocation is not created from getTemporaryPath or file cannot be moved to Storage
264     * location.
265     * @throws ArgumentNotValid If the tempLocation file is null.
266     */
267    public File moveToStorage(File tempLocation) throws IOFailure, ArgumentNotValid {
268        ArgumentNotValid.checkNotNull(tempLocation, "tempLocation");
269        try {
270            tempLocation = tempLocation.getCanonicalFile();
271        } catch (IOException e) {
272            throw new IOFailure("Could not retrieve the canonical file for '" + tempLocation + "'.", e);
273        }
274        String arcFileName = tempLocation.getName();
275
276        /**
277         * Check, that File tempLocation resides in directory TEMPORARY_DIRECTORY_NAME.
278         */
279        File arcFilePath = tempLocation.getParentFile();
280        if (arcFilePath == null || !arcFilePath.getName().equals(Constants.TEMPORARY_DIRECTORY_NAME)) {
281            throw new IOFailure("Location '" + tempLocation + "' is not in " + "tempdir '"
282                    + Constants.TEMPORARY_DIRECTORY_NAME + "'");
283        }
284        /**
285         * Check, that arcFilePath (now known to be TEMPORARY_DIRECTORY_NAME) resides in a recognised Bitarchive
286         * Directory.
287         */
288        File basedir = arcFilePath.getParentFile();
289        if (basedir == null || !isBitarchiveDirectory(basedir)) {
290            throw new IOFailure("Location '" + tempLocation + "' is not in " + "recognised archive directory.");
291        }
292        /**
293         * Move File tempLocation to new location: storageFile
294         */
295        File storagePath = new File(basedir, Constants.FILE_DIRECTORY_NAME);
296        File storageFile = new File(storagePath, arcFileName);
297        if (!tempLocation.renameTo(storageFile)) {
298            throw new IOFailure("Could not move '" + tempLocation.getPath() + "' to '" + storageFile.getPath() + "'");
299        }
300        // Update the filelist for the directory with this new file.
301        final File canonicalFile;
302        try {
303            canonicalFile = basedir.getCanonicalFile();
304        } catch (IOException e) {
305            throw new IOFailure("Could not find canonical file for " + basedir.getAbsolutePath(), e);
306        }
307        final List<String> fileList = archivedFiles.get(canonicalFile);
308        if (fileList == null) {
309            throw new UnknownID("The directory " + basedir.getAbsolutePath() + " was not found in the map of known directories and files.");
310        }
311        fileList.add(arcFileName);
312        archiveTime.put(canonicalFile, storagePath.lastModified());
313        return storageFile;
314    }
315
316    /**
317     * Checks whether a directory is one of the known bitarchive directories.
318     *
319     * @param theDir The dir to check
320     * @return true If it is a valid archive directory; otherwise returns false.
321     * @throws IOFailure if theDir or one of the valid archive directories does not exist
322     * @throws ArgumentNotValid if theDir is null
323     */
324    protected boolean isBitarchiveDirectory(File theDir) throws ArgumentNotValid, IOFailure {
325        ArgumentNotValid.checkNotNull(theDir, "File theDir");
326        try {
327            return archivedFiles.containsKey(theDir.getCanonicalFile());
328        } catch (IOException e) {
329            throw new IOFailure("Could not retrieve the canonical file for '" + theDir + "'.", e);
330        }
331    }
332
333    /**
334     * Check that the given file is a directory appropriate for use. A File is appropiate to use as archivedir, if the
335     * file is an existing directory, and is writable by this java process.
336     *
337     * @param file A file
338     * @return true, if 'file' is an existing directory and is writable.
339     * @throws ArgumentNotValid if 'file' is null.
340     */
341    private boolean checkArchiveDir(File file) throws ArgumentNotValid {
342        ArgumentNotValid.checkNotNull(file, "file");
343
344        if (readOnlyAllowed) {
345            log.info("checkArchiveDir skipped for Directory '{}'. Assuming directory is ok due to readOnlyAllowed-Setting set to true", file);
346            return true;
347        }
348
349        if (!file.exists()) {
350            log.warn("Directory '{}' does not exist", file);
351            return false;
352        }
353        if (!file.isDirectory()) {
354            log.warn("Directory '{}' is not a directory after all", file);
355            return false;
356        }
357        if (!file.canWrite()) {
358            log.warn("Directory '{}' is not writable", file);
359            return false;
360        }
361        return true;
362    }
363
364    /**
365     * Return array with references to all files in the archive.
366     *
367     * @return array with references to all files in the archive
368     */
369    public File[] getFiles() {
370        // Ensure that the filelist is up to date.
371        verifyFilelistUpToDate();
372        List<File> files = new ArrayList<File>();
373        for (File archivePath : archivedFiles.keySet()) {
374            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
375            if (checkArchiveDir(archiveDir)) {
376                List<String> filesHere = archivedFiles.get(archivePath);
377                for (String filename : filesHere) {
378                    files.add(new File(archiveDir, filename));
379                }
380            }
381        }
382        return files.toArray(new File[files.size()]);
383    }
384
385    /**
386     * Return an array of all files in this archive that match a given regular expression on the filename.
387     *
388     * @param regexp A precompiled regular expression matching whole filenames. This will probably be given to a
389     * FilenameFilter
390     * @return An array of all the files in this bitarchive that exactly match the regular expression on the filename
391     * (sans paths).
392     */
393    public File[] getFilesMatching(final Pattern regexp) {
394        ArgumentNotValid.checkNotNull(regexp, "Pattern regexp");
395        // Ensure that the filelist is up to date.
396        verifyFilelistUpToDate();
397        List<File> files = new ArrayList<File>();
398        for (File archivePath : archivedFiles.keySet()) {
399            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
400            if (checkArchiveDir(archiveDir)) {
401                for (String filename : archivedFiles.get(archivePath)) {
402                    if (regexp.matcher(filename).matches()) {
403                        files.add(new File(archiveDir, filename));
404                    }
405                }
406            }
407        }
408        return files.toArray(new File[files.size()]);
409    }
410
411    /**
412     * Return the path that a given arc file can be found in.
413     *
414     * @param arcFileName Name of an arc file (with no path)
415     * @return A BitarchiveARCFile for the given file, or null if the file does not exist.
416     */
417    public BitarchiveARCFile lookup(String arcFileName) {
418        ArgumentNotValid.checkNotNullOrEmpty(arcFileName, "arcFileName");
419        verifyFilelistUpToDate();
420        for (File archivePath : archivedFiles.keySet()) {
421            File archiveDir = new File(archivePath, Constants.FILE_DIRECTORY_NAME);
422            if (checkArchiveDir(archiveDir)) {
423                File archiveFile = new File(archiveDir, arcFileName);
424                if (archiveFile.exists()) {
425                    return new BitarchiveARCFile(arcFileName, archiveFile);
426                }
427            }
428        }
429        // the arcfile named "arcFileName" does not exist in this bitarchive.
430        log.trace("The arcfile named '{}' does not exist in this bitarchve", arcFileName);
431        return null;
432    }
433
434    /**
435     * Calculate how many bytes are used by all files in a directory.
436     *
437     * @param filedir An existing directory with a FILE_DIRECTORY_NAME subdir and a TEMPORARY_DIRECTORY_NAME subdir.
438     * @return Number of bytes used by all files in the directory (not including overhead from partially used blocks).
439     */
440    private long calculateBytesUsed(File filedir) {
441        long used = 0;
442        File[] files = new File(filedir, Constants.FILE_DIRECTORY_NAME).listFiles();
443        // Check, that listFiles method returns valid information
444        if (files != null) {
445            for (File datafiles : files) {
446                if (datafiles.isFile()) {
447                    // Add size of file f to amount of bytes used.
448                    used += datafiles.length();
449                } else {
450                    log.warn("Non-file '{}' found in archive", datafiles.getAbsolutePath());
451                }
452            }
453        } else {
454            log.warn("filedir does not contain a directory named: {}", Constants.FILE_DIRECTORY_NAME);
455        }
456        File[] tempfiles = new File(filedir, Constants.TEMPORARY_DIRECTORY_NAME).listFiles();
457        // Check, that listFiles() method returns valid information
458        if (tempfiles != null) {
459            for (File tempfile : tempfiles) {
460                if (tempfile.isFile()) {
461                    // Add size of file f to amount of bytes used.
462                    used += tempfile.length();
463                } else {
464                    log.warn("Non-file '{}' found in archive", tempfile.getAbsolutePath());
465                }
466            }
467        } else {
468            log.warn("filedir does not contain a directory named: {}", Constants.TEMPORARY_DIRECTORY_NAME);
469        }
470        File[] atticfiles = new File(filedir, Constants.ATTIC_DIRECTORY_NAME).listFiles();
471        // Check, that listFiles() method returns valid information
472        if (atticfiles != null) {
473            for (File atticfile : atticfiles) {
474                if (atticfile.isFile()) {
475                    // Add size of file tempfiles[i] to amount of bytes used.
476                    used += atticfile.length();
477                } else {
478                    log.warn("Non-file '{}' found in archive", atticfile.getAbsolutePath());
479                }
480            }
481        } else {
482            log.warn("filedir does not contain a directory named: {}", Constants.ATTIC_DIRECTORY_NAME);
483        }
484        return used;
485    }
486
487    /**
488     * Get the one and only instance of the bitarchive admin.
489     *
490     * @return A BitarchiveAdmin object
491     */
492    public static synchronized BitarchiveAdmin getInstance() {
493        if (instance == null) {
494            instance = new BitarchiveAdmin();
495        }
496        return instance;
497    }
498
499    /**
500     * Close down the bitarchive admin. Currently has no data to store.
501     */
502    public void close() {
503        archivedFiles.clear();
504        archiveTime.clear();
505        instance = null;
506    }
507
508    /**
509     * Return the path used to store files that are removed by RemoveAndGetFileMessage.
510     *
511     * @param existingFile a File object for an existing file in the bitarchive
512     * @return The full path of the file in the attic dir
513     */
514    public File getAtticPath(File existingFile) {
515        ArgumentNotValid.checkNotNull(existingFile, "File existingFile");
516        // Find where the file resides so we can use a dir in the same place.
517        try {
518            existingFile = existingFile.getCanonicalFile();
519        } catch (IOException e) {
520            throw new IOFailure("Could not retrieve canonical file for '" + existingFile + "'.", e);
521        }
522        String arcFileName = existingFile.getName();
523        File parentDir = existingFile.getParentFile().getParentFile();
524        if (!isBitarchiveDirectory(parentDir)) {
525            log.warn("Attempt to get attic path for non-archived file '{}'", existingFile);
526            throw new ArgumentNotValid("File should belong to a bitarchive dir," + " but " + existingFile + " doesn't");
527        }
528        // Ensure that 'atticdir' exists. If it doesn't, it is created
529        File atticdir = new File(parentDir, Constants.ATTIC_DIRECTORY_NAME);
530        ApplicationUtils.dirMustExist(atticdir);
531        return new File(atticdir, arcFileName);
532    }
533
534}