001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.common.distribute.arcrepository;
025
026import java.io.File;
027import java.io.FileOutputStream;
028import java.io.FilenameFilter;
029import java.io.IOException;
030import java.io.OutputStream;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.List;
034import java.util.regex.Pattern;
035
036import org.archive.io.ArchiveReader;
037import org.archive.io.ArchiveReaderFactory;
038import org.archive.io.ArchiveRecord;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import dk.netarkivet.common.distribute.FileRemoteFile;
043import dk.netarkivet.common.exceptions.ArgumentNotValid;
044import dk.netarkivet.common.exceptions.IOFailure;
045import dk.netarkivet.common.exceptions.IllegalState;
046import dk.netarkivet.common.exceptions.PermissionDenied;
047import dk.netarkivet.common.utils.ChecksumCalculator;
048import dk.netarkivet.common.utils.FileUtils;
049import dk.netarkivet.common.utils.Settings;
050import dk.netarkivet.common.utils.batch.BatchLocalFiles;
051import dk.netarkivet.common.utils.batch.ChecksumJob;
052import dk.netarkivet.common.utils.batch.FileBatchJob;
053
054/**
055 * A simple implementation of ArcRepositoryClient that just has a number of local directories where it stores its files.
056 * This class doesn't implement credentials checking or checksum storing!
057 */
058public class LocalArcRepositoryClient implements ArcRepositoryClient {
059
060    /** The logger for this class. */
061    private static final Logger log = LoggerFactory.getLogger(LocalArcRepositoryClient.class);
062
063    /** The default place in classpath where the settings file can be found. */
064    private static String defaultSettingsClasspath = "dk/netarkivet/common/distribute/arcrepository/"
065            + "LocalArcRepositoryClientSettings.xml";
066
067    /*
068     * The static initialiser is called when the class is loaded. It will add default values for all settings defined in
069     * this class, by loading them from a settings.xml file in classpath.
070     */
071    static {
072        Settings.addDefaultClasspathSettings(defaultSettingsClasspath);
073    }
074
075    /** List of the directories that we store files in. Non-absolute dirs are relative to the current directory. */
076    private final List<File> storageDirs = new ArrayList<File>(1);
077
078    /** Store the file in the directories designated by this setting. */
079    private static final String FILE_DIRS = "settings.common.arcrepositoryClient.fileDir";
080    /** The credentials used to correct data in the archive. */
081    private static final String CREDENTIALS_SETTING = "settings.archive.bitarchive.thisCredentials";
082
083    /** Create a new LocalArcRepositoryClient based on current settings. */
084    public LocalArcRepositoryClient() {
085        List<String> fileDirs = Arrays.asList(Settings.getAll(FILE_DIRS));
086        for (String fileName : fileDirs) {
087            File f = new File(fileName);
088            FileUtils.createDir(f);
089            log.info("directory '{}' is part of this local archive repository", f.getAbsolutePath());
090            storageDirs.add(f);
091        }
092    }
093
094    @Override
095    public void close() {
096    }
097
098    /**
099     * Store the given file in the ArcRepository. After storing, the file is deleted.
100     *
101     * @param file A file to be stored. Must exist.
102     * @throws IOFailure thrown if store is unsuccessful, or failed to clean up files after the store operation.
103     * @throws IllegalState if file already exists.
104     * @throws ArgumentNotValid if file parameter is null or file is not an existing file.
105     */
106    @Override
107    public void store(File file) throws IOFailure, ArgumentNotValid {
108        ArgumentNotValid.checkNotNull(file, "File file");
109        ArgumentNotValid.checkTrue(file.exists(), "File '" + file + "' does not exist");
110        if (findFile(file.getName()) != null) {
111            throw new IllegalState("A file with the name '" + file.getName() + " is already stored");
112        }
113        for (File dir : storageDirs) {
114            if (dir.canWrite() && FileUtils.getBytesFree(dir) > file.length()) {
115                FileUtils.moveFile(file, new File(dir, file.getName()));
116                return;
117            }
118        }
119        throw new IOFailure("Not enough room for '" + file + "' in any of the dirs " + storageDirs);
120    }
121
122    /**
123     * Gets a single ARC record out of the ArcRepository.
124     *
125     * @param arcfile The name of a file containing the desired record.
126     * @param index The offset of the desired record in the file
127     * @return a BitarchiveRecord-object, or null if request times out or object is not found.
128     * @throws ArgumentNotValid on null or empty filenames, or if index is negative.
129     * @throws IOFailure If the get operation failed.
130     */
131    @Override
132    public BitarchiveRecord get(String arcfile, long index) throws ArgumentNotValid {
133        ArgumentNotValid.checkNotNullOrEmpty(arcfile, "String arcfile");
134        ArgumentNotValid.checkNotNegative(index, "long index");
135        File f = findFile(arcfile);
136        if (f == null) {
137            log.warn("File '{}' does not exist. Null BitarchiveRecord returned", arcfile);
138            return null;
139        }
140        ArchiveReader reader = null;
141        ArchiveRecord record = null;
142        try {
143            reader = ArchiveReaderFactory.get(f, index);
144            record = reader.get();
145            return new BitarchiveRecord(record, arcfile);
146        } catch (IOException e) {
147            throw new IOFailure("Error reading record from '" + arcfile + "' offset " + index, e);
148        } finally {
149            if (record != null) {
150                try {
151                    record.close();
152                } catch (IOException e) {
153                    log.warn("Error closing ARC record '{}'", record, e);
154                }
155            }
156            if (reader != null) {
157                try {
158                    reader.close();
159                } catch (IOException e) {
160                    log.warn("Error closing ARC reader '{}'", reader, e);
161                }
162            }
163        }
164    }
165
166    /**
167     * Retrieves a file from an ArcRepository and places it in a local file.
168     *
169     * @param arcfilename Name of the arcfile to retrieve.
170     * @param replica The bitarchive to retrieve the data from. (Note argument is ignored)
171     * @param toFile Filename of a place where the file fetched can be put.
172     * @throws ArgumentNotValid if arcfilename is null or empty, or if toFile is null
173     * @throws IOFailure if there are problems reading or writing file, or the file with the given arcfilename could not
174     * be found.
175     */
176    @Override
177    public void getFile(String arcfilename, Replica replica, File toFile) {
178        ArgumentNotValid.checkNotNullOrEmpty(arcfilename, "String arcfilename");
179        ArgumentNotValid.checkNotNull(toFile, "File toFile");
180        File f = findFile(arcfilename);
181        if (f != null) {
182            FileUtils.copyFile(f, toFile);
183        } else {
184            throw new IOFailure("File '" + arcfilename + "' does not exist");
185        }
186    }
187
188    /**
189     * Runs a batch job on each file in the ArcRepository.
190     *
191     * @param job An object that implements the FileBatchJob interface. The initialize() method will be called before
192     * processing and the finish() method will be called afterwards. The process() method will be called with each File
193     * entry. An optional function postProcess() allows handling the combined results of the batchjob, e.g. summing the
194     * results, sorting, etc.
195     * @param replicaId The archive to execute the job on.
196     * @param args The arguments for the batchjob. This can be null.
197     * @return The status of the batch job after it ended.
198     * @throws ArgumentNotValid If the job is null or the replicaId is either null or the empty string.
199     * @throws IOFailure If a problem occurs during processing the batchjob.
200     */
201    @Override
202    public BatchStatus batch(final FileBatchJob job, String replicaId, String... args) throws ArgumentNotValid,
203            IOFailure {
204        ArgumentNotValid.checkNotNull(job, "FileBatchJob job");
205        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
206        OutputStream os = null;
207        File resultFile;
208        try {
209            resultFile = File.createTempFile("batch", replicaId, FileUtils.getTempDir());
210            os = new FileOutputStream(resultFile);
211            List<File> files = new ArrayList<File>();
212            final FilenameFilter filenameFilter = new FilenameFilter() {
213                public boolean accept(File dir, String name) {
214                    Pattern filenamePattern = job.getFilenamePattern();
215                    return new File(dir, name).isFile()
216                            && (filenamePattern == null || filenamePattern.matcher(name).matches());
217                }
218            };
219            for (File dir : storageDirs) {
220                File[] filesInDir = dir.listFiles(filenameFilter);
221                if (filesInDir != null) {
222                    files.addAll(Arrays.asList(filesInDir));
223                }
224            }
225            BatchLocalFiles batcher = new BatchLocalFiles(files.toArray(new File[files.size()]));
226            batcher.run(job, os);
227        } catch (IOException e) {
228            throw new IOFailure("Cannot perform batch '" + job + "'", e);
229        } finally {
230            if (os != null) {
231                try {
232                    os.close();
233                } catch (IOException e) {
234                    log.warn("Error closing batch output stream '{}'", os, e);
235                }
236            }
237        }
238        return new BatchStatus(replicaId, job.getFilesFailed(), job.getNoOfFilesProcessed(), new FileRemoteFile(
239                resultFile), job.getExceptions());
240    }
241
242    /**
243     * Updates the administrative data in the ArcRepository for a given file and replica. This implementation does
244     * nothing.
245     *
246     * @param fileName The name of a file stored in the ArcRepository.
247     * @param bitarchiveId The id of the replica that the administrative data for fileName is wrong for.
248     * @param newval What the administrative data will be updated to.
249     */
250    @Override
251    public void updateAdminData(String fileName, String bitarchiveId, ReplicaStoreState newval) {
252    }
253
254    /**
255     * Updates the checksum kept in the ArcRepository for a given file. It is the responsibility of the ArcRepository
256     * implementation to ensure that this checksum matches that of the underlying files. This implementation does
257     * nothing.
258     *
259     * @param filename The name of a file stored in the ArcRepository.
260     * @param checksum The new checksum.
261     */
262    @Override
263    public void updateAdminChecksum(String filename, String checksum) {
264    }
265
266    /**
267     * Remove a file from one part of the ArcRepository, retrieving a copy for security purposes. This is typically used
268     * when repairing a file that has been corrupted.
269     *
270     * @param fileName The name of the file to remove.
271     * @param bitarchiveId The id of the replica from which to remove the file. Not used in this implementation, may be
272     * null.
273     * @param checksum The checksum of the file to be removed.
274     * @param credentials A string that shows that the user is allowed to perform this operation.
275     * @return A local copy of the file removed.
276     * @throws ArgumentNotValid On null or empty parameters for fileName, checksum or credentials.
277     * @throws IOFailure On IO trouble.
278     * @throws PermissionDenied On wrong MD5 sum or wrong credentials.
279     */
280    @Override
281    public File removeAndGetFile(String fileName, String bitarchiveId, String checksum, String credentials) {
282        // Ignores bitarchiveName, checksum, and credentials for now
283        ArgumentNotValid.checkNotNullOrEmpty(fileName, "String fileName");
284        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
285        ArgumentNotValid.checkNotNullOrEmpty(credentials, "String credentials");
286        File file = findFile(fileName);
287        if (file == null) {
288            throw new IOFailure("Cannot find file '" + fileName + "'");
289        }
290        if (!ChecksumCalculator.calculateMd5(file).equals(checksum)) {
291            throw new PermissionDenied("Wrong checksum for removing file '" + fileName + "'");
292        }
293        if (!credentials.equals(Settings.get(CREDENTIALS_SETTING))) {
294            throw new PermissionDenied("Wrong credentials for removing file '" + fileName + "'");
295        }
296        File copiedTo = null;
297        try {
298            copiedTo = File.createTempFile("removeAndGetFile", fileName);
299        } catch (IOException e) {
300            throw new IOFailure("Cannot make temp file to copy '" + fileName + "' into", e);
301        }
302        FileUtils.moveFile(file, copiedTo);
303        return copiedTo;
304    }
305
306    /**
307     * Returns a File object for a filename if it exists in the archive.
308     *
309     * @param filename Name of file to find.
310     * @return A File object for the filename if the file exists, otherwise null.
311     */
312    private File findFile(String filename) {
313        for (File dir : storageDirs) {
314            final File file = new File(dir, filename);
315            if (file.isFile()) {
316                return file;
317            }
318        }
319        return null;
320    }
321
322    /**
323     * Method for retrieving the checksums of all the files of the replica.
324     *
325     * @param replicaId Inherited dummy argument.
326     * @return A file containing the names and checksum of all the files in the system.
327     * @throws ArgumentNotValid If the replicaId is either null or the empty string.
328     * @throws IOFailure If an unexpected IOException is caught.
329     */
330    @Override
331    public File getAllChecksums(String replicaId) throws IOFailure, ArgumentNotValid {
332        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
333
334        try {
335            List<String> checksums = new ArrayList<String>();
336            // go through the different storageDirs and find files and checksums.
337            for (File dir : storageDirs) {
338                // go through all file and calculate the checksum
339                for (File entry : dir.listFiles()) {
340                    String checksum = ChecksumCalculator.calculateMd5(entry);
341                    String filename = entry.getName();
342
343                    checksums.add(ChecksumJob.makeLine(filename, checksum));
344                }
345            }
346
347            // create a file with the results.
348            File res = File.createTempFile("all", "checksums", FileUtils.getTempDir());
349            FileUtils.writeCollectionToFile(res, checksums);
350            return res;
351        } catch (IOException e) {
352            throw new IOFailure("Received unexpected IOFailure: ", e);
353        }
354    }
355
356    /**
357     * Method for retrieving all the filenames of the replica.
358     *
359     * @param replicaId Inherited dummy argument.
360     * @return A file containing the names of all the files.
361     * @throws ArgumentNotValid If the replicaId is either null or empty.
362     * @throws IOFailure If an IOException is caught.
363     */
364    @Override
365    public File getAllFilenames(String replicaId) throws IOFailure, ArgumentNotValid {
366        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
367
368        List<String> filenames = new ArrayList<String>();
369        // go through the different storageDirs and put the name of the files
370        // into the resulting list of filenames.
371        for (File dir : storageDirs) {
372            for (String name : dir.list()) {
373                filenames.add(name);
374            }
375        }
376
377        try {
378            File res = File.createTempFile("all", "filenames", FileUtils.getTempDir());
379            FileUtils.writeCollectionToFile(res, filenames);
380            return res;
381        } catch (IOException e) {
382            throw new IOFailure("Received unexpected IOFailure: ", e);
383        }
384    }
385
386    /**
387     * Method for correcting a bad entry. Calls 'removeAndGetFile' followed by 'store'.
388     *
389     * @param replicaId Inherited dummy argument.
390     * @param checksum The checksum of the bad entry.
391     * @param file The new file to replace the bad entry.
392     * @param credentials The 'password' to allow changing the archive.
393     * @return The bad entry file.
394     * @throws ArgumentNotValid If one of the arguments are null, or if a string is empty.
395     * @throws PermissionDenied If the credentials or checksum are invalid.
396     */
397    @Override
398    public File correct(String replicaId, String checksum, File file, String credentials) throws ArgumentNotValid,
399            PermissionDenied {
400        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
401        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
402        ArgumentNotValid.checkNotNull(file, "File file");
403        ArgumentNotValid.checkNotNullOrEmpty(credentials, "String credentials");
404
405        // remove bad file.
406        File res = removeAndGetFile(file.getName(), replicaId, checksum, credentials);
407        // store good new file.
408        store(file);
409        // return bad file.
410        return res;
411    }
412
413    /**
414     * Method for finding the checksum of a file.
415     *
416     * @param replicaId Inherited dummy variable.
417     * @param filename The name of the file to calculate the checksum.
418     * @return The checksum of the file, or the empty string if the file was not found or an error occurred.
419     * @throws ArgumentNotValid If the replicaId or the filename is either null or the empty string.
420     */
421    @Override
422    public String getChecksum(String replicaId, String filename) throws ArgumentNotValid {
423        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
424        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
425        return ChecksumCalculator.calculateMd5(findFile(filename));
426    }
427
428}