001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.archive.checksum;
024
025import java.io.File;
026import java.io.FileWriter;
027import java.io.IOException;
028import java.io.InputStream;
029import java.util.Date;
030
031import org.apache.commons.io.IOUtils;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import com.sleepycat.bind.EntryBinding;
036import com.sleepycat.bind.serial.SerialBinding;
037import com.sleepycat.bind.serial.StoredClassCatalog;
038import com.sleepycat.je.Cursor;
039import com.sleepycat.je.Database;
040import com.sleepycat.je.DatabaseConfig;
041import com.sleepycat.je.DatabaseEntry;
042import com.sleepycat.je.DatabaseException;
043import com.sleepycat.je.Environment;
044import com.sleepycat.je.EnvironmentConfig;
045import com.sleepycat.je.LockMode;
046import com.sleepycat.je.OperationStatus;
047import com.sleepycat.je.Transaction;
048
049import dk.netarkivet.archive.ArchiveSettings;
050import dk.netarkivet.archive.tools.LoadDatabaseChecksumArchive;
051import dk.netarkivet.common.CommonSettings;
052import dk.netarkivet.common.distribute.RemoteFile;
053import dk.netarkivet.common.exceptions.ArgumentNotValid;
054import dk.netarkivet.common.exceptions.IOFailure;
055import dk.netarkivet.common.exceptions.IllegalState;
056import dk.netarkivet.common.utils.ChecksumCalculator;
057import dk.netarkivet.common.utils.FileUtils;
058import dk.netarkivet.common.utils.Settings;
059import dk.netarkivet.common.utils.batch.ChecksumJob;
060
061/**
062 * A ChecksumArchive persisted with a Berkeley DB JE Database. Migrating from the {@link FileChecksumArchive} to the
063 * DatabaseChecksumArchive is done with the {@link LoadDatabaseChecksumArchive} tool.
064 */
065public class DatabaseChecksumArchive implements ChecksumArchive {
066
067    /** The logger used by this class. */
068    private static final Logger log = LoggerFactory.getLogger(DatabaseChecksumArchive.class);
069
070    /** The singleton instance of this class. */
071    private static DatabaseChecksumArchive instance;
072    /** The basedir for the database itself. */
073    private File databaseBaseDir;
074    /** The subdirectory to the databaseBaseDir, where the database is located. */
075    private static final String DATABASE_SUBDIR = "DB";
076    /** The name of the database. */
077    private static final String DATABASE_NAME = "CHECKSUM";
078    /** The Database environment. */
079    private Environment env;
080    /** The Checksum Database itself */
081    private Database checksumDB;
082
083    /** The Database to store class information. */
084    private Database classDB;
085    /** The name of the class database. */
086    private static final String CLASS_DATABASE_NAME = "CLASS";
087
088    /**
089     * The Berkeley DB binder for the data object and keyObject in our database, i.e. Url and Long, respectively.
090     */
091    private EntryBinding objectBinding;
092    private EntryBinding keyBinding;
093
094    /** The minSpaceLeft value. */
095    private long minSpaceLeft;
096
097    /** The prefix to the removedEntryFile. */
098    private static final String WRONG_FILENAME_PREFIX = "removed_";
099    /** The suffix to the removedEntryFile. */
100    private static final String WRONG_FILENAME_SUFFIX = ".checksum";
101
102    /**
103     * The file for storing all the deleted entries. Each entry should be: 'date :' + 'wrongEntry'.
104     */
105    private File wrongEntryFile;
106
107    /**
108     * Method for obtaining the current singleton instance of this class. If the instance of this class has not yet been
109     * constructed, then it will be initialised.
110     *
111     * @return The current instance of this class.
112     * @throws Exception
113     */
114    public static synchronized DatabaseChecksumArchive getInstance() throws Exception {
115        if (instance == null) {
116            instance = new DatabaseChecksumArchive();
117        }
118        return instance;
119    }
120
121    /**
122     * Constructor. Retrieves the minimum space left variable, and ensures the existence of the archive file. If the
123     * file does not exist, then it is created.
124     *
125     * @throws Exception
126     */
127    public DatabaseChecksumArchive() throws DatabaseException {
128        super();
129
130        // Get the minimum space left setting.
131        long minSpaceLeft = Settings.getLong(ArchiveSettings.CHECKSUM_MIN_SPACE_LEFT);
132        // make sure, that minSpaceLeft is non-negative.
133        if (minSpaceLeft < 0) {
134            String msg = "Wrong setting of minSpaceRequired read from Settings: int " + minSpaceLeft;
135            log.warn(msg);
136            throw new ArgumentNotValid(msg);
137        }
138
139        // Initialize the checksum database.
140        initializeDatabase();
141
142        // Initialize Wrong Entry file
143        initializeWrongEntryFile();
144    }
145
146    private void initializeWrongEntryFile() {
147        String WrongEntryFilename = WRONG_FILENAME_PREFIX + Settings.get(CommonSettings.USE_REPLICA_ID)
148                + WRONG_FILENAME_SUFFIX;
149        wrongEntryFile = new File(databaseBaseDir, WrongEntryFilename);
150
151        // ensure that the file exists.
152        if (!wrongEntryFile.exists()) {
153            try {
154                wrongEntryFile.createNewFile();
155            } catch (IOException e) {
156                String msg = "Cannot create 'wrongEntryFile'!";
157                log.error(msg);
158                throw new IOFailure(msg, e);
159            }
160        }
161    }
162
163    private void initializeDatabase() throws DatabaseException {
164        databaseBaseDir = Settings.getFile(ArchiveSettings.CHECKSUM_BASEDIR);
165        File homeDirectory = new File(databaseBaseDir, DATABASE_SUBDIR);
166        if (!homeDirectory.isDirectory()) {
167            homeDirectory.mkdirs();
168        }
169        log.info("Opening ChecksumDB-environment in: {}", homeDirectory.getAbsolutePath());
170
171        EnvironmentConfig envConfig = new EnvironmentConfig();
172        envConfig.setTransactional(true);
173        envConfig.setAllowCreate(true);
174
175        DatabaseConfig dbConfig = new DatabaseConfig();
176        dbConfig.setTransactional(true);
177        dbConfig.setAllowCreate(true);
178
179        Transaction nullTransaction = null;
180        env = new Environment(homeDirectory, envConfig);
181        checksumDB = env.openDatabase(nullTransaction, DATABASE_NAME, dbConfig);
182        // Open the database that stores your class information.
183
184        classDB = env.openDatabase(nullTransaction, CLASS_DATABASE_NAME, dbConfig);
185        StoredClassCatalog classCatalog = new StoredClassCatalog(classDB);
186
187        // Create the binding
188        objectBinding = new SerialBinding(classCatalog, String.class);
189        keyBinding = new SerialBinding(classCatalog, String.class);
190    }
191
192    @Override
193    public boolean hasEnoughSpace() {
194        if (checkDatabaseDir(databaseBaseDir) && (FileUtils.getBytesFree(databaseBaseDir) > minSpaceLeft)) {
195            return true;
196        }
197        return false;
198    }
199
200    private boolean checkDatabaseDir(File file) {
201        // The file must exist.
202        if (!file.isDirectory()) {
203            log.warn("The file '{}' is not a valid directory.", file.getAbsolutePath());
204            return false;
205        }
206        // It must be writable.
207        if (!file.canWrite()) {
208            log.warn("The directory '{}' is not writable", file.getAbsolutePath());
209            return false;
210        }
211        return true;
212    }
213
214    @Override
215    public File correct(String filename, File correctFile) throws IOFailure, ArgumentNotValid, IllegalState {
216        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
217        ArgumentNotValid.checkNotNull(correctFile, "File correctFile");
218
219        // If no file entry exists, then IllegalState
220        if (!hasEntry(filename)) {
221            String errMsg = "No file entry for file '" + filename + "'.";
222            log.error(errMsg);
223            throw new IllegalState(errMsg);
224        }
225
226        // retrieve the checksum
227        String currentChecksum = getChecksum(filename);
228
229        // Calculate the new checksum and verify that it is different.
230        String newChecksum = calculateChecksum(correctFile);
231        if (newChecksum.equals(currentChecksum)) {
232            // This should never occur.
233            throw new IllegalState("The checksum of the old 'bad' entry is "
234                    + " the same as the checksum of the new correcting entry");
235        }
236
237        // Make entry in the wrongEntryFile.
238        String badEntry = ChecksumJob.makeLine(filename, currentChecksum);
239        appendWrongRecordToWrongEntryFile(badEntry);
240
241        // Correct the bad entry, by changing the value to the newChecksum.'
242        // Since the checksumArchive is a hashmap, then putting an existing
243        // entry with a new value will override the existing one.
244        put(filename, newChecksum);
245
246        // Make the file containing the bad entry be returned in the
247        // CorrectMessage.
248        File removedEntryFile;
249        try {
250            // Initialise file and writer.
251            removedEntryFile = File.createTempFile(filename, "tmp", FileUtils.getTempDir());
252            FileWriter fw = new FileWriter(removedEntryFile);
253
254            // Write the bad entry.
255            fw.write(badEntry);
256
257            // flush and close.
258            fw.flush();
259            fw.close();
260        } catch (IOException e) {
261            throw new IOFailure("Unable to create return file for CorrectMessage", e);
262        }
263
264        // Return the file containing the removed entry.
265        return removedEntryFile;
266    }
267
268    /**
269     * Method for appending a 'wrong' entry in the wrongEntryFile. It will be noted which time the wrong entry was
270     * appended: date + " : " + wrongRecord.
271     *
272     * @param wrongRecord The record to append.
273     * @throws IOFailure If the wrong record cannot be appended correctly.
274     */
275    private synchronized void appendWrongRecordToWrongEntryFile(String wrongRecord) throws IOFailure {
276        try {
277            // Create the string to append: date + 'wrong record'.
278            String entry = new Date().toString() + " : " + wrongRecord + "\n";
279
280            // get a filewriter for the checksum file, and append the record.
281            boolean appendToFile = true;
282            FileWriter fwrite = new FileWriter(wrongEntryFile, appendToFile);
283            fwrite.append(entry);
284
285            // close fileWriter.
286            fwrite.flush();
287            fwrite.close();
288        } catch (IOException e) {
289            log.warn("Cannot put a bad record to the 'wrongEntryFile'.", e);
290            throw new IOFailure("Cannot put a bad record to the 'wrongEntryFile'.", e);
291        }
292    }
293
294    @Override
295    public String getChecksum(String filename) {
296        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
297
298        Transaction nullTransaction = null;
299        LockMode nullLockMode = null;
300        DatabaseEntry key = new DatabaseEntry();
301        keyBinding.objectToEntry(filename, key);
302        DatabaseEntry data = new DatabaseEntry();
303
304        OperationStatus status = null;
305        try {
306            status = checksumDB.get(nullTransaction, key, data, nullLockMode);
307        } catch (DatabaseException e) {
308            throw new IOFailure("Could not retrieve a checksum for the filename '" + filename + "'", e);
309        }
310
311        String resultChecksum = null;
312        if (status == OperationStatus.SUCCESS) {
313            resultChecksum = (String) objectBinding.entryToObject(data);
314        }
315
316        return resultChecksum;
317    }
318
319    @Override
320    public boolean hasEntry(String filename) {
321        return (getChecksum(filename) != null);
322    }
323
324    @Override
325    public synchronized void upload(RemoteFile file, String filename) {
326        ArgumentNotValid.checkNotNull(file, "RemoteFile file");
327        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
328
329        InputStream input = null;
330
331        try {
332            input = file.getInputStream();
333            String newChecksum = calculateChecksum(input);
334            if (hasEntry(filename)) {
335                // fetch already stored checksum
336                String oldChecksum = getChecksum(filename);
337                if (newChecksum.equals(oldChecksum)) {
338                    log.warn(
339                            "Cannot upload archivefile '{}', " + "it is already archived with the same checksum: '{}'",
340                            filename, oldChecksum);
341                } else {
342                    throw new IllegalState("Cannot upload archivefile '" + filename
343                            + "', it is already archived with different checksum." + " Archive checksum: '"
344                            + oldChecksum + "' and the uploaded file has: '" + newChecksum + "'.");
345                }
346                // It is considered a success that it already is within the archive,
347                // thus do not throw an exception.
348                return;
349            } else {
350                put(filename, newChecksum);
351            }
352        } finally {
353            if (input != null) {
354                IOUtils.closeQuietly(input);
355            }
356        }
357    }
358    
359    public synchronized void upload(String checksum, String filename) {
360        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
361        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
362
363        if (hasEntry(filename)) {
364                // fetch already stored checksum
365                String oldChecksum = getChecksum(filename);
366                if (checksum.equals(oldChecksum)) {
367                        log.warn(
368                                        "Cannot upload archivefile '{}', " + "it is already archived with the same checksum: '{}'",
369                                        filename, oldChecksum);
370                } else {
371                        throw new IllegalState("Cannot upload archivefile '" + filename
372                                        + "', it is already archived with different checksum." + " Archive checksum: '"
373                                        + oldChecksum + "' and the uploaded file has: '" + checksum + "'.");
374                }
375                // It is considered a success that it already is within the archive,
376                // thus do not throw an exception.
377                return;
378        } else {
379                put(filename, checksum);
380        }
381    }
382
383    /**
384     * Update the database with a new filename and its checksum.
385     *
386     * @param filename A given filename
387     * @param checksum The related checksum
388     */
389    public void put(String filename, String checksum) {
390        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
391        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
392
393        DatabaseEntry theKey = new DatabaseEntry();
394        DatabaseEntry theData = new DatabaseEntry();
395        keyBinding.objectToEntry(filename, theKey);
396        objectBinding.objectToEntry(checksum, theData);
397        Transaction nullTransaction = null;
398
399        try {
400            checksumDB.put(nullTransaction, theKey, theData);
401        } catch (DatabaseException e) {
402            throw new IOFailure("Database exception occuring during ingest", e);
403        }
404    }
405
406    @Override
407    public String calculateChecksum(File f) {
408        return ChecksumCalculator.calculateMd5(f);
409    }
410
411    @Override
412    public String calculateChecksum(InputStream is) {
413        return ChecksumCalculator.calculateMd5(is);
414    }
415
416    @Override
417    public File getArchiveAsFile() {
418        File tempFile = null;
419        try {
420            tempFile = File.createTempFile("allFilenamesAndChecksums", "tmp", FileUtils.getTempDir());
421            log.debug("Creating temporary file for checksums: " + tempFile.getAbsolutePath());
422            dumpDatabaseToFile(tempFile, false);
423            log.debug("Dumped checksums to temporary file: " + tempFile.getAbsolutePath());
424        } catch (IOException e) {
425            throw new IOFailure(e.toString());
426        }
427
428        return tempFile;
429    }
430
431    /**
432     * Write the contents of the database to the given file.
433     *
434     * @param outputFile The outputfile whereto the data is written.
435     * @param writeOnlyFilenames If true, we only write the filenames to the files, not the checksums
436     * @throws IOException If unable to write to file for some reason
437     */
438    private void dumpDatabaseToFile(File tempFile, boolean writeOnlyFilenames) throws IOException {
439        Cursor cursor = null;
440        File resultFile = tempFile;
441
442        FileWriter fw = new FileWriter(resultFile);
443        try {
444            cursor = checksumDB.openCursor(null, null);
445
446            DatabaseEntry foundKey = new DatabaseEntry();
447            DatabaseEntry foundData = new DatabaseEntry();
448
449            while (cursor.getNext(foundKey, foundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
450                String keyString = new String(foundKey.getData());
451                String dataString = new String(foundData.getData());
452                if (writeOnlyFilenames) {
453                    fw.append(keyString);
454                } else {
455                    fw.append(keyString);
456                    fw.append(ChecksumJob.STRING_FILENAME_SEPARATOR);
457                    fw.append(dataString);
458                }
459                fw.append('\n'); // end with newline
460            }
461            fw.flush();
462        } catch (DatabaseException de) {
463            throw new IOFailure("Error accessing database." + de);
464        } finally {
465            if (fw != null) {
466                IOUtils.closeQuietly(fw);
467            }
468            if (cursor != null) {
469                try {
470                    cursor.close();
471                } catch (DatabaseException e) {
472                    log.warn("Database error occurred when closing the cursor: ", e);
473                }
474            }
475        }
476    }
477
478    @Override
479    public File getAllFilenames() {
480        File tempFile = null;
481        try {
482            tempFile = File.createTempFile("allFilenames", "tmp", FileUtils.getTempDir());
483        } catch (IOException e) {
484            throw new IOFailure(e.toString());
485        }
486
487        try {
488            dumpDatabaseToFile(tempFile, true);
489        } catch (IOException e) {
490            throw new IOFailure("Error during the getAllFilenames operation: ", e);
491        }
492
493        return tempFile;
494    }
495
496    @Override
497    public void cleanup() {
498        if (checksumDB != null) {
499            try {
500                checksumDB.close();
501            } catch (DatabaseException e) {
502                log.warn("Unable to close database. The error was :", e);
503            }
504        }
505    }
506}