/*
 * #%L
 * Netarchivesuite - archive
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.archive.checksum;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.sleepycat.bind.EntryBinding;
import com.sleepycat.bind.serial.SerialBinding;
import com.sleepycat.bind.serial.StoredClassCatalog;
import com.sleepycat.je.Cursor;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.LockMode;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.Transaction;

import dk.netarkivet.archive.ArchiveSettings;
import dk.netarkivet.archive.tools.LoadDatabaseChecksumArchive;
import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.distribute.RemoteFile;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.utils.ChecksumCalculator;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.batch.ChecksumJob;

/**
 * A ChecksumArchive persisted with a Berkeley DB JE Database. Migrating from the {@link FileChecksumArchive} to the
 * DatabaseChecksumArchive is done with the {@link LoadDatabaseChecksumArchive} tool.
 * <p>
 * Entries map a filename (key) to its checksum (value); both are stored as serialized Strings.
 */
public class DatabaseChecksumArchive implements ChecksumArchive {

    /** The logger used by this class. */
    private static final Logger log = LoggerFactory.getLogger(DatabaseChecksumArchive.class);

    /** The singleton instance of this class. */
    private static DatabaseChecksumArchive instance;
    /** The basedir for the database itself. */
    private File databaseBaseDir;
    /** The subdirectory to the databaseBaseDir, where the database is located. */
    private static final String DATABASE_SUBDIR = "DB";
    /** The name of the database. */
    private static final String DATABASE_NAME = "CHECKSUM";
    /** The Database environment. */
    private Environment env;
    /** The Checksum Database itself. */
    private Database checksumDB;

    /** The Database to store class information. */
    private Database classDB;
    /** The name of the class database. */
    private static final String CLASS_DATABASE_NAME = "CLASS";

    /** The Berkeley DB binding for the data object (the checksum, a String). */
    private EntryBinding objectBinding;
    /** The Berkeley DB binding for the key object (the filename, a String). */
    private EntryBinding keyBinding;

    /** The minimum number of free bytes required by {@link #hasEnoughSpace()}. */
    private long minSpaceLeft;

    /** The prefix to the removedEntryFile. */
    private static final String WRONG_FILENAME_PREFIX = "removed_";
    /** The suffix to the removedEntryFile. */
    private static final String WRONG_FILENAME_SUFFIX = ".checksum";

    /**
     * The file for storing all the deleted entries. Each entry should be: 'date :' + 'wrongEntry'.
     */
    private File wrongEntryFile;

    /**
     * Method for obtaining the current singleton instance of this class. If the instance of this class has not yet been
     * constructed, then it will be initialised.
     *
     * @return The current instance of this class.
     * @throws Exception If the instance cannot be constructed.
     */
    public static synchronized DatabaseChecksumArchive getInstance() throws Exception {
        if (instance == null) {
            instance = new DatabaseChecksumArchive();
        }
        return instance;
    }

    /**
     * Constructor. Reads and validates the minimum-space-left setting, opens the checksum database and ensures the
     * existence of the file that records removed ('wrong') entries.
     *
     * @throws DatabaseException If the database environment or one of the databases cannot be opened.
     * @throws ArgumentNotValid If the configured minimum space left is negative.
     * @throws IOFailure If the database directory or the wrong-entry file cannot be created.
     */
    public DatabaseChecksumArchive() throws DatabaseException {
        super();

        // Read into a local first, so that the field only ever holds a validated value.
        long configuredMinSpaceLeft = Settings.getLong(ArchiveSettings.CHECKSUM_MIN_SPACE_LEFT);
        if (configuredMinSpaceLeft < 0) {
            String msg = "Wrong setting of minSpaceRequired read from Settings: int " + configuredMinSpaceLeft;
            log.warn(msg);
            throw new ArgumentNotValid(msg);
        }
        // Assign the FIELD; a local declaration with the same name would shadow it and leave
        // the threshold used by hasEnoughSpace() at 0.
        this.minSpaceLeft = configuredMinSpaceLeft;

        // Initialize the checksum database (this also sets databaseBaseDir).
        initializeDatabase();

        // Initialize the wrong-entry file; it is placed in databaseBaseDir.
        initializeWrongEntryFile();
    }

    /**
     * Locates the wrong-entry file in the database base directory and creates it if it does not exist.
     *
     * @throws IOFailure If the file cannot be created.
     */
    private void initializeWrongEntryFile() {
        String wrongEntryFilename = WRONG_FILENAME_PREFIX + Settings.get(CommonSettings.USE_REPLICA_ID)
                + WRONG_FILENAME_SUFFIX;
        wrongEntryFile = new File(databaseBaseDir, wrongEntryFilename);

        // ensure that the file exists.
        if (!wrongEntryFile.exists()) {
            try {
                wrongEntryFile.createNewFile();
            } catch (IOException e) {
                String msg = "Cannot create 'wrongEntryFile'!";
                log.error(msg, e);
                throw new IOFailure(msg, e);
            }
        }
    }

    /**
     * Opens (creating if necessary) the database environment, the checksum database and the class-catalog
     * database, and creates the key and data bindings.
     *
     * @throws DatabaseException If the environment or a database cannot be opened.
     * @throws IOFailure If the database directory does not exist and cannot be created.
     */
    private void initializeDatabase() throws DatabaseException {
        databaseBaseDir = Settings.getFile(ArchiveSettings.CHECKSUM_BASEDIR);
        File homeDirectory = new File(databaseBaseDir, DATABASE_SUBDIR);
        // The final isDirectory() re-check covers the case where the directory appeared concurrently.
        if (!homeDirectory.isDirectory() && !homeDirectory.mkdirs() && !homeDirectory.isDirectory()) {
            String msg = "Cannot create the database directory '" + homeDirectory.getAbsolutePath() + "'";
            log.error(msg);
            throw new IOFailure(msg);
        }
        log.info("Opening ChecksumDB-environment in: {}", homeDirectory.getAbsolutePath());

        EnvironmentConfig envConfig = new EnvironmentConfig();
        envConfig.setTransactional(true);
        envConfig.setAllowCreate(true);

        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setTransactional(true);
        dbConfig.setAllowCreate(true);

        Transaction nullTransaction = null;
        env = new Environment(homeDirectory, envConfig);
        checksumDB = env.openDatabase(nullTransaction, DATABASE_NAME, dbConfig);

        // Open the database that stores the class information needed by the serial bindings.
        classDB = env.openDatabase(nullTransaction, CLASS_DATABASE_NAME, dbConfig);
        StoredClassCatalog classCatalog = new StoredClassCatalog(classDB);

        // Create the bindings; both the key (filename) and the data (checksum) are Strings.
        objectBinding = new SerialBinding(classCatalog, String.class);
        keyBinding = new SerialBinding(classCatalog, String.class);
    }

    /**
     * Checks that the database base directory is a writable directory and that it has more free bytes than the
     * configured minimum.
     *
     * @return true if there is enough space; false otherwise.
     */
    @Override
    public boolean hasEnoughSpace() {
        return checkDatabaseDir(databaseBaseDir) && FileUtils.getBytesFree(databaseBaseDir) > minSpaceLeft;
    }

    /**
     * Checks that the given file is an existing, writable directory. A warning is logged if it is not.
     *
     * @param file The directory to check.
     * @return true if the file is a writable directory; false otherwise.
     */
    private boolean checkDatabaseDir(File file) {
        // The file must exist and be a directory.
        if (!file.isDirectory()) {
            log.warn("The file '{}' is not a valid directory.", file.getAbsolutePath());
            return false;
        }
        // It must be writable.
        if (!file.canWrite()) {
            log.warn("The directory '{}' is not writable", file.getAbsolutePath());
            return false;
        }
        return true;
    }

    /**
     * Replaces the checksum stored for the given filename with the checksum of the given file. The replaced entry
     * is appended to the wrong-entry file and is also written to a temporary file, which is returned.
     *
     * @param filename The name of the file whose entry should be corrected.
     * @param correctFile The file from which the new checksum is calculated.
     * @return A temporary file containing the removed entry.
     * @throws IOFailure If the removed entry cannot be written to the wrong-entry file or to the returned file.
     * @throws ArgumentNotValid If the filename is null or empty, or if the correctFile is null.
     * @throws IllegalState If no entry exists for the filename, or if the new checksum equals the stored one.
     */
    @Override
    public File correct(String filename, File correctFile) throws IOFailure, ArgumentNotValid, IllegalState {
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
        ArgumentNotValid.checkNotNull(correctFile, "File correctFile");

        // Retrieve the current checksum with a single lookup; null means that no entry exists.
        String currentChecksum = getChecksum(filename);
        if (currentChecksum == null) {
            String errMsg = "No file entry for file '" + filename + "'.";
            log.error(errMsg);
            throw new IllegalState(errMsg);
        }

        // Calculate the new checksum and verify that it is different.
        String newChecksum = calculateChecksum(correctFile);
        if (newChecksum.equals(currentChecksum)) {
            // This should never occur.
            throw new IllegalState("The checksum of the old 'bad' entry is "
                    + " the same as the checksum of the new correcting entry");
        }

        // Make entry in the wrongEntryFile.
        String badEntry = ChecksumJob.makeLine(filename, currentChecksum);
        appendWrongRecordToWrongEntryFile(badEntry);

        // Correct the bad entry: putting an existing key with a new value overrides the old value.
        put(filename, newChecksum);

        // Make the file containing the bad entry be returned in the CorrectMessage.
        File removedEntryFile;
        FileWriter fw = null;
        try {
            removedEntryFile = File.createTempFile(filename, "tmp", FileUtils.getTempDir());
            fw = new FileWriter(removedEntryFile);
            fw.write(badEntry);
            fw.flush();
            // Close inside the try-block, so that a failing close is reported to the caller.
            fw.close();
        } catch (IOException e) {
            throw new IOFailure("Unable to create return file for CorrectMessage", e);
        } finally {
            // Releases the writer if an exception occurred before the explicit close above.
            IOUtils.closeQuietly(fw);
        }

        // Return the file containing the removed entry.
        return removedEntryFile;
    }

    /**
     * Method for appending a 'wrong' entry in the wrongEntryFile. It will be noted which time the wrong entry was
     * appended: date + " : " + wrongRecord.
     *
     * @param wrongRecord The record to append.
     * @throws IOFailure If the wrong record cannot be appended correctly.
     */
    private synchronized void appendWrongRecordToWrongEntryFile(String wrongRecord) throws IOFailure {
        FileWriter fwrite = null;
        try {
            // Create the string to append: date + 'wrong record'.
            String entry = new Date().toString() + " : " + wrongRecord + "\n";

            // Open the wrong-entry file in append mode and add the record.
            boolean appendToFile = true;
            fwrite = new FileWriter(wrongEntryFile, appendToFile);
            fwrite.append(entry);

            fwrite.flush();
            // Close inside the try-block, so that a failing close is reported to the caller.
            fwrite.close();
        } catch (IOException e) {
            log.warn("Cannot put a bad record to the 'wrongEntryFile'.", e);
            throw new IOFailure("Cannot put a bad record to the 'wrongEntryFile'.", e);
        } finally {
            // Releases the writer if an exception occurred before the explicit close above.
            IOUtils.closeQuietly(fwrite);
        }
    }

    /**
     * Retrieves the checksum stored for the given filename.
     *
     * @param filename The name of the file.
     * @return The stored checksum, or null if the lookup did not succeed (e.g. no entry for the filename).
     * @throws ArgumentNotValid If the filename is null or empty.
     * @throws IOFailure If the database lookup fails.
     */
    @Override
    public String getChecksum(String filename) {
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");

        Transaction nullTransaction = null;
        LockMode nullLockMode = null;
        DatabaseEntry key = new DatabaseEntry();
        keyBinding.objectToEntry(filename, key);
        DatabaseEntry data = new DatabaseEntry();

        OperationStatus status;
        try {
            status = checksumDB.get(nullTransaction, key, data, nullLockMode);
        } catch (DatabaseException e) {
            throw new IOFailure("Could not retrieve a checksum for the filename '" + filename + "'", e);
        }

        if (status != OperationStatus.SUCCESS) {
            return null;
        }
        return (String) objectBinding.entryToObject(data);
    }

    /**
     * Tells whether a checksum is stored for the given filename.
     *
     * @param filename The name of the file.
     * @return true if {@link #getChecksum(String)} finds a checksum for the filename; false otherwise.
     */
    @Override
    public boolean hasEntry(String filename) {
        return getChecksum(filename) != null;
    }

    /**
     * Calculates the checksum of the given remote file and stores it under the given filename. If an entry with the
     * same checksum already exists, a warning is logged and nothing is changed.
     *
     * @param file The remote file whose content the checksum is calculated from.
     * @param filename The name under which the checksum is stored.
     * @throws ArgumentNotValid If the file is null, or the filename is null or empty.
     * @throws IllegalState If the filename is already archived with a different checksum.
     */
    @Override
    public synchronized void upload(RemoteFile file, String filename) {
        ArgumentNotValid.checkNotNull(file, "RemoteFile file");
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");

        InputStream input = null;
        try {
            input = file.getInputStream();
            storeIfAbsent(filename, calculateChecksum(input));
        } finally {
            IOUtils.closeQuietly(input);
        }
    }

    /**
     * Stores the given checksum under the given filename. If an entry with the same checksum already exists, a
     * warning is logged and nothing is changed.
     *
     * @param checksum The checksum to store.
     * @param filename The name under which the checksum is stored.
     * @throws ArgumentNotValid If the checksum or the filename is null or empty.
     * @throws IllegalState If the filename is already archived with a different checksum.
     */
    public synchronized void upload(String checksum, String filename) {
        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");

        storeIfAbsent(filename, checksum);
    }

    /**
     * Shared implementation of the two upload methods: stores the checksum if the filename has no entry yet.
     * Both callers are synchronized on this instance.
     *
     * @param filename The name under which the checksum is stored.
     * @param newChecksum The checksum of the uploaded file.
     * @throws IllegalState If the filename is already archived with a different checksum.
     */
    private void storeIfAbsent(String filename, String newChecksum) {
        // A single lookup tells both whether an entry exists and what its checksum is.
        String oldChecksum = getChecksum(filename);
        if (oldChecksum == null) {
            put(filename, newChecksum);
            return;
        }
        if (!newChecksum.equals(oldChecksum)) {
            throw new IllegalState("Cannot upload archivefile '" + filename
                    + "', it is already archived with different checksum." + " Archive checksum: '"
                    + oldChecksum + "' and the uploaded file has: '" + newChecksum + "'.");
        }
        // It is considered a success that it already is within the archive,
        // thus do not throw an exception.
        log.warn("Cannot upload archivefile '{}', " + "it is already archived with the same checksum: '{}'",
                filename, oldChecksum);
    }

    /**
     * Update the database with a new filename and its checksum.
     *
     * @param filename A given filename
     * @param checksum The related checksum
     * @throws ArgumentNotValid If the filename or the checksum is null or empty.
     * @throws IOFailure If the database cannot be updated.
     */
    public void put(String filename, String checksum) {
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");

        DatabaseEntry theKey = new DatabaseEntry();
        DatabaseEntry theData = new DatabaseEntry();
        keyBinding.objectToEntry(filename, theKey);
        objectBinding.objectToEntry(checksum, theData);
        Transaction nullTransaction = null;

        try {
            checksumDB.put(nullTransaction, theKey, theData);
        } catch (DatabaseException e) {
            throw new IOFailure("Database exception occuring during ingest", e);
        }
    }

    /**
     * Calculates the MD5 checksum of the given file.
     *
     * @param f The file to calculate the checksum of.
     * @return The result of {@link ChecksumCalculator#calculateMd5(File)}.
     */
    @Override
    public String calculateChecksum(File f) {
        return ChecksumCalculator.calculateMd5(f);
    }

    /**
     * Calculates the MD5 checksum of the content of the given stream.
     *
     * @param is The stream to calculate the checksum of.
     * @return The result of {@link ChecksumCalculator#calculateMd5(InputStream)}.
     */
    @Override
    public String calculateChecksum(InputStream is) {
        return ChecksumCalculator.calculateMd5(is);
    }

    /**
     * Dumps all entries of the database to a temporary file, one line per entry: the filename, the
     * {@link ChecksumJob#STRING_FILENAME_SEPARATOR} and the checksum.
     *
     * @return The temporary file with all filenames and checksums.
     * @throws IOFailure If the temporary file cannot be created or written, or if the database cannot be read.
     */
    @Override
    public File getArchiveAsFile() {
        File tempFile;
        try {
            tempFile = File.createTempFile("allFilenamesAndChecksums", "tmp", FileUtils.getTempDir());
            log.debug("Creating temporary file for checksums: {}", tempFile.getAbsolutePath());
            dumpDatabaseToFile(tempFile, false);
            log.debug("Dumped checksums to temporary file: {}", tempFile.getAbsolutePath());
        } catch (IOException e) {
            // Keep the original message, but preserve the cause for diagnostics.
            throw new IOFailure(e.toString(), e);
        }

        return tempFile;
    }

    /**
     * Write the contents of the database to the given file.
     *
     * @param outputFile The outputfile whereto the data is written.
     * @param writeOnlyFilenames If true, we only write the filenames to the files, not the checksums
     * @throws IOException If unable to write to file for some reason
     * @throws IOFailure If the database cannot be read.
     */
    private void dumpDatabaseToFile(File outputFile, boolean writeOnlyFilenames) throws IOException {
        Cursor cursor = null;

        FileWriter fw = new FileWriter(outputFile);
        try {
            cursor = checksumDB.openCursor(null, null);

            DatabaseEntry foundKey = new DatabaseEntry();
            DatabaseEntry foundData = new DatabaseEntry();

            while (cursor.getNext(foundKey, foundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
                // Entries are written through the serial bindings (see put), so they must be decoded through
                // the same bindings; the raw bytes are serialized objects, not plain text.
                String keyString = (String) keyBinding.entryToObject(foundKey);
                fw.append(keyString);
                if (!writeOnlyFilenames) {
                    String dataString = (String) objectBinding.entryToObject(foundData);
                    fw.append(ChecksumJob.STRING_FILENAME_SEPARATOR);
                    fw.append(dataString);
                }
                fw.append('\n'); // end with newline
            }
            fw.flush();
        } catch (DatabaseException de) {
            throw new IOFailure("Error accessing database." + de, de);
        } finally {
            IOUtils.closeQuietly(fw);
            if (cursor != null) {
                try {
                    cursor.close();
                } catch (DatabaseException e) {
                    log.warn("Database error occurred when closing the cursor: ", e);
                }
            }
        }
    }

    /**
     * Dumps the filenames (keys) of all entries of the database to a temporary file, one filename per line.
     *
     * @return The temporary file with all filenames.
     * @throws IOFailure If the temporary file cannot be created or written, or if the database cannot be read.
     */
    @Override
    public File getAllFilenames() {
        File tempFile;
        try {
            tempFile = File.createTempFile("allFilenames", "tmp", FileUtils.getTempDir());
        } catch (IOException e) {
            // Keep the original message, but preserve the cause for diagnostics.
            throw new IOFailure(e.toString(), e);
        }

        try {
            dumpDatabaseToFile(tempFile, true);
        } catch (IOException e) {
            throw new IOFailure("Error during the getAllFilenames operation: ", e);
        }

        return tempFile;
    }

    /**
     * Closes the checksum database, the class database and the database environment. Closing is best-effort:
     * a failure to close one handle is logged as a warning and the remaining handles are still closed. The
     * singleton reference returned by {@link #getInstance()} is not reset.
     */
    @Override
    public void cleanup() {
        if (checksumDB != null) {
            try {
                checksumDB.close();
            } catch (DatabaseException e) {
                log.warn("Unable to close database. The error was :", e);
            }
        }
        if (classDB != null) {
            try {
                classDB.close();
            } catch (DatabaseException e) {
                log.warn("Unable to close class database. The error was :", e);
            }
        }
        // The environment is closed last, after the database handles that refer to it.
        if (env != null) {
            try {
                env.close();
            } catch (DatabaseException e) {
                log.warn("Unable to close database environment. The error was :", e);
            } catch (IllegalStateException e) {
                // NOTE(review): presumably raised by JE when handles are still open - confirm against
                // the JE version in use. Logged only, so that cleanup stays best-effort.
                log.warn("Unable to close database environment. The error was :", e);
            }
        }
    }
}