001/* 002 * #%L 003 * Netarchivesuite - archive 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.archive.arcrepositoryadmin; 024 025import java.io.File; 026import java.io.FileReader; 027import java.io.IOException; 028import java.sql.Connection; 029import java.sql.Date; 030import java.sql.PreparedStatement; 031import java.sql.ResultSet; 032import java.sql.SQLException; 033import java.sql.Timestamp; 034import java.util.ArrayList; 035import java.util.Calendar; 036import java.util.Collection; 037import java.util.HashSet; 038import java.util.Iterator; 039import java.util.List; 040import java.util.Random; 041import java.util.Set; 042 043import org.apache.commons.io.LineIterator; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047import dk.netarkivet.common.distribute.Channels; 048import dk.netarkivet.common.distribute.arcrepository.Replica; 049import dk.netarkivet.common.distribute.arcrepository.ReplicaStoreState; 050import dk.netarkivet.common.distribute.arcrepository.ReplicaType; 051import dk.netarkivet.common.exceptions.ArgumentNotValid; 052import dk.netarkivet.common.exceptions.IOFailure; 053import dk.netarkivet.common.exceptions.IllegalState; 054import dk.netarkivet.common.exceptions.UnknownID; 055import dk.netarkivet.common.utils.DBUtils; 056import dk.netarkivet.common.utils.ExceptionUtils; 057import dk.netarkivet.common.utils.FileUtils; 058import dk.netarkivet.common.utils.KeyValuePair; 059import dk.netarkivet.common.utils.NotificationType; 060import dk.netarkivet.common.utils.NotificationsFactory; 061import dk.netarkivet.common.utils.StringUtils; 062import dk.netarkivet.common.utils.TimeUtils; 063import dk.netarkivet.common.utils.batch.ChecksumJob; 064 065/** 066 * Method for storing the bitpreservation cache in a database. 067 * <p> 068 * This method uses the 'admin.data' file for retrieving the upload status. 069 */ 070public final class ReplicaCacheDatabase implements BitPreservationDAO { 071 072 /** The log. */ 073 protected static final Logger log = LoggerFactory.getLogger(ReplicaCacheDatabase.class); 074 075 /** The current instance. */ 076 private static ReplicaCacheDatabase instance; 077 078 /** 079 * The number of entries between logging in either file list or checksum list. This also controls how often the 080 * database connection is renewed in methods {@link #addChecksumInformation(File, Replica)} and 081 * {@link #addFileListInformation(File, Replica)}, where the operations can take hours, and seems to leak memory. 082 */ 083 private final int LOGGING_ENTRY_INTERVAL = 1000; 084 085 /** Waiting time in seconds before attempting to initialise the database again. */ 086 private final int WAIT_BEFORE_INIT_RETRY = 30; 087 088 /** Number of DB INIT retries. */ 089 private final int INIT_DB_RETRIES = 3; 090 091 /** 092 * Constructor. throws IllegalState if unable to initialize the database. 093 */ 094 private ReplicaCacheDatabase() { 095 // Get a connection to the archive database 096 Connection con = ArchiveDBConnection.get(); 097 try { 098 int retries = 0; 099 boolean initialized = false; 100 while (retries < INIT_DB_RETRIES && !initialized) { 101 retries++; 102 try { 103 initialiseDB(con); 104 initialized = true; 105 log.info("Initialization of database successful"); 106 return; 107 } catch (IOFailure e) { 108 if (retries < INIT_DB_RETRIES) { 109 log.info("Initialization failed. Probably because another application is calling the same " 110 + "method now. Retrying after a minimum of {} seconds: ", WAIT_BEFORE_INIT_RETRY, e); 111 waitSome(); 112 } else { 113 throw new IllegalState("Unable to initialize the database."); 114 } 115 } 116 } 117 } finally { 118 ArchiveDBConnection.release(con); 119 } 120 } 121 122 /** 123 * Wait a while. 124 */ 125 private void waitSome() { 126 Random rand = new Random(); 127 try { 128 Thread.sleep(WAIT_BEFORE_INIT_RETRY * TimeUtils.SECOND_IN_MILLIS + rand.nextInt(WAIT_BEFORE_INIT_RETRY)); 129 } catch (InterruptedException e1) { 130 // Ignored 131 } 132 } 133 134 /** 135 * Method for retrieving the current instance of this class. 136 * 137 * @return The current instance. 138 */ 139 public static synchronized ReplicaCacheDatabase getInstance() { 140 if (instance == null) { 141 instance = new ReplicaCacheDatabase(); 142 } 143 return instance; 144 } 145 146 /** 147 * Method for initialising the database. This basically makes sure that all the replicas are within the database, 148 * and that no unknown replicas have been defined. 149 * 150 * @param connection An open connection to the archive database 151 */ 152 protected void initialiseDB(Connection connection) { 153 // retrieve the list of replicas. 154 Collection<Replica> replicas = Replica.getKnown(); 155 // Retrieve the replica IDs currently in the database. 156 List<String> repIds = ReplicaCacheHelpers.retrieveIdsFromReplicaTable(connection); 157 log.debug("IDs for replicas already in the database: {}", StringUtils.conjoin(",", repIds)); 158 for (Replica rep : replicas) { 159 // try removing the id from the temporary list of IDs within the DB. 160 // If the remove is not successful, then the replica is already 161 // in the database. 162 if (!repIds.remove(rep.getId())) { 163 // if the replica id cannot be removed from the list, then it 164 // does not exist in the database and must be added. 165 log.info("Inserting replica '{}' in database.", rep.toString()); 166 ReplicaCacheHelpers.insertReplicaIntoDB(rep, connection); 167 } else { 168 // Otherwise it already exists in the DB. 169 log.debug("Replica '{}' already inserted in database.", rep.toString()); 170 } 171 } 172 173 // If unknown replica ids are found, then throw exception. 174 if (repIds.size() > 0) { 175 throw new IllegalState("The database contain identifiers for the following replicas, which are not " 176 + "defined in the settings: " + repIds); 177 } 178 } 179 180 /** 181 * Method for retrieving the entry in the replicafileinfo table for a given file and replica. 182 * 183 * @param filename The name of the file for the entry. 184 * @param replica The replica of the entry. 185 * @return The replicafileinfo entry corresponding to the given filename and replica. 186 * @throws ArgumentNotValid If the filename is either null or empty, or if the replica is null. 187 */ 188 public ReplicaFileInfo getReplicaFileInfo(String filename, Replica replica) throws ArgumentNotValid { 189 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 190 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 191 192 // retrieve replicafileinfo for the given filename 193 // FIXME Use joins! 194 String sql = "SELECT replicafileinfo_guid, replica_id, " + "replicafileinfo.file_id, " 195 + "segment_id, checksum, upload_status, filelist_status, " 196 + "checksum_status, filelist_checkdatetime, checksum_checkdatetime " + "FROM replicafileinfo, file " 197 + " WHERE file.file_id = replicafileinfo.file_id" + " AND file.filename=? AND replica_id=?"; 198 199 PreparedStatement s = null; 200 Connection con = ArchiveDBConnection.get(); 201 try { 202 s = DBUtils.prepareStatement(con, sql, filename, replica.getId()); 203 ResultSet res = s.executeQuery(); 204 if (res.next()) { 205 // return the corresponding replica file info. 206 return new ReplicaFileInfo(res); 207 } else { 208 return null; 209 } 210 } catch (SQLException e) { 211 final String message = "SQL error while selecting ResultsSet by executing statement '" + sql + "'."; 212 log.warn(message, e); 213 throw new IOFailure(message, e); 214 } finally { 215 DBUtils.closeStatementIfOpen(s); 216 ArchiveDBConnection.release(con); 217 } 218 219 } 220 221 /** 222 * Method for retrieving the checksum for a specific file. Since a file is not directly attached with a checksum, 223 * the checksum of a file must be found by having the replicafileinfo entries for the file vote about it. 224 * 225 * @param filename The name of the file, whose checksum are to be found. 226 * @return The checksum of the file, or a Null if no validated checksum can be found. 227 * @throws ArgumentNotValid If the filename is either null or the empty string. 228 */ 229 public String getChecksum(String filename) throws ArgumentNotValid { 230 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 231 232 Connection con = ArchiveDBConnection.get(); 233 try { 234 // retrieve the fileId 235 long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con); 236 237 // Check if a checksum with status OK for the file can be found in 238 // the database 239 for (Replica rep : Replica.getKnown()) { 240 // Return the checksum, if it has a valid status. 241 if (ReplicaCacheHelpers.retrieveChecksumStatusForReplicaFileInfoEntry(fileId, rep.getId(), con) == ChecksumStatus.OK) { 242 return ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId, rep.getId(), con); 243 } 244 } 245 246 // log that we vote about the file. 247 log.debug("No commonly accepted checksum for the file '{}' has previously been found. " 248 + "Voting to achieve one.", filename); 249 250 // retrieves all the UNKNOWN_STATE checksums, and return if unanimous. 251 Set<String> checksums = new HashSet<String>(); 252 253 for (Replica rep : Replica.getKnown()) { 254 if (ReplicaCacheHelpers.retrieveChecksumStatusForReplicaFileInfoEntry(fileId, rep.getId(), con) != ChecksumStatus.CORRUPT) { 255 String tmpChecksum = ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId, 256 rep.getId(), con); 257 if (tmpChecksum != null) { 258 checksums.add(tmpChecksum); 259 } else { 260 log.info("Replica '{}' has a null checksum for the file '{}'.", rep.getId(), 261 ReplicaCacheHelpers.retrieveFilenameForFileId(fileId, con)); 262 } 263 } 264 } 265 266 // check if unanimous (thus exactly one!) 267 if (checksums.size() == 1) { 268 // return the first and only value. 269 return checksums.iterator().next(); 270 } 271 272 // If no checksums are found, then return null. 273 if (checksums.size() == 0) { 274 log.warn("No checksums found for file '{}'.", filename); 275 return null; 276 } 277 278 log.info("No unanimous checksum found for file '{}'.", filename); 279 // put all into a list for voting 280 List<String> checksumList = new ArrayList<String>(); 281 for (Replica rep : Replica.getKnown()) { 282 String cs = ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId, rep.getId(), con); 283 284 if (cs != null) { 285 checksumList.add(cs); 286 } else { 287 // log when it is second time we find this checksum to be null? 288 log.debug("Replica '{}' has a null checksum for the file '{}'.", rep.getId(), 289 ReplicaCacheHelpers.retrieveFilenameForFileId(fileId, con)); 290 } 291 } 292 293 // vote and return the most frequent checksum. 294 return ReplicaCacheHelpers.vote(checksumList); 295 296 } finally { 297 ArchiveDBConnection.release(con); 298 } 299 } 300 301 /** 302 * Retrieves the names of all the files in the file table of the database. 303 * 304 * @return The list of filenames known by the database. 305 */ 306 public Collection<String> retrieveAllFilenames() { 307 Connection con = ArchiveDBConnection.get(); 308 // make sql query. 309 final String sql = "SELECT filename FROM file"; 310 try { 311 // Perform the select. 312 return DBUtils.selectStringList(con, sql, new Object[] {}); 313 } finally { 314 ArchiveDBConnection.release(con); 315 } 316 } 317 318 /** 319 * Retrieves the ReplicaStoreState for the entry in the replicafileinfo table, which refers to the given file and 320 * replica. 321 * 322 * @param filename The name of the file in the filetable. 323 * @param replicaId The id of the replica. 324 * @return The ReplicaStoreState for the specified entry. 325 * @throws ArgumentNotValid If the replicaId or the filename are eihter null or the empty string. 326 */ 327 public ReplicaStoreState getReplicaStoreState(String filename, String replicaId) throws ArgumentNotValid { 328 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 329 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 330 331 Connection con = ArchiveDBConnection.get(); 332 333 // Make query for extracting the upload status. 334 // FIXME Use joins. 335 String sql = "SELECT upload_status FROM replicafileinfo, file WHERE " 336 + "replicafileinfo.file_id = file.file_id AND file.filename = ? " + "AND replica_id = ?"; 337 try { 338 // execute the query. 339 int ordinal = DBUtils.selectIntValue(con, sql, filename, replicaId); 340 341 // return the corresponding ReplicaStoreState. 342 return ReplicaStoreState.fromOrdinal(ordinal); 343 } finally { 344 ArchiveDBConnection.release(con); 345 } 346 } 347 348 /** 349 * Sets the ReplicaStoreState for the entry in the replicafileinfo table. 350 * 351 * @param filename The name of the file in the filetable. 352 * @param replicaId The id of the replica. 353 * @param state The ReplicaStoreState for the specified entry. 354 * @throws ArgumentNotValid If the replicaId or the filename are eihter null or the empty string. Or if the 355 * ReplicaStoreState is null. 356 */ 357 public void setReplicaStoreState(String filename, String replicaId, ReplicaStoreState state) 358 throws ArgumentNotValid { 359 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 360 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 361 ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state"); 362 363 Connection con = ArchiveDBConnection.get(); 364 PreparedStatement statement = null; 365 try { 366 // retrieve the guid for the file. 367 long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con); 368 369 // Make query for updating the upload status 370 if (state == ReplicaStoreState.UPLOAD_COMPLETED) { 371 // An UPLOAD_COMPLETE 372 // UPLOAD_COMPLETE => filelist_status = OK, checksum_status = OK 373 String sql = "UPDATE replicafileinfo SET upload_status = ?, " 374 + "filelist_status = ?, checksum_status = ? " + "WHERE replica_id = ? AND file_id = ?"; 375 statement = DBUtils.prepareStatement(con, sql, state.ordinal(), FileListStatus.OK.ordinal(), 376 ChecksumStatus.OK.ordinal(), replicaId, fileId); 377 } else { 378 String sql = "UPDATE replicafileinfo SET upload_status = ? WHERE replica_id = ? AND file_id = ?"; 379 statement = DBUtils.prepareStatement(con, sql, state.ordinal(), replicaId, fileId); 380 } 381 382 // execute the update and commit to database. 383 statement.executeUpdate(); 384 con.commit(); 385 } catch (SQLException e) { 386 String errMsg = "Received the following SQL error while updating the database: " 387 + ExceptionUtils.getSQLExceptionCause(e); 388 log.warn(errMsg, e); 389 throw new IOFailure(errMsg, e); 390 } finally { 391 DBUtils.closeStatementIfOpen(statement); 392 ArchiveDBConnection.release(con); 393 } 394 } 395 396 /** 397 * Creates a new entry for the filename for each replica, and give it the given checksum and set the upload_status = 398 * UNKNOWN_UPLOAD_STATUS. 399 * 400 * @param filename The name of the file. 401 * @param checksum The checksum of the file. 402 * @throws ArgumentNotValid If the filename or the checksum is either null or the empty string. 403 * @throws IllegalState If the file exists with another checksum on one of the replicas. Or if the file has already 404 * been completely uploaded to one of the replicas. 405 */ 406 public void insertNewFileForUpload(String filename, String checksum) throws ArgumentNotValid, IllegalState { 407 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 408 ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checkums"); 409 410 Connection con = ArchiveDBConnection.get(); 411 // retrieve the fileId for the filename. 412 long fileId; 413 414 try { 415 // insert into DB, or make sure that it can be inserted. 416 if (existsFileInDB(filename)) { 417 // retrieve the fileId of the existing file. 418 fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con); 419 420 // Check the entries for this file associated with the replicas. 421 for (Replica rep : Replica.getKnown()) { 422 // Ensure that the file has not been completely uploaded to a 423 // replica. 424 ReplicaStoreState us = ReplicaCacheHelpers.retrieveUploadStatus(fileId, rep.getId(), con); 425 426 if (us.equals(ReplicaStoreState.UPLOAD_COMPLETED)) { 427 throw new IllegalState("The file has already been completely uploaded to the replica: " + rep); 428 } 429 430 // make sure that it has not been attempted uploaded with 431 // another checksum 432 String entryCs = ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId, rep.getId(), 433 con); 434 435 // throw an exception if the registered checksum differs. 436 if (entryCs != null && !checksum.equals(entryCs)) { 437 throw new IllegalState("The file '" + filename + "' with checksum '" + entryCs 438 + "' has attempted being uploaded with the checksum '" + checksum + "'"); 439 } 440 } 441 } else { 442 fileId = ReplicaCacheHelpers.insertFileIntoDB(filename, con); 443 } 444 445 for (Replica rep : Replica.getKnown()) { 446 // retrieve the guid for the corresponding replicafileinfo entry 447 long guid = ReplicaCacheHelpers.retrieveReplicaFileInfoGuid(fileId, rep.getId(), con); 448 449 // Update with the correct information. 450 ReplicaCacheHelpers.updateReplicaFileInfo(guid, checksum, ReplicaStoreState.UNKNOWN_UPLOAD_STATE, con); 451 } 452 } finally { 453 ArchiveDBConnection.release(con); 454 } 455 } 456 457 /** 458 * Method for inserting an entry into the database about a file upload has begun for a specific replica. It is not 459 * tested whether the entry has another checksum or another UploadStatus. 460 * 461 * @param filename The name of the file. 462 * @param replica The replica for the replicafileinfo. 463 * @param state The new ReplicaStoreState for the entry. 464 * @throws ArgumentNotValid If the filename is either null or the empty string. Or if the replica or the status is 465 * null. 466 */ 467 public void changeStateOfReplicafileinfo(String filename, Replica replica, ReplicaStoreState state) 468 throws ArgumentNotValid { 469 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 470 ArgumentNotValid.checkNotNull(replica, "Replica rep"); 471 ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state"); 472 473 PreparedStatement statement = null; 474 Connection connection = null; 475 try { 476 connection = ArchiveDBConnection.get(); 477 // retrieve the replicafileinfo_guid for this filename . 478 long guid = ReplicaCacheHelpers.retrieveGuidForFilenameOnReplica(filename, replica.getId(), connection); 479 statement = connection.prepareStatement("UPDATE replicafileinfo SET upload_status = ? " 480 + "WHERE replicafileinfo_guid = ?"); 481 statement.setLong(1, state.ordinal()); 482 statement.setLong(2, guid); 483 484 // Perform the update. 485 statement.executeUpdate(); 486 connection.commit(); 487 } catch (SQLException e) { 488 throw new IllegalState("Cannot update status and checksum of a replicafileinfo in the database.", e); 489 } finally { 490 DBUtils.closeStatementIfOpen(statement); 491 if (connection != null) { 492 ArchiveDBConnection.release(connection); 493 } 494 } 495 } 496 497 /** 498 * Method for inserting an entry into the database about a file upload has begun for a specific replica. It is not 499 * tested whether the entry has another checksum or another UploadStatus. 500 * 501 * @param filename The name of the file. 502 * @param checksum The new checksum for the entry. 503 * @param replica The replica for the replicafileinfo. 504 * @param state The new ReplicaStoreState for the entry. 505 * @throws ArgumentNotValid If the filename or the checksum is either null or the empty string. Or if the replica or 506 * the status is null. 507 * @throws IllegalState If an sql exception is thrown. 508 */ 509 public void changeStateOfReplicafileinfo(String filename, String checksum, Replica replica, ReplicaStoreState state) 510 throws ArgumentNotValid, IllegalState { 511 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 512 ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum"); 513 ArgumentNotValid.checkNotNull(replica, "Replica rep"); 514 ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state"); 515 516 PreparedStatement statement = null; 517 Connection connection = null; 518 try { 519 connection = ArchiveDBConnection.get(); 520 // retrieve the replicafileinfo_guid for this filename . 521 long guid = ReplicaCacheHelpers.retrieveGuidForFilenameOnReplica(filename, replica.getId(), connection); 522 523 statement = connection.prepareStatement("UPDATE replicafileinfo SET upload_status = ?, checksum = ? " 524 + "WHERE replicafileinfo_guid = ?"); 525 statement.setLong(1, state.ordinal()); 526 statement.setString(2, checksum); 527 statement.setLong(3, guid); 528 529 // Perform the update. 530 statement.executeUpdate(); 531 connection.commit(); 532 } catch (SQLException e) { 533 throw new IllegalState("Cannot update status and checksum of a replicafileinfo in the database.", e); 534 } finally { 535 DBUtils.closeStatementIfOpen(statement); 536 ArchiveDBConnection.release(connection); 537 } 538 } 539 540 /** 541 * Retrieves the names of all the files in the given replica which has the specified UploadStatus. 542 * 543 * @param replicaId The id of the replica which contain the files. 544 * @param state The ReplicaStoreState for the wanted files. 545 * @return The list of filenames for the entries in the replica which has the specified UploadStatus. 546 * @throws ArgumentNotValid If the UploadStatus is null or if the replicaId is either null or the empty string. 547 */ 548 public Collection<String> retrieveFilenamesForReplicaEntries(String replicaId, ReplicaStoreState state) 549 throws ArgumentNotValid { 550 ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state"); 551 ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId"); 552 Connection con = ArchiveDBConnection.get(); 553 final String sql = "SELECT filename FROM replicafileinfo " 554 + "LEFT OUTER JOIN file ON replicafileinfo.file_id = file.file_id " 555 + "WHERE replica_id = ? AND upload_status = ?"; 556 try { 557 return DBUtils.selectStringList(con, sql, replicaId, state.ordinal()); 558 } finally { 559 ArchiveDBConnection.release(con); 560 } 561 } 562 563 /** 564 * Checks whether a file is already in the file table in the database. 565 * 566 * @param filename The name of the file in the database. 567 * @return Whether the file was found in the database. 568 * @throws IllegalState If more than one entry with the given filename was found. 569 */ 570 public boolean existsFileInDB(String filename) throws IllegalState { 571 // retrieve the amount of times this replica is within the database. 572 Connection con = ArchiveDBConnection.get(); 573 final String sql = "SELECT COUNT(*) FROM file WHERE filename = ?"; 574 try { 575 int count = DBUtils.selectIntValue(con, sql, filename); 576 577 // Handle the different cases for count. 578 switch (count) { 579 case 0: 580 return false; 581 case 1: 582 return true; 583 default: 584 throw new IllegalState("Cannot handle " + count + " files " + "with the name '" + filename + "'."); 585 } 586 } finally { 587 ArchiveDBConnection.release(con); 588 } 589 } 590 591 /** 592 * Method for retrieving the filelist_status for a replicafileinfo entry. 593 * 594 * @param filename The name of the file. 595 * @param replica The replica where the file should be. 596 * @return The filelist_status for the file in the replica. 597 * @throws ArgumentNotValid If the replica is null or the filename is either null or the empty string. 598 */ 599 public FileListStatus retrieveFileListStatus(String filename, Replica replica) throws ArgumentNotValid { 600 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 601 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 602 603 Connection con = ArchiveDBConnection.get(); 604 try { 605 // retrieve the filelist_status for the entry. 606 int status = ReplicaCacheHelpers.retrieveFileListStatusFromReplicaFileInfo(filename, replica.getId(), con); 607 // Return the corresponding FileListStatus 608 return FileListStatus.fromOrdinal(status); 609 } finally { 610 ArchiveDBConnection.release(con); 611 } 612 613 } 614 615 /** 616 * SQL used to update the checksum status of straightforward cases. See complete description for method below. 617 */ 618 public static final String updateChecksumStatusSql = "" + "UPDATE replicafileinfo SET checksum_status = " 619 + ChecksumStatus.OK.ordinal() + " " + "WHERE checksum_status != " + ChecksumStatus.OK.ordinal() 620 + " AND file_id IN ( " + " SELECT file_id " + " FROM ( " 621 + " SELECT file_id, COUNT(file_id) AS checksums, SUM(replicas) replicas " + " FROM ( " 622 + " SELECT file_id, COUNT(checksum) AS replicas, checksum " + " FROM replicafileinfo " 623 + " WHERE filelist_status != " + FileListStatus.MISSING.ordinal() + " AND checksum IS NOT NULL " 624 + " GROUP BY file_id, checksum " + " ) AS ss1 " + " GROUP BY file_id " + " ) AS ss2 " 625 + " WHERE checksums = 1 " + ")"; 626 627 /** 628 * SQL used to select those files whose check status has to be voted on. See complete description for method below. 629 */ 630 public static final String selectForFileChecksumVotingSql = "" + "SELECT file_id " + "FROM ( " 631 + " SELECT file_id, COUNT(file_id) AS checksums, SUM(replicas) replicas " + " FROM ( " 632 + " SELECT file_id, COUNT(checksum) AS replicas, checksum " + " FROM replicafileinfo " 633 + " WHERE filelist_status != " + FileListStatus.MISSING.ordinal() + " AND checksum IS NOT NULL " 634 + " GROUP BY file_id, checksum " + " ) AS ss1 " + " GROUP BY file_id " + ") AS ss2 " 635 + "WHERE checksums > 1 "; 636 637 /** 638 * This method is used to update the status for the checksums for all replicafileinfo entries. <br/> 639 * <br/> 640 * For each file in the database, the checksum vote is made in the following way. <br/> 641 * Each entry in the replicafileinfo table containing the file is retrieved. All the unique checksums are retrieved, 642 * e.g. if a checksum is found more than one, then it is ignored. <br/> 643 * If only one unique checksum is found, then if must be the correct one, and all the replicas with this file will 644 * have their checksum_status set to 'OK'. <br/> 645 * If more than one checksum is found, then a vote for the correct checksum is performed. This is done by counting 646 * the amount of time each of the unique checksum is found among the replicafileinfo entries for the current file. 647 * The checksum with most votes is chosen as the correct one, and the checksum_status for all the replicafileinfo 648 * entries with this checksum is set to 'OK', whereas the replicafileinfo entries with a different checksum is set 649 * to 'CORRUPT'. <br/> 650 * If no winner is found then a warning and a notification is issued, and the checksum_status for all the 651 * replicafileinfo entries with for the current file is set to 'UNKNOWN'. <br/> 652 */ 653 public void updateChecksumStatus() { 654 log.info("UpdateChecksumStatus operation commencing"); 655 Connection con = ArchiveDBConnection.get(); 656 boolean autoCommit = true; 657 try { 658 autoCommit = con.getAutoCommit(); 659 // Set checksum_status to 'OK' where there is the same 660 // checksum across all replicas. 661 DBUtils.executeSQL(con, updateChecksumStatusSql); 662 663 // Get all the fileids that need processing. 664 // Previously: "SELECT file_id FROM file" 665 Iterator<Long> fileIdsIterator = DBUtils.selectLongIterator(con, selectForFileChecksumVotingSql); 666 // For each fileid 667 while (fileIdsIterator.hasNext()) { 668 long fileId = fileIdsIterator.next(); 669 ReplicaCacheHelpers.fileChecksumVote(fileId, con); 670 } 671 } catch (SQLException e) { 672 throw new IOFailure("Error getting auto commit.\n" + ExceptionUtils.getSQLExceptionCause(e), e); 673 } finally { 674 try { 675 con.setAutoCommit(autoCommit); 676 } catch (SQLException e) { 677 log.error("Could not change auto commit back to default!"); 678 } 679 ArchiveDBConnection.release(con); 680 } 681 log.info("UpdateChecksumStatus operation completed!"); 682 } 683 684 /** 685 * Method for updating the status for a specific file for all the replicas. If the checksums for the replicas differ 686 * for some replica, then based on a checksum vote, a specific checksum is chosen as the 'correct' one, and the 687 * entries with another checksum than the 'correct one' will be marked as corrupt. 688 * 689 * @param filename The name of the file to update the status for. 690 * @throws ArgumentNotValid If the filename is either null or the empty string. 691 */ 692 @Override 693 public void updateChecksumStatus(String filename) throws ArgumentNotValid { 694 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 695 696 Connection con = ArchiveDBConnection.get(); 697 try { 698 // retrieve the id and vote! 699 Long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con); 700 ReplicaCacheHelpers.fileChecksumVote(fileId, con); 701 } finally { 702 ArchiveDBConnection.release(con); 703 } 704 } 705 706 /** 707 * Given the output of a checksum job, add the results to the database. 708 * <p> 709 * The following fields in the table are updated for each corresponding entry in the replicafileinfo table: <br/> 710 * - checksum = the given checksum. <br/> 711 * - filelist_status = ok. <br/> 712 * - filelist_checkdatetime = now. <br/> 713 * - checksum_checkdatetime = now. 714 * 715 * @param checksumOutputFile The output of a checksum job in a file 716 * @param replica The replica this checksum job is for. 717 */ 718 @Override 719 public void addChecksumInformation(File checksumOutputFile, Replica replica) { 720 // validate arguments 721 ArgumentNotValid.checkNotNull(checksumOutputFile, "File checksumOutputFile"); 722 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 723 724 // Sort the checksumOutputFile file. 725 File sortedResult = new File(checksumOutputFile.getParent(), checksumOutputFile.getName() + ".sorted"); 726 FileUtils.sortFile(checksumOutputFile, sortedResult); 727 final long datasize = FileUtils.countLines(sortedResult); 728 729 Set<Long> missingReplicaRFIs = null; 730 Connection con = ArchiveDBConnection.get(); 731 LineIterator lineIterator = null; 732 try { 733 // Make sure, that the replica exists in the database. 734 if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) { 735 String msg = "Cannot add checksum information, since the replica '" + replica.toString() 736 + "' does not exist within the database."; 737 log.warn(msg); 738 throw new IOFailure(msg); 739 } 740 741 log.info("Starting processing of {} checksum entries for replica {}", datasize, replica.getId()); 742 743 // retrieve the list of files already known by this cache. 744 // TODO This does not scale! Should the datastructure 745 // (missingReplicaRFIs) be disk-bound in some way, or optimized 746 // in some way, e.g. using it.unimi.dsi.fastutil.longs.LongArrayList 747 missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con); 748 749 // Initialize the String iterator 750 lineIterator = new LineIterator(new FileReader(sortedResult)); 751 752 String lastFilename = ""; 753 String lastChecksum = ""; 754 755 int i = 0; 756 while (lineIterator.hasNext()) { 757 String line = lineIterator.next(); 758 // log that it is in progress every so often. 759 if ((i % LOGGING_ENTRY_INTERVAL) == 0) { 760 log.info("Processed checksum list entry number {} for replica {}", i, replica); 761 // Close connection, and open another one 762 // to avoid memory-leak (NAS-2003) 763 ArchiveDBConnection.release(con); 764 con = ArchiveDBConnection.get(); 765 log.debug("Databaseconnection has now been renewed"); 766 } 767 ++i; 768 769 // parse the input. 770 final KeyValuePair<String, String> entry = ChecksumJob.parseLine(line); 771 final String filename = entry.getKey(); 772 final String checksum = entry.getValue(); 773 774 // check for duplicates 775 if (filename.equals(lastFilename)) { 776 // if different checksums, then 777 if (!checksum.equals(lastChecksum)) { 778 // log and send notification 779 String errMsg = "Unidentical duplicates of file '" + filename + "' with the checksums '" 780 + lastChecksum + "' and '" + checksum + "'. First instance used."; 781 log.warn(errMsg); 782 NotificationsFactory.getInstance().notify(errMsg, NotificationType.WARNING); 783 } else { 784 // log about duplicate identical 785 log.debug("Duplicates of the file '{}' found with the same checksum '{}'.", filename, checksum); 786 } 787 788 // avoid overhead of inserting duplicates twice. 789 continue; 790 } 791 792 // set these value to be the old values in next iteration. 793 lastFilename = filename; 794 lastChecksum = checksum; 795 796 // Process the current (filename + checksum) combo for this replica 797 // Remove the returned replicafileinfo guid from the missing entries. 798 missingReplicaRFIs.remove(ReplicaCacheHelpers.processChecksumline(filename, checksum, replica, con)); 799 } 800 } catch (IOException e) { 801 throw new IOFailure("Unable to read checksum entries from file", e); 802 } finally { 803 ArchiveDBConnection.release(con); 804 LineIterator.closeQuietly(lineIterator); 805 } 806 807 con = ArchiveDBConnection.get(); 808 try { 809 // go through the not found replicafileinfo for this replica to change 810 // their filelist_status to missing. 811 if (missingReplicaRFIs.size() > 0) { 812 log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica); 813 for (long rfi : missingReplicaRFIs) { 814 // set the replicafileinfo in the database to missing. 815 ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con); 816 } 817 } 818 819 // update the checksum updated date for this replica. 820 ReplicaCacheHelpers.updateChecksumDateForReplica(replica, con); 821 ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con); 822 823 log.info("Finished processing of {} checksum entries for replica {}", datasize, replica.getId()); 824 } finally { 825 ArchiveDBConnection.release(con); 826 } 827 } 828 829 /** 830 * Method for adding the results from a list of filenames on a replica. This list of filenames should return the 831 * list of all the files within the database. 832 * <p> 833 * For each file in the FileListJob the following fields are set for the corresponding entry in the replicafileinfo 834 * table: <br/> 835 * - filelist_status = ok. <br/> 836 * - filelist_checkdatetime = now. 837 * <p> 838 * For each entry in the replicafileinfo table for the replica which are missing in the results from the FileListJob 839 * the following fields are assigned the following values: <br/> 840 * - filelist_status = missing. <br/> 841 * - filelist_checkdatetime = now. 842 * 843 * @param filelistFile The list of filenames either parsed from a FilelistJob or the result from a 844 * GetAllFilenamesMessage. 845 * @param replica The replica, which the FilelistBatchjob has run upon. 846 * @throws ArgumentNotValid If the filelist or the replica is null. 847 * @throws UnknownID If the replica does not already exist in the database. 848 */ 849 @Override 850 public void addFileListInformation(File filelistFile, Replica replica) throws ArgumentNotValid, UnknownID { 851 ArgumentNotValid.checkNotNull(filelistFile, "File filelistFile"); 852 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 853 854 // Sort the filelist file. 855 File sortedResult = new File(filelistFile.getParent(), filelistFile.getName() + ".sorted"); 856 FileUtils.sortFile(filelistFile, sortedResult); 857 final long datasize = FileUtils.countLines(sortedResult); 858 859 Connection con = ArchiveDBConnection.get(); 860 Set<Long> missingReplicaRFIs = null; 861 LineIterator lineIterator = null; 862 try { 863 // Make sure, that the replica exists in the database. 864 if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) { 865 String errorMsg = "Cannot add filelist information, since the replica '" + replica.toString() 866 + "' does not exist in the database."; 867 log.warn(errorMsg); 868 throw new UnknownID(errorMsg); 869 } 870 871 log.info("Starting processing of {} filelist entries for replica {}", datasize, replica.getId()); 872 873 // retrieve the list of files already known by this cache. 874 // TODO This does not scale! Should this datastructure 875 // (missingReplicaRFIs) be disk-bound in some way. 876 missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con); 877 878 // Initialize String iterator 879 lineIterator = new LineIterator(new FileReader(sortedResult)); 880 881 String lastFileName = ""; 882 int i = 0; 883 while (lineIterator.hasNext()) { 884 String file = lineIterator.next(); 885 // log that it is in progress every so often. 886 if ((i % LOGGING_ENTRY_INTERVAL) == 0) { 887 log.info("Processed file list entry number {} for replica {}", i, replica); 888 // Close connection, and open another one 889 // to avoid memory-leak (NAS-2003) 890 ArchiveDBConnection.release(con); 891 con = ArchiveDBConnection.get(); 892 log.debug("Databaseconnection has now been renewed"); 893 } 894 ++i; 895 896 // handle duplicates. 897 if (file.equals(lastFileName)) { 898 log.warn("There have been found multiple files with the name '{}'", file); 899 continue; 900 } 901 902 lastFileName = file; 903 // Add information for one file, and remove the ReplicaRFI from the 904 // set of missing ones. 905 missingReplicaRFIs.remove(ReplicaCacheHelpers.addFileInformation(file, replica, con)); 906 } 907 } catch (IOException e) { 908 throw new IOFailure("Unable to read the filenames from file", e); 909 } finally { 910 ArchiveDBConnection.release(con); 911 LineIterator.closeQuietly(lineIterator); 912 } 913 914 con = ArchiveDBConnection.get(); 915 try { 916 // go through the not found replicafileinfo for this replica to change 917 // their filelist_status to missing. 918 if (missingReplicaRFIs.size() > 0) { 919 log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica); 920 for (long rfi : missingReplicaRFIs) { 921 // set the replicafileinfo in the database to missing. 922 ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con); 923 } 924 } 925 // Update the date for filelist update for this replica. 926 ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con); 927 } finally { 928 ArchiveDBConnection.release(con); 929 } 930 } 931 932 /** 933 * Get the date for the last file list job. 934 * 935 * @param replica The replica to get the date for. 936 * @return The date of the last missing files update for the replica. A null is returned if no last missing files 937 * update has been performed. 938 * @throws ArgumentNotValid If the replica is null. 939 * @throws IllegalArgumentException If the Date of the Timestamp cannot be instantiated. 940 */ 941 @Override 942 public Date getDateOfLastMissingFilesUpdate(Replica replica) throws ArgumentNotValid, IllegalArgumentException { 943 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 944 Connection con = ArchiveDBConnection.get(); 945 String result = null; 946 try { 947 // sql for retrieving this replicafileinfo_guid. 948 String sql = "SELECT filelist_updated FROM replica WHERE replica_id = ?"; 949 result = DBUtils.selectStringValue(con, sql, replica.getId()); 950 } finally { 951 ArchiveDBConnection.release(con); 952 } 953 // return null if the field has no be set for this replica. 954 if (result == null) { 955 log.debug("The 'filelist_updated' field has not been set, as no missing files update has been performed yet."); 956 return null; 957 } else { 958 // Parse the timestamp into a date. 959 return new Date(Timestamp.valueOf(result).getTime()); 960 } 961 } 962 963 /** 964 * Method for retrieving the date for the last update for corrupted files. 965 * <p> 966 * This method does not contact the replicas, it only retrieves the data from the last time the checksum was 967 * retrieved. 968 * 969 * @param replica The replica to find the date for the latest update for corruption of files. 970 * @return The date for the last checksum update. A null is returned if no wrong files update has been performed for 971 * this replica. 972 * @throws ArgumentNotValid If the replica is null. 973 * @throws IllegalArgumentException If the Date of the Timestamp cannot be instantiated. 974 */ 975 @Override 976 public Date getDateOfLastWrongFilesUpdate(Replica replica) throws ArgumentNotValid, IllegalArgumentException { 977 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 978 Connection con = ArchiveDBConnection.get(); 979 String result = null; 980 try { 981 // The SQL statement for retrieving the date for last update of 982 // checksum for the replica. 983 final String sql = "SELECT checksum_updated FROM replica WHERE replica_id = ?"; 984 result = DBUtils.selectStringValue(con, sql, replica.getId()); 985 } finally { 986 ArchiveDBConnection.release(con); 987 } 988 // return null if the field has no be set for this replica. 989 if (result == null) { 990 log.debug("The 'checksum_updated' field has not been set, as no wrong files update has been performed yet."); 991 return null; 992 } else { 993 // Parse the timestamp into a date. 994 return new Date(Timestamp.valueOf(result).getTime()); 995 } 996 } 997 998 /** 999 * Method for retrieving the number of files missing from a specific replica. 1000 * <p> 1001 * This method does not contact the replica directly, it only retrieves the count of missing files from the last 1002 * filelist update. 1003 * 1004 * @param replica The replica to find the number of missing files for. 1005 * @return The number of missing files for the replica. 1006 * @throws ArgumentNotValid If the replica is null. 1007 */ 1008 @Override 1009 public long getNumberOfMissingFilesInLastUpdate(Replica replica) throws ArgumentNotValid { 1010 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 1011 Connection con = ArchiveDBConnection.get(); 1012 // The SQL statement to retrieve the number of entries in the 1013 // replicafileinfo table with file_status set to either missing or 1014 // no_status for the replica. 1015 // FIXME Consider using a UNION instead of OR. 1016 final String sql = "SELECT COUNT(*) FROM replicafileinfo " 1017 + "WHERE replica_id = ? AND ( filelist_status = ? OR filelist_status = ?)"; 1018 try { 1019 return DBUtils.selectLongValue(con, sql, replica.getId(), FileListStatus.MISSING.ordinal(), 1020 FileListStatus.NO_FILELIST_STATUS.ordinal()); 1021 } finally { 1022 ArchiveDBConnection.release(con); 1023 } 1024 } 1025 1026 /** 1027 * Method for retrieving the list of the names of the files which was missing for the replica in the last filelist 1028 * update. 1029 * <p> 1030 * This method does not contact the replica, it only uses the database to find the files, which was missing during 1031 * the last filelist update. 1032 * 1033 * @param replica The replica to find the list of missing files for. 1034 * @return A list containing the names of the files which are missing in the given replica. 1035 * @throws ArgumentNotValid If the replica is null. 1036 */ 1037 @Override 1038 public Iterable<String> getMissingFilesInLastUpdate(Replica replica) throws ArgumentNotValid { 1039 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 1040 Connection con = ArchiveDBConnection.get(); 1041 // The SQL statement to retrieve the filenames of the missing 1042 // replicafileinfo to the given replica. 1043 final String sql = "SELECT filename FROM replicafileinfo " 1044 + "LEFT OUTER JOIN file ON replicafileinfo.file_id = file.file_id " 1045 + "WHERE replica_id = ? AND ( filelist_status = ? OR filelist_status = ? )"; 1046 try { 1047 return DBUtils.selectStringList(con, sql, replica.getId(), FileListStatus.MISSING.ordinal(), 1048 FileListStatus.NO_FILELIST_STATUS.ordinal()); 1049 } finally { 1050 ArchiveDBConnection.release(con); 1051 } 1052 } 1053 1054 /** 1055 * Method for retrieving the amount of files with a incorrect checksum within a replica. 1056 * <p> 1057 * This method does not contact the replica, it only uses the database to count the amount of files which are 1058 * corrupt. 1059 * 1060 * @param replica The replica to find the number of corrupted files for. 1061 * @return The number of corrupted files. 1062 * @throws ArgumentNotValid If the replica is null. 1063 */ 1064 @Override 1065 public long getNumberOfWrongFilesInLastUpdate(Replica replica) throws ArgumentNotValid { 1066 ArgumentNotValid.checkNotNull(replica, "Replica"); 1067 Connection con = ArchiveDBConnection.get(); 1068 // The SQL statement to retrieve the number of corrupted entries in 1069 // the replicafileinfo table for the given replica. 1070 final String sql = "SELECT COUNT(*) FROM replicafileinfo WHERE replica_id = ? AND checksum_status = ?"; 1071 try { 1072 return DBUtils.selectLongValue(con, sql, replica.getId(), ChecksumStatus.CORRUPT.ordinal()); 1073 } finally { 1074 ArchiveDBConnection.release(con); 1075 } 1076 } 1077 1078 /** 1079 * Method for retrieving the list of the files in the replica which have a incorrect checksum. E.g. the 1080 * checksum_status is set to CORRUPT. 1081 * <p> 1082 * This method does not contact the replica, it only uses the local database. 1083 * 1084 * @param replica The replica to find the list of corrupted files for. 1085 * @return The list of files which have wrong checksums. 1086 * @throws ArgumentNotValid If the replica is null. 1087 */ 1088 @Override 1089 public Iterable<String> getWrongFilesInLastUpdate(Replica replica) throws ArgumentNotValid { 1090 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 1091 Connection con = ArchiveDBConnection.get(); 1092 // The SQL statement to retrieve the filenames for the corrupted files 1093 // in the replicafileinfo table for the given replica. 1094 String sql = "SELECT filename FROM replicafileinfo " 1095 + "LEFT OUTER JOIN file ON replicafileinfo.file_id = file.file_id " 1096 + "WHERE replica_id = ? AND checksum_status = ?"; 1097 try { 1098 return DBUtils.selectStringList(con, sql, replica.getId(), ChecksumStatus.CORRUPT.ordinal()); 1099 } finally { 1100 ArchiveDBConnection.release(con); 1101 } 1102 } 1103 1104 /** 1105 * Method for retrieving the number of files within a replica. This count all the files which are not missing from 1106 * the replica, thus all entries in the replicafileinfo table which has the filelist_status set to OK. It is ignored 1107 * whether the files has a correct checksum. 1108 * <p> 1109 * This method does not contact the replica, it only uses the local database. 1110 * 1111 * @param replica The replica to count the number of files for. 1112 * @return The number of files within the replica. 1113 * @throws ArgumentNotValid If the replica is null. 1114 */ 1115 @Override 1116 public long getNumberOfFiles(Replica replica) throws ArgumentNotValid { 1117 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 1118 Connection con = ArchiveDBConnection.get(); 1119 // The SQL statement to retrieve the amount of entries in the 1120 // replicafileinfo table for the replica which have the 1121 // filelist_status set to OK. 1122 String sql = "SELECT COUNT(*) FROM replicafileinfo WHERE replica_id = ? AND filelist_status = ?"; 1123 try { 1124 return DBUtils.selectLongValue(con, sql, replica.getId(), FileListStatus.OK.ordinal()); 1125 } finally { 1126 ArchiveDBConnection.release(con); 1127 } 1128 } 1129 1130 /** 1131 * Method for finding a replica with a valid version of a file. This method is used in order to find a replica from 1132 * which a file should be retrieved, during the process of restoring a corrupt file on another replica. 1133 * <p> 1134 * This replica must of the type bitarchive, since a file cannot be retrieved from a checksum replica. 1135 * 1136 * @param filename The name of the file which needs to have a valid version in a bitarchive. 1137 * @return A bitarchive which contains a valid version of the file, or null if no such bitarchive exists. 1138 * @throws ArgumentNotValid If the filename is null or the empty string. 1139 */ 1140 @Override 1141 public Replica getBitarchiveWithGoodFile(String filename) throws ArgumentNotValid { 1142 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 1143 1144 Connection con = ArchiveDBConnection.get(); 1145 try { 1146 // Retrieve a list of replicas where the the checksum status is OK 1147 List<String> replicaIds = ReplicaCacheHelpers.retrieveReplicaIdsWithOKChecksumStatus(filename, con); 1148 1149 // go through the list, and return the first valid bitarchive-replica. 1150 for (String repId : replicaIds) { 1151 // Retrieve the replica type. 1152 ReplicaType repType = ReplicaCacheHelpers.retrieveReplicaType(repId, con); 1153 1154 // If the replica is of type BITARCHIVE then return it. 1155 if (repType.equals(ReplicaType.BITARCHIVE)) { 1156 log.trace("The replica with id '{}' is the first bitarchive replica which contains the file '{}' " 1157 + "with a valid checksum.", repId, filename); 1158 return Replica.getReplicaFromId(repId); 1159 } 1160 } 1161 } finally { 1162 ArchiveDBConnection.release(con); 1163 } 1164 1165 // Notify the administrator about that no proper bitarchive was found. 1166 NotificationsFactory.getInstance().notify( 1167 "No bitarchive replica " + "was found which contains the file '" + filename + "'.", 1168 NotificationType.WARNING); 1169 1170 // If no bitarchive exists that contains the file with a OK checksum_status. 1171 // then return null. 1172 return null; 1173 } 1174 1175 /** 1176 * Method for finding a replica with a valid version of a file. This method is used in order to find a replica from 1177 * which a file should be retrieved, during the process of restoring a corrupt file on another replica. 1178 * <p> 1179 * This replica must of the type bitarchive, since a file cannot be retrieved from a checksum replica. 1180 * 1181 * @param filename The name of the file which needs to have a valid version in a bitarchive. 1182 * @param badReplica The Replica which has a bad copy of the given file 1183 * @return A bitarchive which contains a valid version of the file, or null if no such bitarchive exists (in which 1184 * case, a notification is sent) 1185 * @throws ArgumentNotValid If the replica is null or the filename is either null or the empty string. 1186 */ 1187 @Override 1188 public Replica getBitarchiveWithGoodFile(String filename, Replica badReplica) throws ArgumentNotValid { 1189 ArgumentNotValid.checkNotNull(badReplica, "Replica badReplica"); 1190 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 1191 1192 Connection con = ArchiveDBConnection.get(); 1193 try { 1194 // Then retrieve a list of replicas where the the checksum status is 1195 // OK 1196 List<String> replicaIds = ReplicaCacheHelpers.retrieveReplicaIdsWithOKChecksumStatus(filename, con); 1197 1198 // Make sure, that the bad replica is not returned. 1199 replicaIds.remove(badReplica.getId()); 1200 1201 // go through the list, and return the first valid 1202 // bitarchive-replica. 1203 for (String repId : replicaIds) { 1204 // Retrieve the replica type. 1205 ReplicaType repType = ReplicaCacheHelpers.retrieveReplicaType(repId, con); 1206 1207 // If the replica is of type BITARCHIVE then return it. 1208 if (repType.equals(ReplicaType.BITARCHIVE)) { 1209 log.trace( 1210 "The replica with id '{}' is the first bitarchive replica which contains the file '{}' with a valid checksum.", 1211 repId, filename); 1212 return Replica.getReplicaFromId(repId); 1213 } 1214 } 1215 } finally { 1216 ArchiveDBConnection.release(con); 1217 } 1218 // Notify the administrator about that no proper bitarchive was found, and log the incidence 1219 final String msg = "No bitarchive replica " + "was found which contains the file '" + filename + "'."; 1220 log.warn(msg); 1221 NotificationsFactory.getInstance().notify(msg, NotificationType.WARNING); 1222 1223 return null; 1224 } 1225 1226 /** 1227 * Method for updating a specific entry in the replicafileinfo table. Based on the filename, checksum and replica it 1228 * is verified whether a file is missing, corrupt or valid. 1229 * 1230 * @param filename Name of the file. 1231 * @param checksum The checksum of the file. Is allowed to be null, if no file is found. 1232 * @param replica The replica where the file exists. 1233 * @throws ArgumentNotValid If the filename is null or the empty string, or if the replica is null. 1234 */ 1235 @Override 1236 public void updateChecksumInformationForFileOnReplica(String filename, String checksum, Replica replica) 1237 throws ArgumentNotValid { 1238 ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename"); 1239 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 1240 1241 PreparedStatement statement = null; 1242 Connection connection = null; 1243 try { 1244 connection = ArchiveDBConnection.get(); 1245 1246 long guid = ReplicaCacheHelpers.retrieveGuidForFilenameOnReplica(filename, replica.getId(), connection); 1247 1248 Date now = new Date(Calendar.getInstance().getTimeInMillis()); 1249 1250 // handle differently whether a checksum was retrieved. 1251 if (checksum == null) { 1252 // Set to MISSING! and do not update the checksum 1253 // (cannot insert null). 1254 String sql = "UPDATE replicafileinfo " 1255 + "SET filelist_status = ?, checksum_status = ?, filelist_checkdatetime = ? " 1256 + "WHERE replicafileinfo_guid = ?"; 1257 statement = DBUtils.prepareStatement(connection, sql, FileListStatus.MISSING.ordinal(), 1258 ChecksumStatus.UNKNOWN.ordinal(), now, guid); 1259 } else { 1260 String sql = "UPDATE replicafileinfo " 1261 + "SET checksum = ?, filelist_status = ?, filelist_checkdatetime = ? " 1262 + "WHERE replicafileinfo_guid = ?"; 1263 statement = DBUtils.prepareStatement(connection, sql, checksum, FileListStatus.OK.ordinal(), now, guid); 1264 } 1265 statement.executeUpdate(); 1266 connection.commit(); 1267 } catch (Exception e) { 1268 throw new IOFailure("Could not update single checksum entry.", e); 1269 } finally { 1270 DBUtils.closeStatementIfOpen(statement); 1271 if (connection != null) { 1272 ArchiveDBConnection.release(connection); 1273 } 1274 } 1275 } 1276 1277 /** 1278 * Method for inserting a line of Admin.Data into the database. It is assumed that it is a '0.4' admin.data line. 1279 * 1280 * @param line The line to insert into the database. 1281 * @return Whether the line was valid. 1282 * @throws ArgumentNotValid If the line is null. If it is empty, then it is logged. 1283 */ 1284 public boolean insertAdminEntry(String line) throws ArgumentNotValid { 1285 ArgumentNotValid.checkNotNull(line, "String line"); 1286 1287 Connection con = ArchiveDBConnection.get(); 1288 log.trace("Insert admin entry begun"); 1289 final int lengthFirstPart = 4; 1290 final int lengthOtherParts = 3; 1291 try { 1292 // split into parts. First contains 1293 String[] split = line.split(" , "); 1294 1295 // Retrieve the basic entry data. 1296 String[] entryData = split[0].split(" "); 1297 1298 // Check if enough elements 1299 if (entryData.length < lengthFirstPart) { 1300 log.warn("Bad line in Admin.data: {}", line); 1301 return false; 1302 } 1303 1304 String filename = entryData[0]; 1305 String checksum = entryData[1]; 1306 1307 long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con); 1308 1309 // If the fileId is -1, then the file is not within the file table. 1310 // Thus insert it and retrieve the id. 1311 if (fileId == -1) { 1312 fileId = ReplicaCacheHelpers.insertFileIntoDB(filename, con); 1313 } 1314 log.trace("Step 1 completed (file created in database)."); 1315 // go through the replica specifics. 1316 for (int i = 1; i < split.length; i++) { 1317 String[] repInfo = split[i].split(" "); 1318 1319 // check if correct size 1320 if (repInfo.length < lengthOtherParts) { 1321 log.warn("Bad replica information '{}' in line '{}'", split[i], line); 1322 continue; 1323 } 1324 1325 // retrieve the data for this replica 1326 String replicaId = Channels.retrieveReplicaFromIdentifierChannel(repInfo[0]).getId(); 1327 ReplicaStoreState replicaUploadStatus = ReplicaStoreState.valueOf(repInfo[1]); 1328 Date replicaDate = new Date(Long.parseLong(repInfo[2])); 1329 1330 // retrieve the guid of the replicafileinfo. 1331 long guid = ReplicaCacheHelpers.retrieveReplicaFileInfoGuid(fileId, replicaId, con); 1332 1333 // Update the replicaFileInfo with the information. 1334 ReplicaCacheHelpers.updateReplicaFileInfo(guid, checksum, replicaDate, replicaUploadStatus, con); 1335 } 1336 } catch (IllegalState e) { 1337 log.warn("Received IllegalState exception while parsing.", e); 1338 return false; 1339 } finally { 1340 ArchiveDBConnection.release(con); 1341 } 1342 log.trace("Insert admin entry finished"); 1343 return true; 1344 } 1345 1346 /** 1347 * Method for setting a specific value for the filelistdate and the checksumlistdate for all the replicas. 1348 * 1349 * @param date The new date for the checksumlist and filelist for all the replicas. 1350 * @throws ArgumentNotValid If the date is null. 1351 */ 1352 public void setAdminDate(Date date) throws ArgumentNotValid { 1353 ArgumentNotValid.checkNotNull(date, "Date date"); 1354 1355 Connection con = ArchiveDBConnection.get(); 1356 try { 1357 // set the date for the replicas. 1358 for (Replica rep : Replica.getKnown()) { 1359 ReplicaCacheHelpers.setFilelistDateForReplica(rep, date, con); 1360 ReplicaCacheHelpers.setChecksumlistDateForReplica(rep, date, con); 1361 } 1362 } finally { 1363 ArchiveDBConnection.release(con); 1364 } 1365 } 1366 1367 /** 1368 * Method for telling whether the database is empty. The database is empty if it does not contain any files. 1369 * <p> 1370 * The database will not be entirely empty, since the replicas are put into the replica table during the 1371 * instantiation of this class, but if the file table is empty, then the replicafileinfo table is also empty, and 1372 * the database will be considered empty. 1373 * 1374 * @return Whether the file list is empty. 1375 */ 1376 public boolean isEmpty() { 1377 // The SQL statement to retrieve the amount of entries in the 1378 // file table. No arguments (represented by empty Object array). 1379 final String sql = "SELECT COUNT(*) FROM file"; 1380 Connection con = ArchiveDBConnection.get(); 1381 try { 1382 return DBUtils.selectLongValue(con, sql, new Object[0]) == 0L; 1383 } finally { 1384 ArchiveDBConnection.release(con); 1385 } 1386 } 1387 1388 /** 1389 * Method to print all the tables in the database. 1390 * 1391 * @return all the tables as a text string 1392 */ 1393 public String retrieveAsText() { 1394 StringBuilder res = new StringBuilder(); 1395 String sql = ""; 1396 Connection connection = ArchiveDBConnection.get(); 1397 // Go through the replica table 1398 List<String> reps = ReplicaCacheHelpers.retrieveIdsFromReplicaTable(connection); 1399 res.append("Replica table: " + reps.size() + "\n"); 1400 res.append("GUID \trepId \trepName \trepType \tfileupdate \tchecksumupdated" + "\n"); 1401 res.append("------------------------------------------------------------\n"); 1402 for (String repId : reps) { 1403 // retrieve the replica_name 1404 sql = "SELECT replica_guid FROM replica WHERE replica_id = ?"; 1405 String repGUID = DBUtils.selectStringValue(connection, sql, repId); 1406 // retrieve the replica_name 1407 sql = "SELECT replica_name FROM replica WHERE replica_id = ?"; 1408 String repName = DBUtils.selectStringValue(connection, sql, repId); 1409 // retrieve the replica_type 1410 sql = "SELECT replica_type FROM replica WHERE replica_id = ?"; 1411 int repType = DBUtils.selectIntValue(connection, sql, repId); 1412 // retrieve the date for last updated 1413 sql = "SELECT filelist_updated FROM replica WHERE replica_id = ?"; 1414 String filelistUpdated = DBUtils.selectStringValue(connection, sql, repId); 1415 // retrieve the date for last updated 1416 sql = "SELECT checksum_updated FROM replica WHERE replica_id = ?"; 1417 String checksumUpdated = DBUtils.selectStringValue(connection, sql, repId); 1418 1419 // Print 1420 res.append(repGUID + "\t" + repId + "\t" + repName + "\t" + ReplicaType.fromOrdinal(repType).name() + "\t" 1421 + filelistUpdated + "\t" + checksumUpdated + "\n"); 1422 } 1423 res.append("\n"); 1424 1425 // Go through the file table 1426 List<String> fileIds = ReplicaCacheHelpers.retrieveIdsFromFileTable(connection); 1427 res.append("File table : " + fileIds.size() + "\n"); 1428 res.append("fileId \tfilename" + "\n"); 1429 res.append("--------------------" + "\n"); 1430 for (String fileId : fileIds) { 1431 // retrieve the file_name 1432 sql = "SELECT filename FROM file WHERE file_id = ?"; 1433 String fileName = DBUtils.selectStringValue(connection, sql, fileId); 1434 1435 // Print 1436 res.append(fileId + " \t " + fileName + "\n"); 1437 } 1438 res.append("\n"); 1439 1440 // Go through the replicafileinfo table 1441 List<String> rfiIds = ReplicaCacheHelpers.retrieveIdsFromReplicaFileInfoTable(connection); 1442 res.append("ReplicaFileInfo table : " + rfiIds.size() + "\n"); 1443 res.append("GUID \trepId \tfileId \tchecksum \tus \t\tfls \tcss \tfilelistCheckdate \tchecksumCheckdate\n"); 1444 res.append("---------------------------------------------------------------------------------------------------------\n"); 1445 for (String rfiGUID : rfiIds) { 1446 // FIXME Replace with one SELECT instead of one SELECT for each row! DOH! 1447 // retrieve the replica_id 1448 sql = "SELECT replica_id FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1449 String replicaId = DBUtils.selectStringValue(connection, sql, rfiGUID); 1450 // retrieve the file_id 1451 sql = "SELECT file_id FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1452 String fileId = DBUtils.selectStringValue(connection, sql, rfiGUID); 1453 // retrieve the checksum 1454 sql = "SELECT checksum FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1455 String checksum = DBUtils.selectStringValue(connection, sql, rfiGUID); 1456 // retrieve the upload_status 1457 sql = "SELECT upload_status FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1458 int uploadStatus = DBUtils.selectIntValue(connection, sql, rfiGUID); 1459 // retrieve the filelist_status 1460 sql = "SELECT filelist_status FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1461 int filelistStatus = DBUtils.selectIntValue(connection, sql, rfiGUID); 1462 // retrieve the checksum_status 1463 sql = "SELECT checksum_status FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1464 int checksumStatus = DBUtils.selectIntValue(connection, sql, rfiGUID); 1465 // retrieve the filelist_checkdatetime 1466 sql = "SELECT filelist_checkdatetime FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1467 String filelistCheckdatetime = DBUtils.selectStringValue(connection, sql, rfiGUID); 1468 // retrieve the checksum_checkdatetime 1469 sql = "SELECT checksum_checkdatetime FROM replicafileinfo WHERE replicafileinfo_guid = ?"; 1470 String checksumCheckdatetime = DBUtils.selectStringValue(connection, sql, rfiGUID); 1471 1472 // Print 1473 res.append(rfiGUID + " \t" + replicaId + "\t" + fileId + "\t" + checksum + "\t" 1474 + ReplicaStoreState.fromOrdinal(uploadStatus).name() + " \t" 1475 + FileListStatus.fromOrdinal(filelistStatus).name() + "\t" 1476 + ChecksumStatus.fromOrdinal(checksumStatus).name() + "\t" + filelistCheckdatetime + "\t" 1477 + checksumCheckdatetime + "\n"); 1478 } 1479 res.append("\n"); 1480 1481 return res.toString(); 1482 } 1483 1484 /** 1485 * Method for cleaning up. 1486 */ 1487 @Override 1488 public synchronized void cleanup() { 1489 instance = null; 1490 } 1491 1492}