/*
 * #%L
 * Netarchivesuite - archive
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.archive.arcrepositoryadmin;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.archive.ArchiveSettings;
import dk.netarkivet.archive.arcrepository.distribute.StoreMessage;
import dk.netarkivet.common.distribute.arcrepository.Replica;
import dk.netarkivet.common.distribute.arcrepository.ReplicaStoreState;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.ApplicationUtils;
import dk.netarkivet.common.utils.Settings;

/**
 * Class for accessing and manipulating the administrative data for the ArcRepository. In the current implementation,
 * it consists of a file with a number of lines of the form:
 * {@code <filename/> <checksum/> <state/> <timestamp-for-last-state-change/>
 * [,<bitarchive/> <storestatus/> <timestamp-for-last-state-change/>]*}
 * <p>
 * This abstract class is overridden to give either a read/write or a readonly version of this class.
 *
 * @deprecated Use the DatabaseAdmin instead.
 */
@Deprecated
public abstract class AdminData {
    private static final Logger log = LoggerFactory.getLogger(AdminData.class);

    /** Admindata version. VersionNumber is the current version. */
    public static final String VERSION_NUMBER = "0.4";
    /**
     * Admindata version. oldVersionNumber is the earlier but still valid version.
     */
    private static final String OLD_VERSION_NUMBER = "0.3";
    /** Map containing a mapping from arcfilename to ArcRepositoryEntry. */
    protected Map<String, ArcRepositoryEntry> storeEntries = new HashMap<String, ArcRepositoryEntry>();
    /**
     * General delimiter. TODO add constants class where these constants are placed.
     */
    private static final String GENERAL_DELIMITER = " ";

    /**
     * The directory where the admin data resides, currently the directory given by the setting
     * ArchiveSettings.DIRS_ARCREPOSITORY_ADMIN.
     */
    protected File adminDir;

    /** The name of the admin file. */
    protected static final String ADMIN_FILE_NAME = "admin.data";

    /**
     * List containing the names of all knownBitArchives. This class only reads the list (in toString()); it is
     * never modified here. According to the original comment it is updated in a setState() method, which is not
     * defined in this class.
     */
    protected List<String> knownBitArchives = new ArrayList<String>();

    /** The File object for the admin data file. */
    protected final File adminDataFile;

    /**
     * Common constructor for admin data. Ensures that the admin directory exists and, if the admin data file is
     * present, reads the current admin data from it. A missing admin data file is only logged as a warning.
     * <p>
     * NOTE(review): {@link #read()} is protected and invoked from this constructor, so an override in a subclass
     * runs before that subclass's own fields have been initialised.
     */
    protected AdminData() {
        this.adminDir = new File(Settings.get(ArchiveSettings.DIRS_ARCREPOSITORY_ADMIN));
        ApplicationUtils.dirMustExist(adminDir);

        adminDataFile = new File(adminDir, AdminData.ADMIN_FILE_NAME);
        log.info("Using admin data file '{}'", adminDataFile.getAbsolutePath());

        if (adminDataFile.exists()) {
            read(); // Load admindata into StoreEntries Map
        } else {
            log.warn("AdminDataFile ({}) was not found.", adminDataFile.getPath());
        }
    }

    /**
     * Returns the one and only AdminData instance.
     *
     * @return the one and only AdminData instance.
     */
    public static synchronized UpdateableAdminData getUpdateableInstance() {
        return UpdateableAdminData.getInstance();
    }

    /**
     * Returns a read-only AdminData instance.
     *
     * @return a read-only AdminData instance.
     */
    public static synchronized ReadOnlyAdminData getReadOnlyInstance() {
        // no Singleton returned
        return new ReadOnlyAdminData();
    }

    /**
     * Checks whether there is an entry for a certain arcfile.
     *
     * @param arcfileName A given arcfile
     * @return true, if there is an entry for the given arcfile
     * @throws ArgumentNotValid If the arcfileName is either null or the empty string.
     */
    public boolean hasEntry(String arcfileName) {
        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfileName");
        return storeEntries.containsKey(arcfileName);
    }

    /**
     * Return the ArcRepositoryEntry for a certain arcfileName. Returns null, if not found.
     *
     * @param arcfileName a certain filename
     * @return the ArcRepositoryEntry for a certain arcfileName
     */
    public ArcRepositoryEntry getEntry(String arcfileName) {
        return storeEntries.get(arcfileName);
    }

    /**
     * Tells whether there is a replyInfo associated with the given arcfile. If the file is not registered, a warning
     * is logged and false is returned.
     *
     * @param arcfileName The arc file we want to reply a store request for.
     * @return Whether setReplyInfo() has been called (and the replyInfo hasn't been removed since).
     */
    public boolean hasReplyInfo(String arcfileName) {
        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfileName");
        ArcRepositoryEntry entry = storeEntries.get(arcfileName);
        if (entry == null) {
            log.warn("No entry found in storeEntries for arcfilename: {}", arcfileName);
        }
        return entry != null && entry.hasReplyInfo();
    }

    /**
     * Returns whether or not a BitArchiveStoreState is registered for the given ARC file at the given bit archive.
     * If the file is not registered, a warning is logged and false is returned.
     *
     * @param arcfileName The file to retrieve the state for
     * @param replicaChannelName The name of the identification channel for the replica the state should be retrieved
     * for.
     * @return true if BitArchiveStoreState is registered, false otherwise.
     */
    public boolean hasState(String arcfileName, String replicaChannelName) {
        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "String arcfileName");
        ArgumentNotValid.checkNotNullOrEmpty(replicaChannelName, "String replicaChannelName");
        ArcRepositoryEntry entry = storeEntries.get(arcfileName);
        if (entry == null) {
            log.warn("No entry found in storeEntries for arcfilename: {}", arcfileName);
        }
        return entry != null && entry.hasStoreState(replicaChannelName);
    }

    /**
     * Retrieves the storage state of a file for a specific replica.
     *
     * @param arcfileName The file to retrieve the state for.
     * @param replicaChannelName The name of the identification channel for the replica the state should be retrieved
     * for.
     * @return The storage state.
     * @throws UnknownID When no record exists.
     */
    public ReplicaStoreState getState(String arcfileName, String replicaChannelName) throws UnknownID {
        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "String arcfileName");
        ArgumentNotValid.checkNotNullOrEmpty(replicaChannelName, "String replicaChannelName");
        if (!hasState(arcfileName, replicaChannelName)) {
            throw new UnknownID("No store state recorded for '" + arcfileName + "' in '" + replicaChannelName + "'");
        }
        return storeEntries.get(arcfileName).getStoreState(replicaChannelName);
    }

    /**
     * Get Checksum for a given arcfile.
     *
     * @param arcfileName Unique reference to file for which to retrieve checksum
     * @return checksum the latest registered reference checksum or null, if no reference checksum is available
     * @throws UnknownID if the file is not registered
     * @throws ArgumentNotValid If the arcFileName is either null or the empty string.
     */
    public String getCheckSum(String arcfileName) throws ArgumentNotValid, UnknownID {
        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfileName");
        if (!hasEntry(arcfileName)) {
            throw new UnknownID("Don't know anything about file '" + arcfileName + "'");
        }
        return storeEntries.get(arcfileName).getChecksum();
    }

    /**
     * Reads the admin data from the admin data file into {@link #storeEntries}. The first line of the file is the
     * version; an empty file is treated as the (still valid) old version.
     * <p>
     * NOTE(review): the original comment stated that data in the old format is converted to the new version and
     * written to disk. Nothing is written by this method; presumably a subclass takes care of that - confirm.
     *
     * @throws IOFailure on trouble reading from file, or if the file has an unknown version
     */
    protected void read() throws IOFailure {
        try {
            BufferedReader reader = null;
            try {
                reader = new BufferedReader(new FileReader(adminDataFile));

                // An empty file has no version line; it is handled like an (empty) old-version file.
                String versionLine = reader.readLine();
                String dataVersion = (versionLine != null) ? versionLine : OLD_VERSION_NUMBER;

                // Now read the data file, depending on version.
                if (dataVersion.equals(VERSION_NUMBER)) {
                    log.debug("admindata version: {}", VERSION_NUMBER);
                    readCurrentVersion(reader);
                } else if (dataVersion.equals(OLD_VERSION_NUMBER)) {
                    log.debug("admindata version: {}", OLD_VERSION_NUMBER);
                    readValidOldVersion(reader);
                } else {
                    throw new IOFailure("Invalid version: " + dataVersion);
                }
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        } catch (FileNotFoundException e) {
            throw new IOFailure("AdminData couldn't find admin data file", e);
        } catch (IOException e) {
            // Any other I/O problem (including a failing close()) is a read failure, not a missing file.
            throw new IOFailure("AdminData couldn't read admin data file", e);
        }
    }

    /**
     * Read the valid old version (0.3) of the admin data. The valid old version contains lines of the format
     * {@code <filename/> <checksum/> [<bitarchive/> <storestatus/>]*}
     * <p>
     * The same filename may occur multiple times, but must always have the same checksum. This indicates updates of
     * the storestatus for the file. Updates to checksum happen only during 'correct' operations and cause the entire
     * file to be written, leaving the changed entry with the new checksum only.
     * <p>
     * Lines without a filename (blank lines, or lines starting with the delimiter) are logged and skipped. A line
     * with a filename but no checksum is skipped if the filename is unknown so far. Otherwise an entry-line is
     * considered corrupt (!valid) if any of the following occur:
     * <ul>
     * <li>There is no checksum.</li>
     * <li>There is a bitarchive with a missing or invalid status.</li>
     * <li>The checksum does not match a previously found checksum.</li>
     * </ul>
     * NB: If we come upon a corrupt entry-line, the entry for the filename in question is removed from the data
     * held in memory.
     *
     * @param reader The stream to read the input from.
     * @throws IOFailure If an error occurred while reading from the stream.
     */
    private void readValidOldVersion(BufferedReader reader) {
        String s;
        try {
            while ((s = reader.readLine()) != null) {
                String[] parts = s.split(GENERAL_DELIMITER);

                // split() yields an empty array for a delimiter-only line and a single empty string for an empty
                // line. Without this guard such lines abort the whole load (ArrayIndexOutOfBoundsException on
                // parts[0], or ArgumentNotValid from hasEntry("")).
                if (parts.length == 0 || parts[0].isEmpty()) {
                    log.warn("Corrupt admin data file: No filename found in line: '{}'. Ignoring this line", s);
                    continue;
                }

                boolean valid = true;
                String filename = parts[0];
                if (parts.length < 2 || parts.length % 2 != 0) {
                    log.warn("Corrupt admin data file: Too few or not an even number of fields for {}: {}",
                            filename, s);
                    valid = false;
                }
                if (parts.length > 1) {
                    String checksum = parts[1];
                    if (hasEntry(filename)) {
                        if (!checksum.equals(getCheckSum(filename))) {
                            log.warn("Wrong checksum encountered in admin data for known file '{}': Old={} New={}",
                                    filename, getCheckSum(filename), checksum);
                            // this means, that the existing entry is removed
                            // from admin.data
                            valid = false;
                        }
                    } else {
                        StoreMessage replyInfo = null;
                        storeEntries.put(filename, new ArcRepositoryEntry(filename, checksum, replyInfo));
                    }
                } else { // parts.length == 1
                    if (hasEntry(filename)) {
                        log.debug("Entry is invalid, because no checksumstring found in line: {}", s);
                        // this means, that the existing entry
                        // is removed from admin.data
                        valid = false;
                    } else {
                        // Ignore this entry entirely, if not already
                        // entry for this filename
                        log.warn("This entry-line is ignored, because no checksumstring found in line: {}", s);
                        continue;
                    }
                }
                // If the entry is invalid, no reason to try parsing states.
                // A valid line has an even number of fields, so parts[i + 1] always exists below.
                if (valid) {
                    ArcRepositoryEntry entry = storeEntries.get(filename);
                    for (int i = 2; i < parts.length; i += 2) {
                        try {
                            entry.setStoreState(parts[i], ReplicaStoreState.valueOf(parts[i + 1]));
                        } catch (IllegalArgumentException e) {
                            log.warn("Corrupt admin data entry. ", e);
                            valid = false;
                            break;
                        }
                    }
                }
                // Note that the previous if could set valid to false.
                // An entry for the filename always exists at this point: it was either known already or has
                // been added above, and the only path without an entry has already hit 'continue'.
                if (!valid) {
                    log.warn("Entry for file '{}' with checksum '{}' is invalid and therefore removed after reading "
                            + "line with inconsistent information: {}", filename, storeEntries.get(filename)
                            .getChecksum(), s);
                    storeEntries.remove(filename);
                }
            }
        } catch (IOException e) {
            final String message = "Failed to read admin data from '" + adminDataFile.getPath() + "'";
            log.error(message);
            throw new IOFailure(message, e);
        }
    }

    /**
     * Read the current version (0.4) of the admin data. The current version contains lines of the format
     * {@code <filename/> <checksum/> <state/> <timestamp-for-last-state-change/>
     * [,<bitarchive/> <storestatus/> <timestamp-for-last-state-change/>]*}
     * <p>
     * The same filename may occur multiple times, but must always have the same checksum. This indicates updates of
     * the storestatus for the file. Updates to checksum happen only during 'correct' operations and cause the entire
     * file to be written, leaving the changed entry with the new checksum only.
     * <p>
     * Handling of corrupt lines:
     * <ul>
     * <li>If the first section does not consist of exactly filename, checksum, state and timestamp, or the state or
     * timestamp cannot be parsed, a warning is logged and the line is ignored.</li>
     * <li>If the checksum does not match a previously found checksum for the same filename, the entry for that
     * filename is removed and the rest of the line is ignored.</li>
     * <li>If a bitarchive section is incomplete or has an unparsable status or timestamp, a warning is logged and
     * only that section is skipped.</li>
     * </ul>
     * The file-level state and timestamp are validated but not stored by this method.
     *
     * @param reader The stream to read the input from.
     * @throws ArgumentNotValid If reader is null.
     * @throws IOFailure If an error occurred with access to the admin.data.
     */
    private void readCurrentVersion(BufferedReader reader) throws ArgumentNotValid, IOFailure {
        ArgumentNotValid.checkNotNull(reader, "reader");

        // The expected number of elements in first part of a line.
        final int firstPartLength = 4;

        // indices for the different parts in the first line.
        final int indexFirstPartFilename = 0;
        final int indexFirstPartChecksum = 1;
        final int indexFirstPartState = 2;
        final int indexFirstPartTimestamp = 3;

        // The expected number of elements in the other parts of the line.
        final int otherPartsLength = 3;

        // The indices for the different parts in the other lines.
        final int indexOtherPartsReplica = 0;
        final int indexOtherPartsState = 1;
        final int indexOtherPartsTimestamp = 2;

        String s;
        try {
            while ((s = reader.readLine()) != null) {

                // Split the line up in parts defined by
                // the ENTRY_COMPONENT_SEPARATOR_STRING
                String[] parts = s.split(ArcRepositoryEntry.ENTRY_COMPONENT_SEPARATOR_STRING);

                // parts[0] should now contain the <filename> <checksum>
                // <state> <timestamp-for-last-state-change>

                // For i=0,1.. : parts[1+i] contains the state-information
                // for the file on our bitarchives.

                // split() returns an empty array for a line consisting of separators only; treat that like a
                // line with missing components instead of failing on parts[0].
                String[] firstparts = (parts.length == 0) ? new String[0] : parts[0].split(GENERAL_DELIMITER);

                // An empty filename (line starting with the delimiter) would make hasEntry() throw
                // ArgumentNotValid, so it is rejected here as a missing component.
                if (firstparts.length != firstPartLength || firstparts[indexFirstPartFilename].isEmpty()) {
                    String logMessage = "Corrupt admin data file: One of the components '<filename> <checksum> "
                            + "<state> <timestamp-for-last-state-change>' is missing from this line: " + s
                            + "\nIgnoring this line";
                    log.warn(logMessage);
                    continue; // ignore this line, and go to next line
                }

                // Parse the different components of
                // <filename> <checksum> <state> <timestamp-for-last-state-change>
                String filename = firstparts[indexFirstPartFilename];
                String checksumString = firstparts[indexFirstPartChecksum];
                String stateString = firstparts[indexFirstPartState];
                String timestampString = firstparts[indexFirstPartTimestamp];
                log.trace("Found (filename, checksum, state, timestamp): {}, {}, {}, {}", filename, checksumString,
                        stateString, timestampString);

                // The file-level state and timestamp are only validated here; they are not stored.
                // NumberFormatException is a subclass of IllegalArgumentException, so one catch covers both.
                try {
                    ReplicaStoreState.valueOf(stateString);
                    Long.parseLong(timestampString);
                } catch (IllegalArgumentException e) {
                    log.warn("Corrupt admin data file: Invalid <state> or <timestamp-for-last-state-change> "
                            + "in this line: " + s + "\nIgnoring this line", e);
                    continue; // ignore this line, and go to next line
                }

                // Check, if we already have entry for this filename
                if (hasEntry(filename)) {
                    // check, if 'checksum' equals checksum-value in
                    // existing entry
                    if (!checksumString.equals(getCheckSum(filename))) {
                        log.warn("Wrong checksum encountered in admin data for known file '{}': Old={} New={}. "
                                + "Entry removed from admin.data and the remaining line ignored: {}", filename,
                                getCheckSum(filename), checksumString, s);
                        storeEntries.remove(filename);
                        continue; // Stop processing, and go to next line
                    }
                } else {
                    // Add new entry for filename:
                    StoreMessage replyInfo = null;
                    storeEntries.put(filename, new ArcRepositoryEntry(filename, checksumString, replyInfo));
                }

                // Parse the remaining parts[1..] array
                // Expected format:
                // <bitarchive> <storestatus> <timestamp-for-last-state-change>
                ArcRepositoryEntry entry = getEntry(filename);
                for (int i = 1; i < parts.length; i++) {
                    String[] bitparts = parts[i].split(GENERAL_DELIMITER);
                    if (bitparts.length != otherPartsLength) {
                        final String message = "Line incomplete. Expected 3 elements: <bitarchive> <storestatus> "
                                + "<timestamp-for-last-state-change>. Found only " + bitparts.length
                                + " elements in line: " + s;
                        log.warn(message);
                        continue;
                    }

                    String bitarchiveString = bitparts[indexOtherPartsReplica];
                    ReplicaStoreState state;
                    Date timestampAsDate;
                    // Only the parsing is guarded; failures raised by setStoreState() itself still propagate.
                    try {
                        state = ReplicaStoreState.valueOf(bitparts[indexOtherPartsState]);
                        timestampAsDate = new Date(Long.parseLong(bitparts[indexOtherPartsTimestamp]));
                    } catch (IllegalArgumentException e) {
                        log.warn("Corrupt admin data file: Invalid <storestatus> or "
                                + "<timestamp-for-last-state-change> for bitarchive '" + bitarchiveString
                                + "' in line: " + s + "\nIgnoring this bitarchive section", e);
                        continue;
                    }
                    entry.setStoreState(bitarchiveString, state, timestampAsDate);
                }
            }
        } catch (IOException e) {
            final String message = "Failed to read admin data from '" + adminDataFile.getPath() + "'";
            log.error(message);
            throw new IOFailure(message, e);
        }
    }

    /**
     * Returns a set of the all arcfile names in the repository. The returned set is a copy; changing it does not
     * affect the admin data.
     *
     * @return the set of files in the repository
     */
    public Set<String> getAllFileNames() {
        return new HashSet<String>(storeEntries.keySet());
    }

    /**
     * Returns a set of the arcfile names that are in a given state for a specific bitarchive in the repository.
     *
     * @param replica the object representing the BA
     * @param state the state to look for, e.g. ReplicaStoreState.STATE_COMPLETED
     * @return the set of files in the repository with the given state
     * @throws ArgumentNotValid If replica or state is null.
     */
    public Set<String> getAllFileNames(Replica replica, ReplicaStoreState state) {
        ArgumentNotValid.checkNotNull(replica, "Replica replica");
        ArgumentNotValid.checkNotNull(state, "BitArchiveStoreState state");
        String replicaKey = replica.getIdentificationChannel().getName();
        Set<String> matchingFiles = new HashSet<String>();
        for (Map.Entry<String, ArcRepositoryEntry> entry : storeEntries.entrySet()) {
            if (entry.getValue().getStoreState(replicaKey) == state) {
                matchingFiles.add(entry.getKey());
            }
        }
        return matchingFiles;
    }

    /**
     * Return info about current object as String: the known bitarchives followed by all registered file names.
     *
     * @return info about current object as String.
     */
    @Override
    public String toString() {
        StringBuilder out = new StringBuilder();
        out.append("\nAdminData:");
        out.append("\nKnown bitarchives:");
        out.append(knownBitArchives.toString());
        out.append(getAllFileNames().toString());
        return out.toString();
    }

}