/*
 * #%L
 * Netarchivesuite - archive
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.archive.arcrepository.bitpreservation;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.archive.arcrepositoryadmin.ArcRepositoryEntry;
import dk.netarkivet.common.distribute.arcrepository.Replica;
import dk.netarkivet.common.distribute.arcrepository.ReplicaStoreState;
import dk.netarkivet.common.distribute.arcrepository.ReplicaType;
import dk.netarkivet.common.exceptions.ArgumentNotValid;

/**
 * This class collects the available bit preservation information for a file. <br>
 * This information is the following: <br>
 * 1) admin information for the file for each replica and<br>
 * <br>
 * 2) the actual upload status
 */
public class FilePreservationState implements PreservationState {

    /** The log. */
    private static final Logger log = LoggerFactory.getLogger(FilePreservationState.class);

    /** The name of the preserved file. */
    private final String filename;

    /** The information as seen by the ArcRepository. */
    private final ArcRepositoryEntry adminStatus;

    /**
     * The checksums of the file in the individual replica. Normally, there will only be one entry in the list, but it
     * must also handle the case where multiple copies exist in a replica.
     */
    private final Map<Replica, List<String>> replica2checksum;

    /**
     * Create new instance of the preservation status for a file.
     *
     * @param filename The filename to get status for
     * @param admindata The admin data for the file
     * @param checksumMap The map with the checksums for this file in all replicas. The map is stored as given, not
     * copied.
     * @throws ArgumentNotValid if filename is null or empty string, or if admindata or checksumMap is null.
     */
    FilePreservationState(String filename, ArcRepositoryEntry admindata, Map<Replica, List<String>> checksumMap)
            throws ArgumentNotValid {
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
        ArgumentNotValid.checkNotNull(admindata, "ArcRepositoryEntry admindata");
        ArgumentNotValid.checkNotNull(checksumMap, "Map<Replica, List<String>> checksumMap");
        this.filename = filename;
        adminStatus = admindata;
        replica2checksum = checksumMap;
    }

    /**
     * Get the checksums of this file in a specific replica.
     *
     * @param replica The replica to get the checksums from.
     * @return The list of checksums registered for the file in the replica (the list held by this object, not a
     * copy), or an empty list if the replica has no entry or its entry is null. Never null.
     */
    public List<String> getReplicaChecksum(Replica replica) {
        ArgumentNotValid.checkNotNull(replica, "Replica replica");
        // A single lookup covers both "no entry" and "entry mapped to null", so callers never see null.
        List<String> checksums = replica2checksum.get(replica);
        if (checksums == null) {
            return Collections.emptyList();
        }
        return checksums;
    }

    /**
     * Get the MD5 checksum stored in the admin data.
     *
     * @return Checksum value as found in the admin data given at creation.
     */
    public String getAdminChecksum() {
        return adminStatus.getChecksum();
    }

    /**
     * Get the status of the file in a replica, according to the admin data. This returns the status as a string for
     * presentation purposes only.
     *
     * @param replica The replica to get status for
     * @return Status that the admin data knows for this file in the replica, or "No state" if the admin data has no
     * state for the replica.
     */
    public String getAdminReplicaState(Replica replica) {
        ArgumentNotValid.checkNotNull(replica, "Replica replica");
        ReplicaStoreState state = getAdminBitarchiveStoreState(replica);
        if (state == null) {
            return "No state";
        }
        return state.toString();
    }

    /**
     * Get the status of the file in a replica, according to the admin data. The admin data is queried by the name of
     * the replica's identification channel.
     *
     * @param replica The replica to get status for
     * @return Status that the admin data knows for this file in the replica.
     */
    private ReplicaStoreState getAdminBitarchiveStoreState(Replica replica) {
        String bamonname = replica.getIdentificationChannel().getName();
        return adminStatus.getStoreState(bamonname);
    }

    /**
     * Check if the admin data reflect the actual status of the archive.
     * <p>
     * Admin State checking: For each replica the admin state is compared to the checksum received from the replica.
     * <p>
     * If no checksum is received from the replica the valid admin states are UPLOAD_STARTED and UPLOAD_FAILED. If a
     * checksum is received from the replica the valid admin state is UPLOAD_COMPLETED.
     * <p>
     * Admin checksum checking: The admin checksum must match the majority of reported checksums.
     * <p>
     * Notice that a valid Admin data record does NOT imply that everything is ok. Specifically a file may be missing
     * from a replica, or the checksum of a file in a replica may be wrong.
     *
     * @return true, if admin data match the state of the replicas, false otherwise
     */
    public boolean isAdminDataOk() {
        // Check the bitarchive states against the admin information
        for (Replica r : Replica.getKnown()) {
            ReplicaStoreState adminstate = getAdminBitarchiveStoreState(r);
            List<String> checksum = getReplicaChecksum(r);

            // If we find an error, return false, otherwise go on to the rest.
            if (checksum.isEmpty()) {
                // Nothing reported by the replica: admin must not claim a completed upload.
                if (adminstate != ReplicaStoreState.UPLOAD_STARTED && adminstate != ReplicaStoreState.UPLOAD_FAILED) {
                    return false;
                }
            } else {
                // The replica reported the file: admin must know it as completed and hold a checksum.
                if (adminstate != ReplicaStoreState.UPLOAD_COMPLETED) {
                    return false;
                }
                if (getAdminChecksum().isEmpty()) {
                    return false;
                }
            }
        }

        // If we reach here, we either have no checksums anywhere or
        // admin has a checksum, which should then agree with the majority
        return isAdminCheckSumOk();
    }

    /**
     * Check if the file is missing from a replica.
     *
     * @param replica the replica to check.
     * @return true if the file is missing from the replica, i.e. no checksums are registered for it.
     */
    public boolean fileIsMissing(Replica replica) {
        return getReplicaChecksum(replica).isEmpty();
    }

    /**
     * Returns a reference to a bitarchive replica that contains a version of the file with the correct checksum.
     * <p>
     * The correct checksum is defined as the checksum that the majority of the replicas and admin data agree upon.
     * <p>
     * If no bitarchive replica exists with a correct version of the file null is returned.
     *
     * @return the reference replica or null if no reference exists.
     */
    public Replica getReferenceBitarchive() {
        String referenceCheckSum = getReferenceCheckSum();
        log.trace("Reference-checksum for file '{}' is '{}'", filename, referenceCheckSum);
        if ("".equals(referenceCheckSum)) {
            // No majority checksum, hence nothing to use as reference.
            return null;
        }

        // go through all the replicas to find a bitarchive replica which
        // contains the file with the correct checksum.
        for (Replica r : Replica.getKnown()) {
            String cs = getUniqueChecksum(r);
            // The replica has to have the correct checksum and be a bitarchive.
            if (referenceCheckSum.equals(cs) && (r.getType() == ReplicaType.BITARCHIVE)) {
                log.debug("Reference archive for file '{}' is '{}'", filename, r.getId());
                return r;
            }
        }

        log.trace("No reference archive found for file '{}'", filename);
        return null;
    }

    /**
     * Get a checksum that the whole replica agrees upon, or else "".
     *
     * @param r A replica to get checksum for this file from
     * @return The checksum for this file in the replica, if all machines that have that file agree, otherwise "". If no
     * checksums are found, also returns "".
     */
    public String getUniqueChecksum(Replica r) {
        ArgumentNotValid.checkNotNull(r, "Replica r");
        List<String> checksums = getReplicaChecksum(r);
        String checksum = null;
        for (String s : checksums) {
            if (checksum != null && !checksum.equals(s)) {
                // Two different checksums within the same replica: no unique value.
                return "";
            } else {
                checksum = s;
            }
        }
        if (checksum != null) {
            return checksum;
        } else {
            return "";
        }
    }

    /**
     * Retrieve checksum that the majority of checksum references (replicas+admin) agree upon.
     * <p>
     * The admin data contributes one vote and every known replica contributes one vote, namely its unique checksum.
     * A replica without a unique checksum votes for "", so if "" wins the result is indistinguishable from "no
     * majority".
     *
     * @return the reference checksum or "" if no majority exists
     */
    public String getReferenceCheckSum() {
        // establish map from checksum to counter of occurrences
        Map<String, Integer> checksumCounts = new HashMap<String, Integer>();
        checksumCounts.put(adminStatus.getChecksum(), 1);
        for (Replica baReplica : Replica.getKnown()) {
            String checksum = getUniqueChecksum(baReplica);
            Integer votes = checksumCounts.get(checksum);
            checksumCounts.put(checksum, votes == null ? 1 : votes + 1);
        }

        // Now determine if a checksum obtained a strict majority (more than half) of the votes.
        // There are (number of replicas + 1) votes in total, the extra one being the admin data.
        int majorityCount = (Replica.getKnown().size() + 1) / 2 + 1;
        for (Map.Entry<String, Integer> entry : checksumCounts.entrySet()) {
            log.trace("File '{}' checksum '{}' votes {} majority count {}", filename, entry.getKey(), entry.getValue(),
                    majorityCount);
            if (entry.getValue() >= majorityCount) {
                return entry.getKey();
            }
        }

        return "";
    }

    /**
     * Returns true if the checksum reported by admin data is equal to the majority checksum. If no majority checksum
     * exists true is also returned. When this method returns false it is possible to correct the admin checksum using
     * the majority checksum - when true is returned no better checksum exists for admin data.
     *
     * @return true, if the checksum reported by admin data is equal to the majority checksum
     */
    public boolean isAdminCheckSumOk() {
        String referenceCheckSum = getReferenceCheckSum();
        if (referenceCheckSum.isEmpty()) {
            return true;
        }
        return adminStatus.getChecksum().equals(referenceCheckSum);
    }

    /**
     * Returns a human-readable representation of this object. Do not depend on this format for anything automated, as
     * it may change at any time.
     *
     * @return Description of this object.
     */
    @Override
    public String toString() {
        String res = "PreservationStatus for '" + filename + "'\n";
        // Defensive check, kept from the original; the constructor already validates the admin data.
        if (adminStatus != null) {
            res = res + "General store state: " + adminStatus.getGeneralStoreState().getState() + " "
                    + adminStatus.getGeneralStoreState().getLastChanged() + "\n";
        }
        return res;
    }

    /**
     * Get the filename, this FilePreservationState is about. Needed to get at the filename given to constructor, and
     * allow for a better datastructure.
     *
     * @return the filename
     */
    public String getFilename() {
        return filename;
    }

}