001/* 002 * #%L 003 * Netarchivesuite - archive 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.archive.arcrepository.bitpreservation; 025 026import java.util.ArrayList; 027import java.util.HashMap; 028import java.util.List; 029import java.util.Map; 030 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import dk.netarkivet.archive.arcrepositoryadmin.ChecksumStatus; 035import dk.netarkivet.archive.arcrepositoryadmin.FileListStatus; 036import dk.netarkivet.archive.arcrepositoryadmin.ReplicaFileInfo; 037import dk.netarkivet.common.distribute.arcrepository.Replica; 038import dk.netarkivet.common.distribute.arcrepository.ReplicaType; 039import dk.netarkivet.common.exceptions.ArgumentNotValid; 040import dk.netarkivet.common.exceptions.IllegalState; 041 042/** 043 * This class contains the preservation data based on the database data of a given filename. Contains the 044 * ReplicaFileInfos corresponding to the file. 045 */ 046public class DatabasePreservationState implements PreservationState { 047 048 /** The log. */ 049 private static final Logger log = LoggerFactory.getLogger(DatabasePreservationState.class); 050 051 /** 052 * The map containing all the entries for in the replicafileinfo table in the database and the replica they 053 * correspond to. 054 */ 055 private Map<Replica, ReplicaFileInfo> entries = new HashMap<Replica, ReplicaFileInfo>(); 056 /** The name of the file. */ 057 private String filename; 058 059 /** 060 * Constructor. 061 * 062 * @param fileName The name of the file. 063 * @param rfis A list of the ReplicaFileInfo entries in the database for the given file. 064 * @throws ArgumentNotValid If the filename is null or the empty string, or if the list of ReplicaFileInfos are null 065 * or empty. 066 */ 067 public DatabasePreservationState(String fileName, List<ReplicaFileInfo> rfis) throws ArgumentNotValid { 068 ArgumentNotValid.checkNotNullOrEmpty(fileName, "String fileName"); 069 ArgumentNotValid.checkNotNullOrEmpty(rfis, "List<ReplicaFileInfo> rfis"); 070 071 this.filename = fileName; 072 073 // retrieve the replica, and put it into the map along the fileinfo. 074 for (ReplicaFileInfo rfi : rfis) { 075 Replica rep = Replica.getReplicaFromId(rfi.getReplicaId()); 076 entries.put(rep, rfi); 077 } 078 } 079 080 /** 081 * Get the checksum of this file in a specific replica. 082 * 083 * @param replica The replica to get the checksum from. 084 * @return A list of the checksums for the file within the replica (only more than one if there is duplicates in a 085 * bitarchive replica). An empty list is returned if no file is present or if an error occurred. 086 * @throws ArgumentNotValid If the replica is null. 087 */ 088 public List<String> getReplicaChecksum(Replica replica) throws ArgumentNotValid { 089 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 090 091 // return empty list if the file is missing from replica. 092 if (entries.get(replica).getFileListState().equals(FileListStatus.MISSING)) { 093 return new ArrayList<String>(0); 094 } 095 096 // initialize resulting array. 097 List<String> res = new ArrayList<String>(1); 098 // retrieve checksum for replica, and put into array. 099 res.add(getUniqueChecksum(replica)); 100 101 return res; 102 } 103 104 /** 105 * Get the MD5 checksum stored in the admin data. Inherited dummy function. No admin data for database instance, 106 * thus no admin data checksum. 107 * 108 * @return Checksum value as found in the admin data given at creation. 109 */ 110 public String getAdminChecksum() { 111 // Dummy function. 112 return "NO ADMIN CHECKSUM!"; 113 } 114 115 /** 116 * Get the status of the file in a replica, according to the admin data. This returns the status as a string for 117 * presentation purposes only. 118 * 119 * @param replica The replica to get status for 120 * @return Status that the admin data knows for this file in the replica. 121 * @throws ArgumentNotValid If the replica is null. 122 */ 123 public String getAdminReplicaState(Replica replica) throws ArgumentNotValid { 124 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 125 126 return entries.get(replica).getUploadState().toString(); 127 } 128 129 /** 130 * INHERITED DUMMY FUNCTION! 131 * 132 * @return true, since a non-existing admin.data is OK for the database instance. 133 */ 134 public boolean isAdminDataOk() { 135 // No admin data = OK 136 return true; 137 } 138 139 /** 140 * Returns a reference to a replica that contains a version of the file with the correct checksum. 141 * <p> 142 * The correct checksum is defined as the checksum that the majority of the replica and admin data agree upon. 143 * <p> 144 * If no replica exists with a correct version of the file null is returned. 145 * 146 * @return the name of the reference replica or null if no reference exists. 147 */ 148 public Replica getReferenceBitarchive() { 149 for (Map.Entry<Replica, ReplicaFileInfo> entry : entries.entrySet()) { 150 // Check whether it is a bitarchive with OK checksum. 151 if (entry.getKey().getType().equals(ReplicaType.BITARCHIVE) 152 && entry.getValue().getChecksumStatus().equals(ChecksumStatus.OK)) { 153 log.debug("Found reference bitarchive replica for file '{}'.", filename); 154 return entry.getKey(); 155 } 156 } 157 158 // If no replica is found, then report and return null. 159 log.warn("Cannot find a reference bitarchive for the file '{}'. Returning null.", filename); 160 return null; 161 } 162 163 /** 164 * Get a checksum that the whole replica agrees upon, or else "". 165 * 166 * @param replica A replica to get checksum for this file from 167 * @return The checksum for this file in the replica, if all machines that have that file agree, otherwise "". If no 168 * checksums are found, also returns "". 169 * @throws ArgumentNotValid If the replica is null. 170 */ 171 public String getUniqueChecksum(Replica replica) throws ArgumentNotValid { 172 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 173 174 // return "" if the file is missing. 175 if (entries.get(replica).getFileListState().equals(FileListStatus.MISSING)) { 176 return ""; 177 } 178 179 return entries.get(replica).getChecksum(); 180 } 181 182 /** 183 * Check if the file is missing from a replica. 184 * 185 * @param replica the replica to check 186 * @return true if the file is missing from the replica 187 * @throws ArgumentNotValid If the replica is null. 188 */ 189 public boolean fileIsMissing(Replica replica) throws ArgumentNotValid { 190 ArgumentNotValid.checkNotNull(replica, "Replica replica"); 191 192 // TODO Is it missing if the status is unknown? 193 return entries.get(replica).getFileListState() == FileListStatus.MISSING; 194 } 195 196 /** 197 * THIS IS VOTING! Retrieve checksum that the majority of checksum references replicas agree upon. 198 * <p> 199 * TODO Voting is already done by the DatabasedActiveBitPreservation. Thus replace with finding an entry with 200 * checksum-status = OK. 201 * 202 * @return the reference checksum or "" if no majority exists 203 */ 204 public String getReferenceCheckSum() { 205 // Map containing the checksum and the count 206 Map<String, Integer> checksumCount = new HashMap<String, Integer>(); 207 208 log.debug("Creating checksum count map for voting."); 209 210 // Insert all the checksum of all the entries into the map. 211 for (ReplicaFileInfo rfi : entries.values()) { 212 String checksum = rfi.getChecksum(); 213 214 // ignore if the checksum is invalid. 215 if (checksum == null || checksum.isEmpty()) { 216 log.warn("invalid checksum for replicafileinfo: {}", rfi); 217 continue; 218 } 219 220 if (checksumCount.containsKey(checksum)) { 221 // retrieve the count and add one 222 Integer count = checksumCount.get(checksum) + 1; 223 // put the count back into the map. 224 checksumCount.put(checksum, count); 225 } else { 226 // Put the checksum into the map, with the count one. 227 checksumCount.put(checksum, Integer.valueOf(1)); 228 } 229 } 230 231 log.debug("Perform the actual voting."); 232 233 // go through the map to find the largest count. 234 int largest = -1; 235 String res = "NO CHECKSUMS!"; 236 boolean unique = false; 237 for (Map.Entry<String, Integer> checksumEntry : checksumCount.entrySet()) { 238 // check whether this has the highest count. 239 if (checksumEntry.getValue().intValue() > largest) { 240 unique = true; 241 largest = checksumEntry.getValue().intValue(); 242 res = checksumEntry.getKey(); 243 } else if (checksumEntry.getValue().intValue() == largest) { 244 // If several checksums has the same largest count, then 245 // the checksum is not unique. 246 unique = false; 247 } 248 } 249 250 // Check whether unique, and report other wise. 251 if (!unique) { 252 // TODO handle differently? send notification? 253 String errMsg = "No common checksum was found for the file '" + filename + "'." + " The checksums found: " 254 + checksumCount; 255 log.error(errMsg); 256 throw new IllegalState(errMsg); 257 } 258 259 // log the results. 260 log.info("The replicas have voted about the checksum for the file '{}' and have elected the checksum '{}'.", 261 filename, res); 262 263 return res; 264 } 265 266 /** 267 * Returns true if the checksum reported by admin data is equal to the majority checksum. If no majority checksum 268 * exists true is also returned. When this method returns false it is possible to correct the admin checksum using 269 * the majority checksum - when true is returned no better checksum exists for admin data. 270 * 271 * @return true, if the checksum reported by admin data is equal to the majority checksum 272 */ 273 public boolean isAdminCheckSumOk() { 274 // The database is always OK. 275 return true; 276 } 277 278 /** 279 * Returns a human-readable representation of this object. Do not depend on this format for anything automated, as 280 * it may change at any time. 281 * 282 * @return Description of this object. 283 */ 284 public String toString() { 285 String res = "DatabasePreservationStatus for '" + filename + "'\n"; 286 for (ReplicaFileInfo rfi : entries.values()) { 287 res += rfi.toString() + "\n"; 288 } 289 return res; 290 } 291 292 /** 293 * Get the filename, this FilePreservationState is about. Needed to get at the filename given to constructor, and 294 * allow for a better datastructure. 295 * 296 * @return the filename 297 */ 298 public String getFilename() { 299 return filename; 300 } 301 302}