001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.archive.arcrepository.bitpreservation;
024
025import java.util.Collections;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033import dk.netarkivet.archive.arcrepositoryadmin.ArcRepositoryEntry;
034import dk.netarkivet.common.distribute.arcrepository.Replica;
035import dk.netarkivet.common.distribute.arcrepository.ReplicaStoreState;
036import dk.netarkivet.common.distribute.arcrepository.ReplicaType;
037import dk.netarkivet.common.exceptions.ArgumentNotValid;
038
039/**
040 * This class collects the available bit preservation information for a file. <br>
041 * This information is the following: <br>
042 * 1) admin information for the file for each replica and<br>
043 * <br>
044 * 2) the actual upload status
045 */
046public class FilePreservationState implements PreservationState {
047
048    /** The log. */
049    private static final Logger log = LoggerFactory.getLogger(FilePreservationState.class);
050
051    /** the name of the preserved file. */
052    private String filename;
053
054    /** the information as seen by the ArcRepository. */
055    private ArcRepositoryEntry adminStatus;
056
057    /**
058     * The checksums of the file in the individual replica. Normally, there will only be one entry in the list, but it
059     * must also handle the case where multiple copies exist in a replica.
060     */
061    private Map<Replica, List<String>> replica2checksum;
062
063    /**
064     * Create new instance of the preservation status for a file.
065     *
066     * @param filename The filename to get status for
067     * @param admindata The admin data for the file
068     * @param checksumMap The map with the checksums for this file in all replicas
069     * @throws ArgumentNotValid if filename is null or empty string, or if admindata is null.
070     */
071    FilePreservationState(String filename, ArcRepositoryEntry admindata, Map<Replica, List<String>> checksumMap)
072            throws ArgumentNotValid {
073        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
074        ArgumentNotValid.checkNotNull(admindata, "ArcRepositoryEntry admindata");
075        ArgumentNotValid.checkNotNull(checksumMap, "Map<Replica, List<String>> checksumMap");
076        this.filename = filename;
077        adminStatus = admindata;
078        replica2checksum = checksumMap;
079    }
080
081    /**
082     * Get the checksum of this file in a specific replica.
083     *
084     * @param replica The replica to get the checksum from.
085     * @return The file's checksum, if it is present in the replica, or "" if it either is absent or an error occurred.
086     */
087    public List<String> getReplicaChecksum(Replica replica) {
088        ArgumentNotValid.checkNotNull(replica, "Replica replica");
089        if (replica2checksum.containsKey(replica)) {
090            return replica2checksum.get(replica);
091        } else {
092            return Collections.emptyList();
093        }
094    }
095
096    /**
097     * Get the MD5 checksum stored in the admin data.
098     *
099     * @return Checksum value as found in the admin data given at creation.
100     */
101    public String getAdminChecksum() {
102        return adminStatus.getChecksum();
103    }
104
105    /**
106     * Get the status of the file in a replica, according to the admin data. This returns the status as a string for
107     * presentation purposes only.
108     *
109     * @param replica The replica to get status for
110     * @return Status that the admin data knows for this file in the replica.
111     */
112    public String getAdminReplicaState(Replica replica) {
113        ArgumentNotValid.checkNotNull(replica, "Replica replica");
114        ReplicaStoreState state = getAdminBitarchiveStoreState(replica);
115        if (state != null) {
116            return state.toString();
117        } else {
118            return "No state";
119        }
120    }
121
122    /**
123     * Get the status of the file in a replica, according to the admin data.
124     *
125     * @param replica The replica to get status for
126     * @return Status that the admin data knows for this file in the replica.
127     */
128    private ReplicaStoreState getAdminBitarchiveStoreState(Replica replica) {
129        String bamonname = replica.getIdentificationChannel().getName();
130        return adminStatus.getStoreState(bamonname);
131    }
132
133    /**
134     * Check if the admin data reflect the actual status of the archive.
135     * <p>
136     * Admin State checking: For each replica the admin state is compared to the checksum received from the replica.
137     * <p>
138     * If no checksum is received from the replica the valid admin states are UPLOAD_STARTED and UPLOAD_FAILED. If a
139     * checksum is received from the replica the valid admin state is UPLOAD_COMPLETED Admin checksum checking: The
140     * admin checksum must match the majority of reported checksums.
141     * <p>
142     * Notice that a valid Admin data record does NOT imply that everything is ok. Specifically a file may be missing
143     * from a replica, or the checksum of a file in a replica may be wrong.
144     *
145     * @return true, if admin data match the state of the replicas, false otherwise
146     */
147    public boolean isAdminDataOk() {
148        // Check the bitarchive states against the admin information
149        for (Replica r : Replica.getKnown()) {
150            ReplicaStoreState adminstate = getAdminBitarchiveStoreState(r);
151            List<String> checksum = getReplicaChecksum(r);
152
153            // If we find an error, return false, otherwise go on to the rest.
154            if (checksum.size() == 0) {
155                if (adminstate != ReplicaStoreState.UPLOAD_STARTED && adminstate != ReplicaStoreState.UPLOAD_FAILED) {
156                    return false;
157                }
158            } else {
159                if (adminstate != ReplicaStoreState.UPLOAD_COMPLETED) {
160                    return false;
161                }
162                if (getAdminChecksum().length() == 0) {
163                    return false;
164                }
165            }
166        }
167
168        // If we reach here, we either have no checksums anywhere or
169        // admin has a checksum, which should then agree with the majority
170        return isAdminCheckSumOk();
171    }
172
173    /**
174     * Check if the file is missing from a replica.
175     *
176     * @param replica the replica to check.
177     * @return true if the file is missing from the replica.
178     */
179    public boolean fileIsMissing(Replica replica) {
180        return getReplicaChecksum(replica).size() == 0;
181    }
182
183    /**
184     * Returns a reference to a bitarchive replica that contains a version of the file with the correct checksum.
185     * <p>
186     * The correct checksum is defined as the checksum that the majority of the replicas and admin data agree upon.
187     * <p>
188     * If no bitarchive replica exists with a correct version of the file null is returned.
189     *
190     * @return the name of the reference replica or null if no reference exists.
191     */
192    public Replica getReferenceBitarchive() {
193        String referenceCheckSum = getReferenceCheckSum();
194        log.trace("Reference-checksum for file '{}' is '{}'", filename, referenceCheckSum);
195        if ("".equals(referenceCheckSum)) {
196            return null;
197        }
198
199        // go through all the replicas to find a bitarchive replica which
200        // contains the file with the correct checksum.
201        for (Replica r : Replica.getKnown()) {
202            String cs = getUniqueChecksum(r);
203            // The replica has to have the correct checksum and be a bitarchive.
204            if (referenceCheckSum.equals(cs) && (r.getType() == ReplicaType.BITARCHIVE)) {
205                log.debug("Reference archive for file '{}' is '{}'", filename, r.getId());
206                return r;
207            }
208        }
209
210        log.trace("No reference archive found for file '{}'", filename);
211        return null;
212    }
213
214    /**
215     * Get a checksum that the whole replica agrees upon, or else "".
216     *
217     * @param r A replica to get checksum for this file from
218     * @return The checksum for this file in the replica, if all machines that have that file agree, otherwise "". If no
219     * checksums are found, also returns "".
220     */
221    public String getUniqueChecksum(Replica r) {
222        ArgumentNotValid.checkNotNull(r, "Replica r");
223        List<String> checksums = getReplicaChecksum(r);
224        String checksum = null;
225        for (String s : checksums) {
226            if (checksum != null && !checksum.equals(s)) {
227                return "";
228            } else {
229                checksum = s;
230            }
231        }
232        if (checksum != null) {
233            return checksum;
234        } else {
235            return "";
236        }
237    }
238
239    /**
240     * Retrieve checksum that the majority of checksum references (replicas+admin) agree upon.
241     *
242     * @return the reference checksum or "" if no majority exists
243     */
244    public String getReferenceCheckSum() {
245        // establish map from checksum to counter of occurences
246        Map<String, Integer> checksumCounts = new HashMap<String, Integer>();
247        checksumCounts.put(adminStatus.getChecksum(), 1);
248        for (Replica baReplica : Replica.getKnown()) {
249            String checksum = getUniqueChecksum(baReplica);
250            if (checksumCounts.containsKey(checksum)) {
251                checksumCounts.put(checksum, checksumCounts.get(checksum) + 1);
252            } else {
253                checksumCounts.put(checksum, 1);
254            }
255        }
256
257        // Now determine if a checksum obtained at least half of the votes
258        int majorityCount = (Replica.getKnown().size() + 1) / 2 + 1;
259        for (Map.Entry<String, Integer> entry : checksumCounts.entrySet()) {
260            log.trace("File '{}' checksum '{}' votes {} majority count {}", filename, entry.getKey(), entry.getValue(),
261                    majorityCount);
262            if (entry.getValue() >= majorityCount) {
263                return entry.getKey();
264            }
265        }
266
267        return "";
268    }
269
270    /**
271     * Returns true if the checksum reported by admin data is equal to the majority checksum. If no majority checksum
272     * exists true is also returned. When this method returns false it is possible to correct the admin checksum using
273     * the majority checksum - when true is returned no better checksum exists for admin data.
274     *
275     * @return true, if the checksum reported by admin data is equal to the majority checksum
276     */
277    public boolean isAdminCheckSumOk() {
278        String referenceCheckSum = getReferenceCheckSum();
279        if (referenceCheckSum.isEmpty()) {
280            return true;
281        }
282        return adminStatus.getChecksum().equals(referenceCheckSum);
283    }
284
285    /**
286     * Returns a human-readable representation of this object. Do not depend on this format for anything automated, as
287     * it may change at any time.
288     *
289     * @return Description of this object.
290     */
291    public String toString() {
292        String res = "PreservationStatus for '" + filename + "'\n";
293        if (adminStatus != null) {
294            res = res + "General store state: " + adminStatus.getGeneralStoreState().getState() + " "
295                    + adminStatus.getGeneralStoreState().getLastChanged() + "\n";
296        }
297        return res;
298    }
299
300    /**
301     * Get the filename, this FilePreservationState is about. Needed to get at the filename given to constructor, and
302     * allow for a better datastructure.
303     *
304     * @return the filename
305     */
306    public String getFilename() {
307        return filename;
308    }
309
310}