001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.archive.arcrepository.bitpreservation;
025
026import java.util.ArrayList;
027import java.util.HashMap;
028import java.util.List;
029import java.util.Map;
030
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import dk.netarkivet.archive.arcrepositoryadmin.ChecksumStatus;
035import dk.netarkivet.archive.arcrepositoryadmin.FileListStatus;
036import dk.netarkivet.archive.arcrepositoryadmin.ReplicaFileInfo;
037import dk.netarkivet.common.distribute.arcrepository.Replica;
038import dk.netarkivet.common.distribute.arcrepository.ReplicaType;
039import dk.netarkivet.common.exceptions.ArgumentNotValid;
040import dk.netarkivet.common.exceptions.IllegalState;
041
042/**
043 * This class contains the preservation data based on the database data of a given filename. Contains the
044 * ReplicaFileInfos corresponding to the file.
045 */
046public class DatabasePreservationState implements PreservationState {
047
048    /** The log. */
049    private static final Logger log = LoggerFactory.getLogger(DatabasePreservationState.class);
050
051    /**
052     * The map containing all the entries for in the replicafileinfo table in the database and the replica they
053     * correspond to.
054     */
055    private Map<Replica, ReplicaFileInfo> entries = new HashMap<Replica, ReplicaFileInfo>();
056    /** The name of the file. */
057    private String filename;
058
059    /**
060     * Constructor.
061     *
062     * @param fileName The name of the file.
063     * @param rfis A list of the ReplicaFileInfo entries in the database for the given file.
064     * @throws ArgumentNotValid If the filename is null or the empty string, or if the list of ReplicaFileInfos are null
065     * or empty.
066     */
067    public DatabasePreservationState(String fileName, List<ReplicaFileInfo> rfis) throws ArgumentNotValid {
068        ArgumentNotValid.checkNotNullOrEmpty(fileName, "String fileName");
069        ArgumentNotValid.checkNotNullOrEmpty(rfis, "List<ReplicaFileInfo> rfis");
070
071        this.filename = fileName;
072
073        // retrieve the replica, and put it into the map along the fileinfo.
074        for (ReplicaFileInfo rfi : rfis) {
075            Replica rep = Replica.getReplicaFromId(rfi.getReplicaId());
076            entries.put(rep, rfi);
077        }
078    }
079
080    /**
081     * Get the checksum of this file in a specific replica.
082     *
083     * @param replica The replica to get the checksum from.
084     * @return A list of the checksums for the file within the replica (only more than one if there is duplicates in a
085     * bitarchive replica). An empty list is returned if no file is present or if an error occurred.
086     * @throws ArgumentNotValid If the replica is null.
087     */
088    public List<String> getReplicaChecksum(Replica replica) throws ArgumentNotValid {
089        ArgumentNotValid.checkNotNull(replica, "Replica replica");
090
091        // return empty list if the file is missing from replica.
092        if (entries.get(replica).getFileListState().equals(FileListStatus.MISSING)) {
093            return new ArrayList<String>(0);
094        }
095
096        // initialize resulting array.
097        List<String> res = new ArrayList<String>(1);
098        // retrieve checksum for replica, and put into array.
099        res.add(getUniqueChecksum(replica));
100
101        return res;
102    }
103
104    /**
105     * Get the MD5 checksum stored in the admin data. Inherited dummy function. No admin data for database instance,
106     * thus no admin data checksum.
107     *
108     * @return Checksum value as found in the admin data given at creation.
109     */
110    public String getAdminChecksum() {
111        // Dummy function.
112        return "NO ADMIN CHECKSUM!";
113    }
114
115    /**
116     * Get the status of the file in a replica, according to the admin data. This returns the status as a string for
117     * presentation purposes only.
118     *
119     * @param replica The replica to get status for
120     * @return Status that the admin data knows for this file in the replica.
121     * @throws ArgumentNotValid If the replica is null.
122     */
123    public String getAdminReplicaState(Replica replica) throws ArgumentNotValid {
124        ArgumentNotValid.checkNotNull(replica, "Replica replica");
125
126        return entries.get(replica).getUploadState().toString();
127    }
128
129    /**
130     * INHERITED DUMMY FUNCTION!
131     *
132     * @return true, since a non-existing admin.data is OK for the database instance.
133     */
134    public boolean isAdminDataOk() {
135        // No admin data = OK
136        return true;
137    }
138
139    /**
140     * Returns a reference to a replica that contains a version of the file with the correct checksum.
141     * <p>
142     * The correct checksum is defined as the checksum that the majority of the replica and admin data agree upon.
143     * <p>
144     * If no replica exists with a correct version of the file null is returned.
145     *
146     * @return the name of the reference replica or null if no reference exists.
147     */
148    public Replica getReferenceBitarchive() {
149        for (Map.Entry<Replica, ReplicaFileInfo> entry : entries.entrySet()) {
150            // Check whether it is a bitarchive with OK checksum.
151            if (entry.getKey().getType().equals(ReplicaType.BITARCHIVE)
152                    && entry.getValue().getChecksumStatus().equals(ChecksumStatus.OK)) {
153                log.debug("Found reference bitarchive replica for file '{}'.", filename);
154                return entry.getKey();
155            }
156        }
157
158        // If no replica is found, then report and return null.
159        log.warn("Cannot find a reference bitarchive for the file '{}'. Returning null.", filename);
160        return null;
161    }
162
163    /**
164     * Get a checksum that the whole replica agrees upon, or else "".
165     *
166     * @param replica A replica to get checksum for this file from
167     * @return The checksum for this file in the replica, if all machines that have that file agree, otherwise "". If no
168     * checksums are found, also returns "".
169     * @throws ArgumentNotValid If the replica is null.
170     */
171    public String getUniqueChecksum(Replica replica) throws ArgumentNotValid {
172        ArgumentNotValid.checkNotNull(replica, "Replica replica");
173
174        // return "" if the file is missing.
175        if (entries.get(replica).getFileListState().equals(FileListStatus.MISSING)) {
176            return "";
177        }
178
179        return entries.get(replica).getChecksum();
180    }
181
182    /**
183     * Check if the file is missing from a replica.
184     *
185     * @param replica the replica to check
186     * @return true if the file is missing from the replica
187     * @throws ArgumentNotValid If the replica is null.
188     */
189    public boolean fileIsMissing(Replica replica) throws ArgumentNotValid {
190        ArgumentNotValid.checkNotNull(replica, "Replica replica");
191
192        // TODO Is it missing if the status is unknown?
193        return entries.get(replica).getFileListState() == FileListStatus.MISSING;
194    }
195
196    /**
197     * THIS IS VOTING! Retrieve checksum that the majority of checksum references replicas agree upon.
198     * <p>
199     * TODO Voting is already done by the DatabasedActiveBitPreservation. Thus replace with finding an entry with
200     * checksum-status = OK.
201     *
202     * @return the reference checksum or "" if no majority exists
203     */
204    public String getReferenceCheckSum() {
205        // Map containing the checksum and the count
206        Map<String, Integer> checksumCount = new HashMap<String, Integer>();
207
208        log.debug("Creating checksum count map for voting.");
209
210        // Insert all the checksum of all the entries into the map.
211        for (ReplicaFileInfo rfi : entries.values()) {
212            String checksum = rfi.getChecksum();
213
214            // ignore if the checksum is invalid.
215            if (checksum == null || checksum.isEmpty()) {
216                log.warn("invalid checksum for replicafileinfo: {}", rfi);
217                continue;
218            }
219
220            if (checksumCount.containsKey(checksum)) {
221                // retrieve the count and add one
222                Integer count = checksumCount.get(checksum) + 1;
223                // put the count back into the map.
224                checksumCount.put(checksum, count);
225            } else {
226                // Put the checksum into the map, with the count one.
227                checksumCount.put(checksum, Integer.valueOf(1));
228            }
229        }
230
231        log.debug("Perform the actual voting.");
232
233        // go through the map to find the largest count.
234        int largest = -1;
235        String res = "NO CHECKSUMS!";
236        boolean unique = false;
237        for (Map.Entry<String, Integer> checksumEntry : checksumCount.entrySet()) {
238            // check whether this has the highest count.
239            if (checksumEntry.getValue().intValue() > largest) {
240                unique = true;
241                largest = checksumEntry.getValue().intValue();
242                res = checksumEntry.getKey();
243            } else if (checksumEntry.getValue().intValue() == largest) {
244                // If several checksums has the same largest count, then
245                // the checksum is not unique.
246                unique = false;
247            }
248        }
249
250        // Check whether unique, and report other wise.
251        if (!unique) {
252            // TODO handle differently? send notification?
253            String errMsg = "No common checksum was found for the file '" + filename + "'." + " The checksums found: "
254                    + checksumCount;
255            log.error(errMsg);
256            throw new IllegalState(errMsg);
257        }
258
259        // log the results.
260        log.info("The replicas have voted about the checksum for the file '{}' and have elected the checksum '{}'.",
261                filename, res);
262
263        return res;
264    }
265
266    /**
267     * Returns true if the checksum reported by admin data is equal to the majority checksum. If no majority checksum
268     * exists true is also returned. When this method returns false it is possible to correct the admin checksum using
269     * the majority checksum - when true is returned no better checksum exists for admin data.
270     *
271     * @return true, if the checksum reported by admin data is equal to the majority checksum
272     */
273    public boolean isAdminCheckSumOk() {
274        // The database is always OK.
275        return true;
276    }
277
278    /**
279     * Returns a human-readable representation of this object. Do not depend on this format for anything automated, as
280     * it may change at any time.
281     *
282     * @return Description of this object.
283     */
284    public String toString() {
285        String res = "DatabasePreservationStatus for '" + filename + "'\n";
286        for (ReplicaFileInfo rfi : entries.values()) {
287            res += rfi.toString() + "\n";
288        }
289        return res;
290    }
291
292    /**
293     * Get the filename, this FilePreservationState is about. Needed to get at the filename given to constructor, and
294     * allow for a better datastructure.
295     *
296     * @return the filename
297     */
298    public String getFilename() {
299        return filename;
300    }
301
302}