001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.archive.arcrepositoryadmin;
024
025import java.io.File;
026import java.io.FileReader;
027import java.io.IOException;
028import java.sql.Connection;
029import java.sql.Date;
030import java.sql.PreparedStatement;
031import java.sql.ResultSet;
032import java.sql.SQLException;
033import java.sql.Timestamp;
034import java.util.ArrayList;
035import java.util.Calendar;
036import java.util.Collection;
037import java.util.HashSet;
038import java.util.Iterator;
039import java.util.List;
040import java.util.Random;
041import java.util.Set;
042
043import org.apache.commons.io.LineIterator;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047import dk.netarkivet.common.distribute.Channels;
048import dk.netarkivet.common.distribute.arcrepository.Replica;
049import dk.netarkivet.common.distribute.arcrepository.ReplicaStoreState;
050import dk.netarkivet.common.distribute.arcrepository.ReplicaType;
051import dk.netarkivet.common.exceptions.ArgumentNotValid;
052import dk.netarkivet.common.exceptions.IOFailure;
053import dk.netarkivet.common.exceptions.IllegalState;
054import dk.netarkivet.common.exceptions.UnknownID;
055import dk.netarkivet.common.utils.DBUtils;
056import dk.netarkivet.common.utils.ExceptionUtils;
057import dk.netarkivet.common.utils.FileUtils;
058import dk.netarkivet.common.utils.KeyValuePair;
059import dk.netarkivet.common.utils.NotificationType;
060import dk.netarkivet.common.utils.NotificationsFactory;
061import dk.netarkivet.common.utils.StringUtils;
062import dk.netarkivet.common.utils.TimeUtils;
063import dk.netarkivet.common.utils.batch.ChecksumJob;
064
/**
 * Class for storing the bitpreservation cache in a database.
 * <p>
 * This class uses the 'admin.data' file for retrieving the upload status.
 */
070public final class ReplicaCacheDatabase implements BitPreservationDAO {
071
072    /** The log. */
073    protected static final Logger log = LoggerFactory.getLogger(ReplicaCacheDatabase.class);
074
075    /** The current instance. */
076    private static ReplicaCacheDatabase instance;
077
078    /**
079     * The number of entries between logging in either file list or checksum list. This also controls how often the
080     * database connection is renewed in methods {@link #addChecksumInformation(File, Replica)} and
081     * {@link #addFileListInformation(File, Replica)}, where the operations can take hours, and seems to leak memory.
082     */
083    private final int LOGGING_ENTRY_INTERVAL = 1000;
084
085    /** Waiting time in seconds before attempting to initialise the database again. */
086    private final int WAIT_BEFORE_INIT_RETRY = 30;
087
088    /** Number of DB INIT retries. */
089    private final int INIT_DB_RETRIES = 3;
090
091    /**
092     * Constructor. throws IllegalState if unable to initialize the database.
093     */
094    private ReplicaCacheDatabase() {
095        // Get a connection to the archive database
096        Connection con = ArchiveDBConnection.get();
097        try {
098            int retries = 0;
099            boolean initialized = false;
100            while (retries < INIT_DB_RETRIES && !initialized) {
101                retries++;
102                try {
103                    initialiseDB(con);
104                    initialized = true;
105                    log.info("Initialization of database successful");
106                    return;
107                } catch (IOFailure e) {
108                    if (retries < INIT_DB_RETRIES) {
109                        log.info("Initialization failed. Probably because another application is calling the same "
110                                + "method now. Retrying after a minimum of {} seconds: ", WAIT_BEFORE_INIT_RETRY, e);
111                        waitSome();
112                    } else {
113                        throw new IllegalState("Unable to initialize the database.");
114                    }
115                }
116            }
117        } finally {
118            ArchiveDBConnection.release(con);
119        }
120    }
121
122    /**
123     * Wait a while.
124     */
125    private void waitSome() {
126        Random rand = new Random();
127        try {
128            Thread.sleep(WAIT_BEFORE_INIT_RETRY * TimeUtils.SECOND_IN_MILLIS + rand.nextInt(WAIT_BEFORE_INIT_RETRY));
129        } catch (InterruptedException e1) {
130            // Ignored
131        }
132    }
133
134    /**
135     * Method for retrieving the current instance of this class.
136     *
137     * @return The current instance.
138     */
139    public static synchronized ReplicaCacheDatabase getInstance() {
140        if (instance == null) {
141            instance = new ReplicaCacheDatabase();
142        }
143        return instance;
144    }
145
146    /**
147     * Method for initialising the database. This basically makes sure that all the replicas are within the database,
148     * and that no unknown replicas have been defined.
149     *
150     * @param connection An open connection to the archive database
151     */
152    protected void initialiseDB(Connection connection) {
153        // retrieve the list of replicas.
154        Collection<Replica> replicas = Replica.getKnown();
155        // Retrieve the replica IDs currently in the database.
156        List<String> repIds = ReplicaCacheHelpers.retrieveIdsFromReplicaTable(connection);
157        log.debug("IDs for replicas already in the database: {}", StringUtils.conjoin(",", repIds));
158        for (Replica rep : replicas) {
159            // try removing the id from the temporary list of IDs within the DB.
160            // If the remove is not successful, then the replica is already
161            // in the database.
162            if (!repIds.remove(rep.getId())) {
163                // if the replica id cannot be removed from the list, then it
164                // does not exist in the database and must be added.
165                log.info("Inserting replica '{}' in database.", rep.toString());
166                ReplicaCacheHelpers.insertReplicaIntoDB(rep, connection);
167            } else {
168                // Otherwise it already exists in the DB.
169                log.debug("Replica '{}' already inserted in database.", rep.toString());
170            }
171        }
172
173        // If unknown replica ids are found, then throw exception.
174        if (repIds.size() > 0) {
175            throw new IllegalState("The database contain identifiers for the following replicas, which are not "
176                    + "defined in the settings: " + repIds);
177        }
178    }
179
180    /**
181     * Method for retrieving the entry in the replicafileinfo table for a given file and replica.
182     *
183     * @param filename The name of the file for the entry.
184     * @param replica The replica of the entry.
185     * @return The replicafileinfo entry corresponding to the given filename and replica.
186     * @throws ArgumentNotValid If the filename is either null or empty, or if the replica is null.
187     */
188    public ReplicaFileInfo getReplicaFileInfo(String filename, Replica replica) throws ArgumentNotValid {
189        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
190        ArgumentNotValid.checkNotNull(replica, "Replica replica");
191
192        // retrieve replicafileinfo for the given filename
193        // FIXME Use joins!
194        String sql = "SELECT replicafileinfo_guid, replica_id, " + "replicafileinfo.file_id, "
195                + "segment_id, checksum, upload_status, filelist_status, "
196                + "checksum_status, filelist_checkdatetime, checksum_checkdatetime " + "FROM replicafileinfo, file "
197                + " WHERE file.file_id = replicafileinfo.file_id" + " AND file.filename=? AND replica_id=?";
198
199        PreparedStatement s = null;
200        Connection con = ArchiveDBConnection.get();
201        try {
202            s = DBUtils.prepareStatement(con, sql, filename, replica.getId());
203            ResultSet res = s.executeQuery();
204            if (res.next()) {
205                // return the corresponding replica file info.
206                return new ReplicaFileInfo(res);
207            } else {
208                return null;
209            }
210        } catch (SQLException e) {
211            final String message = "SQL error while selecting ResultsSet by executing statement '" + sql + "'.";
212            log.warn(message, e);
213            throw new IOFailure(message, e);
214        } finally {
215            DBUtils.closeStatementIfOpen(s);
216            ArchiveDBConnection.release(con);
217        }
218
219    }
220
221    /**
222     * Method for retrieving the checksum for a specific file. Since a file is not directly attached with a checksum,
223     * the checksum of a file must be found by having the replicafileinfo entries for the file vote about it.
224     *
225     * @param filename The name of the file, whose checksum are to be found.
226     * @return The checksum of the file, or a Null if no validated checksum can be found.
227     * @throws ArgumentNotValid If the filename is either null or the empty string.
228     */
229    public String getChecksum(String filename) throws ArgumentNotValid {
230        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
231
232        Connection con = ArchiveDBConnection.get();
233        try {
234            // retrieve the fileId
235            long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con);
236
237            // Check if a checksum with status OK for the file can be found in
238            // the database
239            for (Replica rep : Replica.getKnown()) {
240                // Return the checksum, if it has a valid status.
241                if (ReplicaCacheHelpers.retrieveChecksumStatusForReplicaFileInfoEntry(fileId, rep.getId(), con) == ChecksumStatus.OK) {
242                    return ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId, rep.getId(), con);
243                }
244            }
245
246            // log that we vote about the file.
247            log.debug("No commonly accepted checksum for the file '{}' has previously been found. "
248                    + "Voting to achieve one.", filename);
249
250            // retrieves all the UNKNOWN_STATE checksums, and return if unanimous.
251            Set<String> checksums = new HashSet<String>();
252
253            for (Replica rep : Replica.getKnown()) {
254                if (ReplicaCacheHelpers.retrieveChecksumStatusForReplicaFileInfoEntry(fileId, rep.getId(), con) != ChecksumStatus.CORRUPT) {
255                    String tmpChecksum = ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId,
256                            rep.getId(), con);
257                    if (tmpChecksum != null) {
258                        checksums.add(tmpChecksum);
259                    } else {
260                        log.info("Replica '{}' has a null checksum for the file '{}'.", rep.getId(),
261                                ReplicaCacheHelpers.retrieveFilenameForFileId(fileId, con));
262                    }
263                }
264            }
265
266            // check if unanimous (thus exactly one!)
267            if (checksums.size() == 1) {
268                // return the first and only value.
269                return checksums.iterator().next();
270            }
271
272            // If no checksums are found, then return null.
273            if (checksums.size() == 0) {
274                log.warn("No checksums found for file '{}'.", filename);
275                return null;
276            }
277
278            log.info("No unanimous checksum found for file '{}'.", filename);
279            // put all into a list for voting
280            List<String> checksumList = new ArrayList<String>();
281            for (Replica rep : Replica.getKnown()) {
282                String cs = ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId, rep.getId(), con);
283
284                if (cs != null) {
285                    checksumList.add(cs);
286                } else {
287                    // log when it is second time we find this checksum to be null?
288                    log.debug("Replica '{}' has a null checksum for the file '{}'.", rep.getId(),
289                            ReplicaCacheHelpers.retrieveFilenameForFileId(fileId, con));
290                }
291            }
292
293            // vote and return the most frequent checksum.
294            return ReplicaCacheHelpers.vote(checksumList);
295
296        } finally {
297            ArchiveDBConnection.release(con);
298        }
299    }
300
301    /**
302     * Retrieves the names of all the files in the file table of the database.
303     *
304     * @return The list of filenames known by the database.
305     */
306    public Collection<String> retrieveAllFilenames() {
307        Connection con = ArchiveDBConnection.get();
308        // make sql query.
309        final String sql = "SELECT filename FROM file";
310        try {
311            // Perform the select.
312            return DBUtils.selectStringList(con, sql, new Object[] {});
313        } finally {
314            ArchiveDBConnection.release(con);
315        }
316    }
317
318    /**
319     * Retrieves the ReplicaStoreState for the entry in the replicafileinfo table, which refers to the given file and
320     * replica.
321     *
322     * @param filename The name of the file in the filetable.
323     * @param replicaId The id of the replica.
324     * @return The ReplicaStoreState for the specified entry.
325     * @throws ArgumentNotValid If the replicaId or the filename are eihter null or the empty string.
326     */
327    public ReplicaStoreState getReplicaStoreState(String filename, String replicaId) throws ArgumentNotValid {
328        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
329        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
330
331        Connection con = ArchiveDBConnection.get();
332
333        // Make query for extracting the upload status.
334        // FIXME Use joins.
335        String sql = "SELECT upload_status FROM replicafileinfo, file WHERE "
336                + "replicafileinfo.file_id = file.file_id AND file.filename = ? " + "AND replica_id = ?";
337        try {
338            // execute the query.
339            int ordinal = DBUtils.selectIntValue(con, sql, filename, replicaId);
340
341            // return the corresponding ReplicaStoreState.
342            return ReplicaStoreState.fromOrdinal(ordinal);
343        } finally {
344            ArchiveDBConnection.release(con);
345        }
346    }
347
348    /**
349     * Sets the ReplicaStoreState for the entry in the replicafileinfo table.
350     *
351     * @param filename The name of the file in the filetable.
352     * @param replicaId The id of the replica.
353     * @param state The ReplicaStoreState for the specified entry.
354     * @throws ArgumentNotValid If the replicaId or the filename are eihter null or the empty string. Or if the
355     * ReplicaStoreState is null.
356     */
357    public void setReplicaStoreState(String filename, String replicaId, ReplicaStoreState state)
358            throws ArgumentNotValid {
359        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
360        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
361        ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state");
362
363        Connection con = ArchiveDBConnection.get();
364        PreparedStatement statement = null;
365        try {
366            // retrieve the guid for the file.
367            long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con);
368
369            // Make query for updating the upload status
370            if (state == ReplicaStoreState.UPLOAD_COMPLETED) {
371                // An UPLOAD_COMPLETE
372                // UPLOAD_COMPLETE => filelist_status = OK, checksum_status = OK
373                String sql = "UPDATE replicafileinfo SET upload_status = ?, "
374                        + "filelist_status = ?, checksum_status = ? " + "WHERE replica_id = ? AND file_id = ?";
375                statement = DBUtils.prepareStatement(con, sql, state.ordinal(), FileListStatus.OK.ordinal(),
376                        ChecksumStatus.OK.ordinal(), replicaId, fileId);
377            } else {
378                String sql = "UPDATE replicafileinfo SET upload_status = ? WHERE replica_id = ? AND file_id = ?";
379                statement = DBUtils.prepareStatement(con, sql, state.ordinal(), replicaId, fileId);
380            }
381
382            // execute the update and commit to database.
383            statement.executeUpdate();
384            con.commit();
385        } catch (SQLException e) {
386            String errMsg = "Received the following SQL error while updating  the database: "
387                    + ExceptionUtils.getSQLExceptionCause(e);
388            log.warn(errMsg, e);
389            throw new IOFailure(errMsg, e);
390        } finally {
391            DBUtils.closeStatementIfOpen(statement);
392            ArchiveDBConnection.release(con);
393        }
394    }
395
396    /**
397     * Creates a new entry for the filename for each replica, and give it the given checksum and set the upload_status =
398     * UNKNOWN_UPLOAD_STATUS.
399     *
400     * @param filename The name of the file.
401     * @param checksum The checksum of the file.
402     * @throws ArgumentNotValid If the filename or the checksum is either null or the empty string.
403     * @throws IllegalState If the file exists with another checksum on one of the replicas. Or if the file has already
404     * been completely uploaded to one of the replicas.
405     */
406    public void insertNewFileForUpload(String filename, String checksum) throws ArgumentNotValid, IllegalState {
407        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
408        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checkums");
409
410        Connection con = ArchiveDBConnection.get();
411        // retrieve the fileId for the filename.
412        long fileId;
413
414        try {
415            // insert into DB, or make sure that it can be inserted.
416            if (existsFileInDB(filename)) {
417                // retrieve the fileId of the existing file.
418                fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con);
419
420                // Check the entries for this file associated with the replicas.
421                for (Replica rep : Replica.getKnown()) {
422                    // Ensure that the file has not been completely uploaded to a
423                    // replica.
424                    ReplicaStoreState us = ReplicaCacheHelpers.retrieveUploadStatus(fileId, rep.getId(), con);
425
426                    if (us.equals(ReplicaStoreState.UPLOAD_COMPLETED)) {
427                        throw new IllegalState("The file has already been completely uploaded to the replica: " + rep);
428                    }
429
430                    // make sure that it has not been attempted uploaded with
431                    // another checksum
432                    String entryCs = ReplicaCacheHelpers.retrieveChecksumForReplicaFileInfoEntry(fileId, rep.getId(),
433                            con);
434
435                    // throw an exception if the registered checksum differs.
436                    if (entryCs != null && !checksum.equals(entryCs)) {
437                        throw new IllegalState("The file '" + filename + "' with checksum '" + entryCs
438                                + "' has attempted being uploaded with the checksum '" + checksum + "'");
439                    }
440                }
441            } else {
442                fileId = ReplicaCacheHelpers.insertFileIntoDB(filename, con);
443            }
444
445            for (Replica rep : Replica.getKnown()) {
446                // retrieve the guid for the corresponding replicafileinfo entry
447                long guid = ReplicaCacheHelpers.retrieveReplicaFileInfoGuid(fileId, rep.getId(), con);
448
449                // Update with the correct information.
450                ReplicaCacheHelpers.updateReplicaFileInfo(guid, checksum, ReplicaStoreState.UNKNOWN_UPLOAD_STATE, con);
451            }
452        } finally {
453            ArchiveDBConnection.release(con);
454        }
455    }
456
457    /**
458     * Method for inserting an entry into the database about a file upload has begun for a specific replica. It is not
459     * tested whether the entry has another checksum or another UploadStatus.
460     *
461     * @param filename The name of the file.
462     * @param replica The replica for the replicafileinfo.
463     * @param state The new ReplicaStoreState for the entry.
464     * @throws ArgumentNotValid If the filename is either null or the empty string. Or if the replica or the status is
465     * null.
466     */
467    public void changeStateOfReplicafileinfo(String filename, Replica replica, ReplicaStoreState state)
468            throws ArgumentNotValid {
469        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
470        ArgumentNotValid.checkNotNull(replica, "Replica rep");
471        ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state");
472
473        PreparedStatement statement = null;
474        Connection connection = null;
475        try {
476            connection = ArchiveDBConnection.get();
477            // retrieve the replicafileinfo_guid for this filename .
478            long guid = ReplicaCacheHelpers.retrieveGuidForFilenameOnReplica(filename, replica.getId(), connection);
479            statement = connection.prepareStatement("UPDATE replicafileinfo SET upload_status = ? "
480                    + "WHERE replicafileinfo_guid = ?");
481            statement.setLong(1, state.ordinal());
482            statement.setLong(2, guid);
483
484            // Perform the update.
485            statement.executeUpdate();
486            connection.commit();
487        } catch (SQLException e) {
488            throw new IllegalState("Cannot update status and checksum of a replicafileinfo in the database.", e);
489        } finally {
490            DBUtils.closeStatementIfOpen(statement);
491            if (connection != null) {
492                ArchiveDBConnection.release(connection);
493            }
494        }
495    }
496
497    /**
498     * Method for inserting an entry into the database about a file upload has begun for a specific replica. It is not
499     * tested whether the entry has another checksum or another UploadStatus.
500     *
501     * @param filename The name of the file.
502     * @param checksum The new checksum for the entry.
503     * @param replica The replica for the replicafileinfo.
504     * @param state The new ReplicaStoreState for the entry.
505     * @throws ArgumentNotValid If the filename or the checksum is either null or the empty string. Or if the replica or
506     * the status is null.
507     * @throws IllegalState If an sql exception is thrown.
508     */
509    public void changeStateOfReplicafileinfo(String filename, String checksum, Replica replica, ReplicaStoreState state)
510            throws ArgumentNotValid, IllegalState {
511        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
512        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
513        ArgumentNotValid.checkNotNull(replica, "Replica rep");
514        ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state");
515
516        PreparedStatement statement = null;
517        Connection connection = null;
518        try {
519            connection = ArchiveDBConnection.get();
520            // retrieve the replicafileinfo_guid for this filename .
521            long guid = ReplicaCacheHelpers.retrieveGuidForFilenameOnReplica(filename, replica.getId(), connection);
522
523            statement = connection.prepareStatement("UPDATE replicafileinfo SET upload_status = ?, checksum = ? "
524                    + "WHERE replicafileinfo_guid = ?");
525            statement.setLong(1, state.ordinal());
526            statement.setString(2, checksum);
527            statement.setLong(3, guid);
528
529            // Perform the update.
530            statement.executeUpdate();
531            connection.commit();
532        } catch (SQLException e) {
533            throw new IllegalState("Cannot update status and checksum of a replicafileinfo in the database.", e);
534        } finally {
535            DBUtils.closeStatementIfOpen(statement);
536            ArchiveDBConnection.release(connection);
537        }
538    }
539
540    /**
541     * Retrieves the names of all the files in the given replica which has the specified UploadStatus.
542     *
543     * @param replicaId The id of the replica which contain the files.
544     * @param state The ReplicaStoreState for the wanted files.
545     * @return The list of filenames for the entries in the replica which has the specified UploadStatus.
546     * @throws ArgumentNotValid If the UploadStatus is null or if the replicaId is either null or the empty string.
547     */
548    public Collection<String> retrieveFilenamesForReplicaEntries(String replicaId, ReplicaStoreState state)
549            throws ArgumentNotValid {
550        ArgumentNotValid.checkNotNull(state, "ReplicaStoreState state");
551        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
552        Connection con = ArchiveDBConnection.get();
553        final String sql = "SELECT filename FROM replicafileinfo "
554                + "LEFT OUTER JOIN file ON replicafileinfo.file_id = file.file_id "
555                + "WHERE replica_id = ? AND upload_status = ?";
556        try {
557            return DBUtils.selectStringList(con, sql, replicaId, state.ordinal());
558        } finally {
559            ArchiveDBConnection.release(con);
560        }
561    }
562
563    /**
564     * Checks whether a file is already in the file table in the database.
565     *
566     * @param filename The name of the file in the database.
567     * @return Whether the file was found in the database.
568     * @throws IllegalState If more than one entry with the given filename was found.
569     */
570    public boolean existsFileInDB(String filename) throws IllegalState {
571        // retrieve the amount of times this replica is within the database.
572        Connection con = ArchiveDBConnection.get();
573        final String sql = "SELECT COUNT(*) FROM file WHERE filename = ?";
574        try {
575            int count = DBUtils.selectIntValue(con, sql, filename);
576
577            // Handle the different cases for count.
578            switch (count) {
579            case 0:
580                return false;
581            case 1:
582                return true;
583            default:
584                throw new IllegalState("Cannot handle " + count + " files " + "with the name '" + filename + "'.");
585            }
586        } finally {
587            ArchiveDBConnection.release(con);
588        }
589    }
590
591    /**
592     * Method for retrieving the filelist_status for a replicafileinfo entry.
593     *
594     * @param filename The name of the file.
595     * @param replica The replica where the file should be.
596     * @return The filelist_status for the file in the replica.
597     * @throws ArgumentNotValid If the replica is null or the filename is either null or the empty string.
598     */
599    public FileListStatus retrieveFileListStatus(String filename, Replica replica) throws ArgumentNotValid {
600        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
601        ArgumentNotValid.checkNotNull(replica, "Replica replica");
602
603        Connection con = ArchiveDBConnection.get();
604        try {
605            // retrieve the filelist_status for the entry.
606            int status = ReplicaCacheHelpers.retrieveFileListStatusFromReplicaFileInfo(filename, replica.getId(), con);
607            // Return the corresponding FileListStatus
608            return FileListStatus.fromOrdinal(status);
609        } finally {
610            ArchiveDBConnection.release(con);
611        }
612
613    }
614
    /**
     * SQL used to update the checksum status of the straightforward cases; executed as the first step of
     * {@link #updateChecksumStatus()}.
     * <p>
     * The innermost query (ss1) groups the replicafileinfo entries by file and checksum, leaving out entries whose
     * filelist_status is MISSING and entries without a checksum. The query around it (ss2) counts these groups per
     * file, so 'checksums' is the number of distinct checksums recorded for the file. For every file with exactly
     * one distinct checksum, the checksum_status of all its entries which are not already OK is set to OK.
     * <p>
     * The status values are embedded as the ordinals of the ChecksumStatus and FileListStatus constants.
     */
    public static final String updateChecksumStatusSql = "" + "UPDATE replicafileinfo SET checksum_status = "
            + ChecksumStatus.OK.ordinal() + " " + "WHERE checksum_status != " + ChecksumStatus.OK.ordinal()
            + " AND file_id IN ( " + "  SELECT file_id " + "  FROM ( "
            + "    SELECT file_id, COUNT(file_id) AS checksums, SUM(replicas) replicas " + "    FROM ( "
            + "      SELECT file_id, COUNT(checksum) AS replicas, checksum " + "      FROM replicafileinfo "
            + "      WHERE filelist_status != " + FileListStatus.MISSING.ordinal() + " AND checksum IS NOT NULL "
            + "      GROUP BY file_id, checksum " + "    ) AS ss1 " + "    GROUP BY file_id " + "  ) AS ss2 "
            + "  WHERE checksums = 1 " + ")";
626
    /**
     * SQL used to select those files whose checksum status has to be voted on; executed as the second step of
     * {@link #updateChecksumStatus()}.
     * <p>
     * The innermost query (ss1) groups the replicafileinfo entries by file and checksum, leaving out entries whose
     * filelist_status is MISSING and entries without a checksum. The query around it (ss2) counts these groups per
     * file, so 'checksums' is the number of distinct checksums recorded for the file. The ids of the files with more
     * than one distinct checksum are selected.
     */
    public static final String selectForFileChecksumVotingSql = "" + "SELECT file_id " + "FROM ( "
            + "  SELECT file_id, COUNT(file_id) AS checksums, SUM(replicas) replicas " + "  FROM ( "
            + "    SELECT file_id, COUNT(checksum) AS replicas, checksum " + "    FROM replicafileinfo "
            + "    WHERE filelist_status != " + FileListStatus.MISSING.ordinal() + " AND checksum IS NOT NULL "
            + "    GROUP BY file_id, checksum " + "  ) AS ss1 " + "  GROUP BY file_id " + ") AS ss2 "
            + "WHERE checksums > 1 ";
636
637    /**
638     * This method is used to update the status for the checksums for all replicafileinfo entries. <br/>
639     * <br/>
640     * For each file in the database, the checksum vote is made in the following way. <br/>
641     * Each entry in the replicafileinfo table containing the file is retrieved. All the unique checksums are retrieved,
642     * e.g. if a checksum is found more than one, then it is ignored. <br/>
643     * If only one unique checksum is found, then if must be the correct one, and all the replicas with this file will
644     * have their checksum_status set to 'OK'. <br/>
645     * If more than one checksum is found, then a vote for the correct checksum is performed. This is done by counting
646     * the amount of time each of the unique checksum is found among the replicafileinfo entries for the current file.
647     * The checksum with most votes is chosen as the correct one, and the checksum_status for all the replicafileinfo
648     * entries with this checksum is set to 'OK', whereas the replicafileinfo entries with a different checksum is set
649     * to 'CORRUPT'. <br/>
650     * If no winner is found then a warning and a notification is issued, and the checksum_status for all the
651     * replicafileinfo entries with for the current file is set to 'UNKNOWN'. <br/>
652     */
653    public void updateChecksumStatus() {
654        log.info("UpdateChecksumStatus operation commencing");
655        Connection con = ArchiveDBConnection.get();
656        boolean autoCommit = true;
657        try {
658            autoCommit = con.getAutoCommit();
659            // Set checksum_status to 'OK' where there is the same
660            // checksum across all replicas.
661            DBUtils.executeSQL(con, updateChecksumStatusSql);
662
663            // Get all the fileids that need processing.
664            // Previously: "SELECT file_id FROM file"
665            Iterator<Long> fileIdsIterator = DBUtils.selectLongIterator(con, selectForFileChecksumVotingSql);
666            // For each fileid
667            while (fileIdsIterator.hasNext()) {
668                long fileId = fileIdsIterator.next();
669                ReplicaCacheHelpers.fileChecksumVote(fileId, con);
670            }
671        } catch (SQLException e) {
672            throw new IOFailure("Error getting auto commit.\n" + ExceptionUtils.getSQLExceptionCause(e), e);
673        } finally {
674            try {
675                con.setAutoCommit(autoCommit);
676            } catch (SQLException e) {
677                log.error("Could not change auto commit back to default!");
678            }
679            ArchiveDBConnection.release(con);
680        }
681        log.info("UpdateChecksumStatus operation completed!");
682    }
683
684    /**
685     * Method for updating the status for a specific file for all the replicas. If the checksums for the replicas differ
686     * for some replica, then based on a checksum vote, a specific checksum is chosen as the 'correct' one, and the
687     * entries with another checksum than the 'correct one' will be marked as corrupt.
688     *
689     * @param filename The name of the file to update the status for.
690     * @throws ArgumentNotValid If the filename is either null or the empty string.
691     */
692    @Override
693    public void updateChecksumStatus(String filename) throws ArgumentNotValid {
694        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
695
696        Connection con = ArchiveDBConnection.get();
697        try {
698            // retrieve the id and vote!
699            Long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con);
700            ReplicaCacheHelpers.fileChecksumVote(fileId, con);
701        } finally {
702            ArchiveDBConnection.release(con);
703        }
704    }
705
706    /**
707     * Given the output of a checksum job, add the results to the database.
708     * <p>
709     * The following fields in the table are updated for each corresponding entry in the replicafileinfo table: <br/>
710     * - checksum = the given checksum. <br/>
711     * - filelist_status = ok. <br/>
712     * - filelist_checkdatetime = now. <br/>
713     * - checksum_checkdatetime = now.
714     *
715     * @param checksumOutputFile The output of a checksum job in a file
716     * @param replica The replica this checksum job is for.
717     */
718    @Override
719    public void addChecksumInformation(File checksumOutputFile, Replica replica) {
720        // validate arguments
721        ArgumentNotValid.checkNotNull(checksumOutputFile, "File checksumOutputFile");
722        ArgumentNotValid.checkNotNull(replica, "Replica replica");
723
724        // Sort the checksumOutputFile file.
725        File sortedResult = new File(checksumOutputFile.getParent(), checksumOutputFile.getName() + ".sorted");
726        FileUtils.sortFile(checksumOutputFile, sortedResult);
727        final long datasize = FileUtils.countLines(sortedResult);
728
729        Set<Long> missingReplicaRFIs = null;
730        Connection con = ArchiveDBConnection.get();
731        LineIterator lineIterator = null;
732        try {
733            // Make sure, that the replica exists in the database.
734            if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) {
735                String msg = "Cannot add checksum information, since the replica '" + replica.toString()
736                        + "' does not exist within the database.";
737                log.warn(msg);
738                throw new IOFailure(msg);
739            }
740
741            log.info("Starting processing of {} checksum entries for replica {}", datasize, replica.getId());
742
743            // retrieve the list of files already known by this cache.
744            // TODO This does not scale! Should the datastructure
745            // (missingReplicaRFIs) be disk-bound in some way, or optimized
746            // in some way, e.g. using it.unimi.dsi.fastutil.longs.LongArrayList
747            missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con);
748
749            // Initialize the String iterator
750            lineIterator = new LineIterator(new FileReader(sortedResult));
751
752            String lastFilename = "";
753            String lastChecksum = "";
754
755            int i = 0;
756            while (lineIterator.hasNext()) {
757                String line = lineIterator.next();
758                // log that it is in progress every so often.
759                if ((i % LOGGING_ENTRY_INTERVAL) == 0) {
760                    log.info("Processed checksum list entry number {} for replica {}", i, replica);
761                    // Close connection, and open another one
762                    // to avoid memory-leak (NAS-2003)
763                    ArchiveDBConnection.release(con);
764                    con = ArchiveDBConnection.get();
765                    log.debug("Databaseconnection has now been renewed");
766                }
767                ++i;
768
769                // parse the input.
770                final KeyValuePair<String, String> entry = ChecksumJob.parseLine(line);
771                final String filename = entry.getKey();
772                final String checksum = entry.getValue();
773
774                // check for duplicates
775                if (filename.equals(lastFilename)) {
776                    // if different checksums, then
777                    if (!checksum.equals(lastChecksum)) {
778                        // log and send notification
779                        String errMsg = "Unidentical duplicates of file '" + filename + "' with the checksums '"
780                                + lastChecksum + "' and '" + checksum + "'. First instance used.";
781                        log.warn(errMsg);
782                        NotificationsFactory.getInstance().notify(errMsg, NotificationType.WARNING);
783                    } else {
784                        // log about duplicate identical
785                        log.debug("Duplicates of the file '{}' found with the same checksum '{}'.", filename, checksum);
786                    }
787
788                    // avoid overhead of inserting duplicates twice.
789                    continue;
790                }
791
792                // set these value to be the old values in next iteration.
793                lastFilename = filename;
794                lastChecksum = checksum;
795
796                // Process the current (filename + checksum) combo for this replica
797                // Remove the returned replicafileinfo guid from the missing entries.
798                missingReplicaRFIs.remove(ReplicaCacheHelpers.processChecksumline(filename, checksum, replica, con));
799            }
800        } catch (IOException e) {
801            throw new IOFailure("Unable to read checksum entries from file", e);
802        } finally {
803            ArchiveDBConnection.release(con);
804            LineIterator.closeQuietly(lineIterator);
805        }
806
807        con = ArchiveDBConnection.get();
808        try {
809            // go through the not found replicafileinfo for this replica to change
810            // their filelist_status to missing.
811            if (missingReplicaRFIs.size() > 0) {
812                log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica);
813                for (long rfi : missingReplicaRFIs) {
814                    // set the replicafileinfo in the database to missing.
815                    ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con);
816                }
817            }
818
819            // update the checksum updated date for this replica.
820            ReplicaCacheHelpers.updateChecksumDateForReplica(replica, con);
821            ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con);
822
823            log.info("Finished processing of {} checksum entries for replica {}", datasize, replica.getId());
824        } finally {
825            ArchiveDBConnection.release(con);
826        }
827    }
828
829    /**
830     * Method for adding the results from a list of filenames on a replica. This list of filenames should return the
831     * list of all the files within the database.
832     * <p>
833     * For each file in the FileListJob the following fields are set for the corresponding entry in the replicafileinfo
834     * table: <br/>
835     * - filelist_status = ok. <br/>
836     * - filelist_checkdatetime = now.
837     * <p>
838     * For each entry in the replicafileinfo table for the replica which are missing in the results from the FileListJob
839     * the following fields are assigned the following values: <br/>
840     * - filelist_status = missing. <br/>
841     * - filelist_checkdatetime = now.
842     *
843     * @param filelistFile The list of filenames either parsed from a FilelistJob or the result from a
844     * GetAllFilenamesMessage.
845     * @param replica The replica, which the FilelistBatchjob has run upon.
846     * @throws ArgumentNotValid If the filelist or the replica is null.
847     * @throws UnknownID If the replica does not already exist in the database.
848     */
849    @Override
850    public void addFileListInformation(File filelistFile, Replica replica) throws ArgumentNotValid, UnknownID {
851        ArgumentNotValid.checkNotNull(filelistFile, "File filelistFile");
852        ArgumentNotValid.checkNotNull(replica, "Replica replica");
853
854        // Sort the filelist file.
855        File sortedResult = new File(filelistFile.getParent(), filelistFile.getName() + ".sorted");
856        FileUtils.sortFile(filelistFile, sortedResult);
857        final long datasize = FileUtils.countLines(sortedResult);
858
859        Connection con = ArchiveDBConnection.get();
860        Set<Long> missingReplicaRFIs = null;
861        LineIterator lineIterator = null;
862        try {
863            // Make sure, that the replica exists in the database.
864            if (!ReplicaCacheHelpers.existsReplicaInDB(replica, con)) {
865                String errorMsg = "Cannot add filelist information, since the replica '" + replica.toString()
866                        + "' does not exist in the database.";
867                log.warn(errorMsg);
868                throw new UnknownID(errorMsg);
869            }
870
871            log.info("Starting processing of {} filelist entries for replica {}", datasize, replica.getId());
872
873            // retrieve the list of files already known by this cache.
874            // TODO This does not scale! Should this datastructure
875            // (missingReplicaRFIs) be disk-bound in some way.
876            missingReplicaRFIs = ReplicaCacheHelpers.retrieveReplicaFileInfoGuidsForReplica(replica.getId(), con);
877
878            // Initialize String iterator
879            lineIterator = new LineIterator(new FileReader(sortedResult));
880
881            String lastFileName = "";
882            int i = 0;
883            while (lineIterator.hasNext()) {
884                String file = lineIterator.next();
885                // log that it is in progress every so often.
886                if ((i % LOGGING_ENTRY_INTERVAL) == 0) {
887                    log.info("Processed file list entry number {} for replica {}", i, replica);
888                    // Close connection, and open another one
889                    // to avoid memory-leak (NAS-2003)
890                    ArchiveDBConnection.release(con);
891                    con = ArchiveDBConnection.get();
892                    log.debug("Databaseconnection has now been renewed");
893                }
894                ++i;
895
896                // handle duplicates.
897                if (file.equals(lastFileName)) {
898                    log.warn("There have been found multiple files with the name '{}'", file);
899                    continue;
900                }
901
902                lastFileName = file;
903                // Add information for one file, and remove the ReplicaRFI from the
904                // set of missing ones.
905                missingReplicaRFIs.remove(ReplicaCacheHelpers.addFileInformation(file, replica, con));
906            }
907        } catch (IOException e) {
908            throw new IOFailure("Unable to read the filenames from file", e);
909        } finally {
910            ArchiveDBConnection.release(con);
911            LineIterator.closeQuietly(lineIterator);
912        }
913
914        con = ArchiveDBConnection.get();
915        try {
916            // go through the not found replicafileinfo for this replica to change
917            // their filelist_status to missing.
918            if (missingReplicaRFIs.size() > 0) {
919                log.warn("Found {} missing files for replica '{}'.", missingReplicaRFIs.size(), replica);
920                for (long rfi : missingReplicaRFIs) {
921                    // set the replicafileinfo in the database to missing.
922                    ReplicaCacheHelpers.updateReplicaFileInfoMissingFromFilelist(rfi, con);
923                }
924            }
925            // Update the date for filelist update for this replica.
926            ReplicaCacheHelpers.updateFilelistDateForReplica(replica, con);
927        } finally {
928            ArchiveDBConnection.release(con);
929        }
930    }
931
932    /**
933     * Get the date for the last file list job.
934     *
935     * @param replica The replica to get the date for.
936     * @return The date of the last missing files update for the replica. A null is returned if no last missing files
937     * update has been performed.
938     * @throws ArgumentNotValid If the replica is null.
939     * @throws IllegalArgumentException If the Date of the Timestamp cannot be instantiated.
940     */
941    @Override
942    public Date getDateOfLastMissingFilesUpdate(Replica replica) throws ArgumentNotValid, IllegalArgumentException {
943        ArgumentNotValid.checkNotNull(replica, "Replica replica");
944        Connection con = ArchiveDBConnection.get();
945        String result = null;
946        try {
947            // sql for retrieving this replicafileinfo_guid.
948            String sql = "SELECT filelist_updated FROM replica WHERE replica_id = ?";
949            result = DBUtils.selectStringValue(con, sql, replica.getId());
950        } finally {
951            ArchiveDBConnection.release(con);
952        }
953        // return null if the field has no be set for this replica.
954        if (result == null) {
955            log.debug("The 'filelist_updated' field has not been set, as no missing files update has been performed yet.");
956            return null;
957        } else {
958            // Parse the timestamp into a date.
959            return new Date(Timestamp.valueOf(result).getTime());
960        }
961    }
962
963    /**
964     * Method for retrieving the date for the last update for corrupted files.
965     * <p>
966     * This method does not contact the replicas, it only retrieves the data from the last time the checksum was
967     * retrieved.
968     *
969     * @param replica The replica to find the date for the latest update for corruption of files.
970     * @return The date for the last checksum update. A null is returned if no wrong files update has been performed for
971     * this replica.
972     * @throws ArgumentNotValid If the replica is null.
973     * @throws IllegalArgumentException If the Date of the Timestamp cannot be instantiated.
974     */
975    @Override
976    public Date getDateOfLastWrongFilesUpdate(Replica replica) throws ArgumentNotValid, IllegalArgumentException {
977        ArgumentNotValid.checkNotNull(replica, "Replica replica");
978        Connection con = ArchiveDBConnection.get();
979        String result = null;
980        try {
981            // The SQL statement for retrieving the date for last update of
982            // checksum for the replica.
983            final String sql = "SELECT checksum_updated FROM replica WHERE replica_id = ?";
984            result = DBUtils.selectStringValue(con, sql, replica.getId());
985        } finally {
986            ArchiveDBConnection.release(con);
987        }
988        // return null if the field has no be set for this replica.
989        if (result == null) {
990            log.debug("The 'checksum_updated' field has not been set, as no wrong files update has been performed yet.");
991            return null;
992        } else {
993            // Parse the timestamp into a date.
994            return new Date(Timestamp.valueOf(result).getTime());
995        }
996    }
997
998    /**
999     * Method for retrieving the number of files missing from a specific replica.
1000     * <p>
1001     * This method does not contact the replica directly, it only retrieves the count of missing files from the last
1002     * filelist update.
1003     *
1004     * @param replica The replica to find the number of missing files for.
1005     * @return The number of missing files for the replica.
1006     * @throws ArgumentNotValid If the replica is null.
1007     */
1008    @Override
1009    public long getNumberOfMissingFilesInLastUpdate(Replica replica) throws ArgumentNotValid {
1010        ArgumentNotValid.checkNotNull(replica, "Replica replica");
1011        Connection con = ArchiveDBConnection.get();
1012        // The SQL statement to retrieve the number of entries in the
1013        // replicafileinfo table with file_status set to either missing or
1014        // no_status for the replica.
1015        // FIXME Consider using a UNION instead of OR.
1016        final String sql = "SELECT COUNT(*) FROM replicafileinfo "
1017                + "WHERE replica_id = ? AND ( filelist_status = ? OR filelist_status = ?)";
1018        try {
1019            return DBUtils.selectLongValue(con, sql, replica.getId(), FileListStatus.MISSING.ordinal(),
1020                    FileListStatus.NO_FILELIST_STATUS.ordinal());
1021        } finally {
1022            ArchiveDBConnection.release(con);
1023        }
1024    }
1025
1026    /**
1027     * Method for retrieving the list of the names of the files which was missing for the replica in the last filelist
1028     * update.
1029     * <p>
1030     * This method does not contact the replica, it only uses the database to find the files, which was missing during
1031     * the last filelist update.
1032     *
1033     * @param replica The replica to find the list of missing files for.
1034     * @return A list containing the names of the files which are missing in the given replica.
1035     * @throws ArgumentNotValid If the replica is null.
1036     */
1037    @Override
1038    public Iterable<String> getMissingFilesInLastUpdate(Replica replica) throws ArgumentNotValid {
1039        ArgumentNotValid.checkNotNull(replica, "Replica replica");
1040        Connection con = ArchiveDBConnection.get();
1041        // The SQL statement to retrieve the filenames of the missing
1042        // replicafileinfo to the given replica.
1043        final String sql = "SELECT filename FROM replicafileinfo "
1044                + "LEFT OUTER JOIN file ON replicafileinfo.file_id = file.file_id "
1045                + "WHERE replica_id = ? AND ( filelist_status = ? OR filelist_status = ? )";
1046        try {
1047            return DBUtils.selectStringList(con, sql, replica.getId(), FileListStatus.MISSING.ordinal(),
1048                    FileListStatus.NO_FILELIST_STATUS.ordinal());
1049        } finally {
1050            ArchiveDBConnection.release(con);
1051        }
1052    }
1053
1054    /**
1055     * Method for retrieving the amount of files with a incorrect checksum within a replica.
1056     * <p>
1057     * This method does not contact the replica, it only uses the database to count the amount of files which are
1058     * corrupt.
1059     *
1060     * @param replica The replica to find the number of corrupted files for.
1061     * @return The number of corrupted files.
1062     * @throws ArgumentNotValid If the replica is null.
1063     */
1064    @Override
1065    public long getNumberOfWrongFilesInLastUpdate(Replica replica) throws ArgumentNotValid {
1066        ArgumentNotValid.checkNotNull(replica, "Replica");
1067        Connection con = ArchiveDBConnection.get();
1068        // The SQL statement to retrieve the number of corrupted entries in
1069        // the replicafileinfo table for the given replica.
1070        final String sql = "SELECT COUNT(*) FROM replicafileinfo WHERE replica_id = ? AND checksum_status = ?";
1071        try {
1072            return DBUtils.selectLongValue(con, sql, replica.getId(), ChecksumStatus.CORRUPT.ordinal());
1073        } finally {
1074            ArchiveDBConnection.release(con);
1075        }
1076    }
1077
1078    /**
1079     * Method for retrieving the list of the files in the replica which have a incorrect checksum. E.g. the
1080     * checksum_status is set to CORRUPT.
1081     * <p>
1082     * This method does not contact the replica, it only uses the local database.
1083     *
1084     * @param replica The replica to find the list of corrupted files for.
1085     * @return The list of files which have wrong checksums.
1086     * @throws ArgumentNotValid If the replica is null.
1087     */
1088    @Override
1089    public Iterable<String> getWrongFilesInLastUpdate(Replica replica) throws ArgumentNotValid {
1090        ArgumentNotValid.checkNotNull(replica, "Replica replica");
1091        Connection con = ArchiveDBConnection.get();
1092        // The SQL statement to retrieve the filenames for the corrupted files
1093        // in the replicafileinfo table for the given replica.
1094        String sql = "SELECT filename FROM replicafileinfo "
1095                + "LEFT OUTER JOIN file ON replicafileinfo.file_id = file.file_id "
1096                + "WHERE replica_id = ? AND checksum_status = ?";
1097        try {
1098            return DBUtils.selectStringList(con, sql, replica.getId(), ChecksumStatus.CORRUPT.ordinal());
1099        } finally {
1100            ArchiveDBConnection.release(con);
1101        }
1102    }
1103
1104    /**
1105     * Method for retrieving the number of files within a replica. This count all the files which are not missing from
1106     * the replica, thus all entries in the replicafileinfo table which has the filelist_status set to OK. It is ignored
1107     * whether the files has a correct checksum.
1108     * <p>
1109     * This method does not contact the replica, it only uses the local database.
1110     *
1111     * @param replica The replica to count the number of files for.
1112     * @return The number of files within the replica.
1113     * @throws ArgumentNotValid If the replica is null.
1114     */
1115    @Override
1116    public long getNumberOfFiles(Replica replica) throws ArgumentNotValid {
1117        ArgumentNotValid.checkNotNull(replica, "Replica replica");
1118        Connection con = ArchiveDBConnection.get();
1119        // The SQL statement to retrieve the amount of entries in the
1120        // replicafileinfo table for the replica which have the
1121        // filelist_status set to OK.
1122        String sql = "SELECT COUNT(*) FROM replicafileinfo WHERE replica_id  = ? AND filelist_status = ?";
1123        try {
1124            return DBUtils.selectLongValue(con, sql, replica.getId(), FileListStatus.OK.ordinal());
1125        } finally {
1126            ArchiveDBConnection.release(con);
1127        }
1128    }
1129
1130    /**
1131     * Method for finding a replica with a valid version of a file. This method is used in order to find a replica from
1132     * which a file should be retrieved, during the process of restoring a corrupt file on another replica.
1133     * <p>
1134     * This replica must of the type bitarchive, since a file cannot be retrieved from a checksum replica.
1135     *
1136     * @param filename The name of the file which needs to have a valid version in a bitarchive.
1137     * @return A bitarchive which contains a valid version of the file, or null if no such bitarchive exists.
1138     * @throws ArgumentNotValid If the filename is null or the empty string.
1139     */
1140    @Override
1141    public Replica getBitarchiveWithGoodFile(String filename) throws ArgumentNotValid {
1142        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
1143
1144        Connection con = ArchiveDBConnection.get();
1145        try {
1146            // Retrieve a list of replicas where the the checksum status is OK
1147            List<String> replicaIds = ReplicaCacheHelpers.retrieveReplicaIdsWithOKChecksumStatus(filename, con);
1148
1149            // go through the list, and return the first valid bitarchive-replica.
1150            for (String repId : replicaIds) {
1151                // Retrieve the replica type.
1152                ReplicaType repType = ReplicaCacheHelpers.retrieveReplicaType(repId, con);
1153
1154                // If the replica is of type BITARCHIVE then return it.
1155                if (repType.equals(ReplicaType.BITARCHIVE)) {
1156                    log.trace("The replica with id '{}' is the first bitarchive replica which contains the file '{}' "
1157                            + "with a valid checksum.", repId, filename);
1158                    return Replica.getReplicaFromId(repId);
1159                }
1160            }
1161        } finally {
1162            ArchiveDBConnection.release(con);
1163        }
1164
1165        // Notify the administrator about that no proper bitarchive was found.
1166        NotificationsFactory.getInstance().notify(
1167                "No bitarchive replica " + "was found which contains the file '" + filename + "'.",
1168                NotificationType.WARNING);
1169
1170        // If no bitarchive exists that contains the file with a OK checksum_status.
1171        // then return null.
1172        return null;
1173    }
1174
1175    /**
1176     * Method for finding a replica with a valid version of a file. This method is used in order to find a replica from
1177     * which a file should be retrieved, during the process of restoring a corrupt file on another replica.
1178     * <p>
1179     * This replica must of the type bitarchive, since a file cannot be retrieved from a checksum replica.
1180     *
1181     * @param filename The name of the file which needs to have a valid version in a bitarchive.
1182     * @param badReplica The Replica which has a bad copy of the given file
1183     * @return A bitarchive which contains a valid version of the file, or null if no such bitarchive exists (in which
1184     * case, a notification is sent)
1185     * @throws ArgumentNotValid If the replica is null or the filename is either null or the empty string.
1186     */
1187    @Override
1188    public Replica getBitarchiveWithGoodFile(String filename, Replica badReplica) throws ArgumentNotValid {
1189        ArgumentNotValid.checkNotNull(badReplica, "Replica badReplica");
1190        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
1191
1192        Connection con = ArchiveDBConnection.get();
1193        try {
1194            // Then retrieve a list of replicas where the the checksum status is
1195            // OK
1196            List<String> replicaIds = ReplicaCacheHelpers.retrieveReplicaIdsWithOKChecksumStatus(filename, con);
1197
1198            // Make sure, that the bad replica is not returned.
1199            replicaIds.remove(badReplica.getId());
1200
1201            // go through the list, and return the first valid
1202            // bitarchive-replica.
1203            for (String repId : replicaIds) {
1204                // Retrieve the replica type.
1205                ReplicaType repType = ReplicaCacheHelpers.retrieveReplicaType(repId, con);
1206
1207                // If the replica is of type BITARCHIVE then return it.
1208                if (repType.equals(ReplicaType.BITARCHIVE)) {
1209                    log.trace(
1210                            "The replica with id '{}' is the first bitarchive replica which contains the file '{}' with a valid checksum.",
1211                            repId, filename);
1212                    return Replica.getReplicaFromId(repId);
1213                }
1214            }
1215        } finally {
1216            ArchiveDBConnection.release(con);
1217        }
1218        // Notify the administrator about that no proper bitarchive was found, and log the incidence
1219        final String msg = "No bitarchive replica " + "was found which contains the file '" + filename + "'.";
1220        log.warn(msg);
1221        NotificationsFactory.getInstance().notify(msg, NotificationType.WARNING);
1222
1223        return null;
1224    }
1225
1226    /**
1227     * Method for updating a specific entry in the replicafileinfo table. Based on the filename, checksum and replica it
1228     * is verified whether a file is missing, corrupt or valid.
1229     *
1230     * @param filename Name of the file.
1231     * @param checksum The checksum of the file. Is allowed to be null, if no file is found.
1232     * @param replica The replica where the file exists.
1233     * @throws ArgumentNotValid If the filename is null or the empty string, or if the replica is null.
1234     */
1235    @Override
1236    public void updateChecksumInformationForFileOnReplica(String filename, String checksum, Replica replica)
1237            throws ArgumentNotValid {
1238        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
1239        ArgumentNotValid.checkNotNull(replica, "Replica replica");
1240
1241        PreparedStatement statement = null;
1242        Connection connection = null;
1243        try {
1244            connection = ArchiveDBConnection.get();
1245
1246            long guid = ReplicaCacheHelpers.retrieveGuidForFilenameOnReplica(filename, replica.getId(), connection);
1247
1248            Date now = new Date(Calendar.getInstance().getTimeInMillis());
1249
1250            // handle differently whether a checksum was retrieved.
1251            if (checksum == null) {
1252                // Set to MISSING! and do not update the checksum
1253                // (cannot insert null).
1254                String sql = "UPDATE replicafileinfo "
1255                        + "SET filelist_status = ?, checksum_status = ?, filelist_checkdatetime = ? "
1256                        + "WHERE replicafileinfo_guid = ?";
1257                statement = DBUtils.prepareStatement(connection, sql, FileListStatus.MISSING.ordinal(),
1258                        ChecksumStatus.UNKNOWN.ordinal(), now, guid);
1259            } else {
1260                String sql = "UPDATE replicafileinfo "
1261                        + "SET checksum = ?, filelist_status = ?, filelist_checkdatetime = ? "
1262                        + "WHERE replicafileinfo_guid = ?";
1263                statement = DBUtils.prepareStatement(connection, sql, checksum, FileListStatus.OK.ordinal(), now, guid);
1264            }
1265            statement.executeUpdate();
1266            connection.commit();
1267        } catch (Exception e) {
1268            throw new IOFailure("Could not update single checksum entry.", e);
1269        } finally {
1270            DBUtils.closeStatementIfOpen(statement);
1271            if (connection != null) {
1272                ArchiveDBConnection.release(connection);
1273            }
1274        }
1275    }
1276
1277    /**
1278     * Method for inserting a line of Admin.Data into the database. It is assumed that it is a '0.4' admin.data line.
1279     *
1280     * @param line The line to insert into the database.
1281     * @return Whether the line was valid.
1282     * @throws ArgumentNotValid If the line is null. If it is empty, then it is logged.
1283     */
1284    public boolean insertAdminEntry(String line) throws ArgumentNotValid {
1285        ArgumentNotValid.checkNotNull(line, "String line");
1286
1287        Connection con = ArchiveDBConnection.get();
1288        log.trace("Insert admin entry begun");
1289        final int lengthFirstPart = 4;
1290        final int lengthOtherParts = 3;
1291        try {
1292            // split into parts. First contains
1293            String[] split = line.split(" , ");
1294
1295            // Retrieve the basic entry data.
1296            String[] entryData = split[0].split(" ");
1297
1298            // Check if enough elements
1299            if (entryData.length < lengthFirstPart) {
1300                log.warn("Bad line in Admin.data: {}", line);
1301                return false;
1302            }
1303
1304            String filename = entryData[0];
1305            String checksum = entryData[1];
1306
1307            long fileId = ReplicaCacheHelpers.retrieveIdForFile(filename, con);
1308
1309            // If the fileId is -1, then the file is not within the file table.
1310            // Thus insert it and retrieve the id.
1311            if (fileId == -1) {
1312                fileId = ReplicaCacheHelpers.insertFileIntoDB(filename, con);
1313            }
1314            log.trace("Step 1 completed (file created in database).");
1315            // go through the replica specifics.
1316            for (int i = 1; i < split.length; i++) {
1317                String[] repInfo = split[i].split(" ");
1318
1319                // check if correct size
1320                if (repInfo.length < lengthOtherParts) {
1321                    log.warn("Bad replica information '{}' in line '{}'", split[i], line);
1322                    continue;
1323                }
1324
1325                // retrieve the data for this replica
1326                String replicaId = Channels.retrieveReplicaFromIdentifierChannel(repInfo[0]).getId();
1327                ReplicaStoreState replicaUploadStatus = ReplicaStoreState.valueOf(repInfo[1]);
1328                Date replicaDate = new Date(Long.parseLong(repInfo[2]));
1329
1330                // retrieve the guid of the replicafileinfo.
1331                long guid = ReplicaCacheHelpers.retrieveReplicaFileInfoGuid(fileId, replicaId, con);
1332
1333                // Update the replicaFileInfo with the information.
1334                ReplicaCacheHelpers.updateReplicaFileInfo(guid, checksum, replicaDate, replicaUploadStatus, con);
1335            }
1336        } catch (IllegalState e) {
1337            log.warn("Received IllegalState exception while parsing.", e);
1338            return false;
1339        } finally {
1340            ArchiveDBConnection.release(con);
1341        }
1342        log.trace("Insert admin entry finished");
1343        return true;
1344    }
1345
1346    /**
1347     * Method for setting a specific value for the filelistdate and the checksumlistdate for all the replicas.
1348     *
1349     * @param date The new date for the checksumlist and filelist for all the replicas.
1350     * @throws ArgumentNotValid If the date is null.
1351     */
1352    public void setAdminDate(Date date) throws ArgumentNotValid {
1353        ArgumentNotValid.checkNotNull(date, "Date date");
1354
1355        Connection con = ArchiveDBConnection.get();
1356        try {
1357            // set the date for the replicas.
1358            for (Replica rep : Replica.getKnown()) {
1359                ReplicaCacheHelpers.setFilelistDateForReplica(rep, date, con);
1360                ReplicaCacheHelpers.setChecksumlistDateForReplica(rep, date, con);
1361            }
1362        } finally {
1363            ArchiveDBConnection.release(con);
1364        }
1365    }
1366
1367    /**
1368     * Method for telling whether the database is empty. The database is empty if it does not contain any files.
1369     * <p>
1370     * The database will not be entirely empty, since the replicas are put into the replica table during the
1371     * instantiation of this class, but if the file table is empty, then the replicafileinfo table is also empty, and
1372     * the database will be considered empty.
1373     *
1374     * @return Whether the file list is empty.
1375     */
1376    public boolean isEmpty() {
1377        // The SQL statement to retrieve the amount of entries in the
1378        // file table. No arguments (represented by empty Object array).
1379        final String sql = "SELECT COUNT(*) FROM file";
1380        Connection con = ArchiveDBConnection.get();
1381        try {
1382            return DBUtils.selectLongValue(con, sql, new Object[0]) == 0L;
1383        } finally {
1384            ArchiveDBConnection.release(con);
1385        }
1386    }
1387
1388    /**
1389     * Method to print all the tables in the database.
1390     *
1391     * @return all the tables as a text string
1392     */
1393    public String retrieveAsText() {
1394        StringBuilder res = new StringBuilder();
1395        String sql = "";
1396        Connection connection = ArchiveDBConnection.get();
1397        // Go through the replica table
1398        List<String> reps = ReplicaCacheHelpers.retrieveIdsFromReplicaTable(connection);
1399        res.append("Replica table: " + reps.size() + "\n");
1400        res.append("GUID \trepId \trepName \trepType \tfileupdate \tchecksumupdated" + "\n");
1401        res.append("------------------------------------------------------------\n");
1402        for (String repId : reps) {
1403            // retrieve the replica_name
1404            sql = "SELECT replica_guid FROM replica WHERE replica_id = ?";
1405            String repGUID = DBUtils.selectStringValue(connection, sql, repId);
1406            // retrieve the replica_name
1407            sql = "SELECT replica_name FROM replica WHERE replica_id = ?";
1408            String repName = DBUtils.selectStringValue(connection, sql, repId);
1409            // retrieve the replica_type
1410            sql = "SELECT replica_type FROM replica WHERE replica_id = ?";
1411            int repType = DBUtils.selectIntValue(connection, sql, repId);
1412            // retrieve the date for last updated
1413            sql = "SELECT filelist_updated FROM replica WHERE replica_id = ?";
1414            String filelistUpdated = DBUtils.selectStringValue(connection, sql, repId);
1415            // retrieve the date for last updated
1416            sql = "SELECT checksum_updated FROM replica WHERE replica_id = ?";
1417            String checksumUpdated = DBUtils.selectStringValue(connection, sql, repId);
1418
1419            // Print
1420            res.append(repGUID + "\t" + repId + "\t" + repName + "\t" + ReplicaType.fromOrdinal(repType).name() + "\t"
1421                    + filelistUpdated + "\t" + checksumUpdated + "\n");
1422        }
1423        res.append("\n");
1424
1425        // Go through the file table
1426        List<String> fileIds = ReplicaCacheHelpers.retrieveIdsFromFileTable(connection);
1427        res.append("File table : " + fileIds.size() + "\n");
1428        res.append("fileId \tfilename" + "\n");
1429        res.append("--------------------" + "\n");
1430        for (String fileId : fileIds) {
1431            // retrieve the file_name
1432            sql = "SELECT filename FROM file WHERE file_id = ?";
1433            String fileName = DBUtils.selectStringValue(connection, sql, fileId);
1434
1435            // Print
1436            res.append(fileId + " \t " + fileName + "\n");
1437        }
1438        res.append("\n");
1439
1440        // Go through the replicafileinfo table
1441        List<String> rfiIds = ReplicaCacheHelpers.retrieveIdsFromReplicaFileInfoTable(connection);
1442        res.append("ReplicaFileInfo table : " + rfiIds.size() + "\n");
1443        res.append("GUID \trepId \tfileId \tchecksum \tus \t\tfls \tcss \tfilelistCheckdate \tchecksumCheckdate\n");
1444        res.append("---------------------------------------------------------------------------------------------------------\n");
1445        for (String rfiGUID : rfiIds) {
1446            // FIXME Replace with one SELECT instead of one SELECT for each row! DOH!
1447            // retrieve the replica_id
1448            sql = "SELECT replica_id FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1449            String replicaId = DBUtils.selectStringValue(connection, sql, rfiGUID);
1450            // retrieve the file_id
1451            sql = "SELECT file_id FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1452            String fileId = DBUtils.selectStringValue(connection, sql, rfiGUID);
1453            // retrieve the checksum
1454            sql = "SELECT checksum FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1455            String checksum = DBUtils.selectStringValue(connection, sql, rfiGUID);
1456            // retrieve the upload_status
1457            sql = "SELECT upload_status FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1458            int uploadStatus = DBUtils.selectIntValue(connection, sql, rfiGUID);
1459            // retrieve the filelist_status
1460            sql = "SELECT filelist_status FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1461            int filelistStatus = DBUtils.selectIntValue(connection, sql, rfiGUID);
1462            // retrieve the checksum_status
1463            sql = "SELECT checksum_status FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1464            int checksumStatus = DBUtils.selectIntValue(connection, sql, rfiGUID);
1465            // retrieve the filelist_checkdatetime
1466            sql = "SELECT filelist_checkdatetime FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1467            String filelistCheckdatetime = DBUtils.selectStringValue(connection, sql, rfiGUID);
1468            // retrieve the checksum_checkdatetime
1469            sql = "SELECT checksum_checkdatetime FROM replicafileinfo WHERE replicafileinfo_guid = ?";
1470            String checksumCheckdatetime = DBUtils.selectStringValue(connection, sql, rfiGUID);
1471
1472            // Print
1473            res.append(rfiGUID + " \t" + replicaId + "\t" + fileId + "\t" + checksum + "\t"
1474                    + ReplicaStoreState.fromOrdinal(uploadStatus).name() + "  \t"
1475                    + FileListStatus.fromOrdinal(filelistStatus).name() + "\t"
1476                    + ChecksumStatus.fromOrdinal(checksumStatus).name() + "\t" + filelistCheckdatetime + "\t"
1477                    + checksumCheckdatetime + "\n");
1478        }
1479        res.append("\n");
1480
1481        return res.toString();
1482    }
1483
1484    /**
1485     * Method for cleaning up.
1486     */
1487    @Override
1488    public synchronized void cleanup() {
1489        instance = null;
1490    }
1491
1492}