001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.archive.arcrepositoryadmin;
024
025import java.io.File;
026import java.sql.Date;
027
028import dk.netarkivet.common.distribute.arcrepository.Replica;
029import dk.netarkivet.common.utils.CleanupIF;
030
031/**
032 * This is an interface for communicating with bitpreservation databases.
033 */
034public interface BitPreservationDAO extends CleanupIF {
035    /**
036     * Given the output of a checksum job, add the results to the database. NOTE: the Checksum version of Replica must
037     * be implemented with output in the same form as checksumJobOutput for implementation of bitArchive replicas
038     *
039     * @param checksumOutput The parsed output of a GetAllChecksumMessage as a File with ChecksumJob lines, i.e.
040     * filename##checksum.
041     * @param replica The replica this checksum job is for.
042     */
043    void addChecksumInformation(File checksumOutput, Replica replica);
044
045    /**
046     * Given the output of a file list job, add the results to the database. NOTE: the Checksum version of Replica must
047     * be implemented with output in the same form as filelistJobOutput for implementation of bitArchive replicas
048     *
049     * @param filelistOutput A file with a list of filenames for the given replica.
050     * @param replica The replica this filelist job is for.
051     */
052    void addFileListInformation(File filelistOutput, Replica replica);
053
054    /**
055     * Return files with upload_status = COMPLETE for the replica, but the filelist_status = MISSING. This is done by
056     * querying the database for files with no or different update date from the last known update date for bitarchive,
057     * but which are present from admin data.
058     *
059     * @param replica The replica to check for.
060     * @return The list of missing files for a specific replica.
061     */
062    Iterable<String> getMissingFilesInLastUpdate(Replica replica);
063
064    /**
065     * Return files with filelist_status CORRUPT for the replica, but not present in the last missing files job. This is
066     * done by querying the database for files with different checksum from the checksum in the last known update date
067     * for bitarchive, but which are present from admin data.
068     *
069     * @param replica The replica to check for.
070     * @return The list of wrong files for the replica in the last update.
071     */
072    Iterable<String> getWrongFilesInLastUpdate(Replica replica);
073
074    /**
075     * Return the count of missing files for replica.
076     *
077     * @param replica The replica to get the count for.
078     * @return The count of missing files for a replica.
079     */
080    long getNumberOfMissingFilesInLastUpdate(Replica replica);
081
082    /**
083     * Return the count of corrupt files for replica.
084     *
085     * @param replica The replica to get the count for.
086     * @return The number of wrong files for a replica.
087     */
088    long getNumberOfWrongFilesInLastUpdate(Replica replica);
089
090    /**
091     * Returns the count of files in the replica which is not missing.
092     *
093     * @param replica The replica to have the files.
094     * @return The number of files, which does not have filelist_status = MISSING.
095     */
096    long getNumberOfFiles(Replica replica);
097
098    /**
099     * Get the date for the last file list job.
100     *
101     * @param replica The replica to get the date for.
102     * @return The date of the last missing files update for the replica.
103     */
104    Date getDateOfLastMissingFilesUpdate(Replica replica);
105
106    /**
107     * Get the date for the last file list job.
108     *
109     * @param replica The replica to get the date for.
110     * @return The date of the last wrong file update for the replica.
111     */
112    Date getDateOfLastWrongFilesUpdate(Replica replica);
113
114    /**
115     * Method for retrieving a replica which has the file and the checksum_status = OK.
116     *
117     * @param filename The name of the file.
118     * @return A replica which contains the file, or null if no such replica can be found.
119     */
120    Replica getBitarchiveWithGoodFile(String filename);
121
122    /**
123     * Method for retrieving a replica which has the file and the checksum_status = OK.
124     *
125     * @param filename The name of the file.
126     * @param badReplica A replica which is known to contain a corrupt instance of this file.
127     * @return A replica which contains the file, or null if no such replica can be found.
128     */
129    Replica getBitarchiveWithGoodFile(String filename, Replica badReplica);
130
131    /**
132     * Method for updating the status for the files for all the replicas. If the checksums of the archives differ for
133     * some replicas, then based on a checksum vote, a specific checksum is chosen as the 'correct' one, and the entries
134     * with another checksum that this 'correct' one will be marked as corrupt.
135     */
136    void updateChecksumStatus();
137
138    /**
139     * Method for updating the status for a specific file for all the replicas. If the checksums for the replicas differ
140     * for some replica, then based on a checksum vote, a specific checksum is chosen as the 'correct' one, and the
141     * entries with another checksum than the 'correct one' will be marked as corrupt. If no winner of the voting is
142     * found, the all instances will be chosen to have 'UNKNOWN' checksum status.
143     *
144     * @param filename The name of the file to update the status for.
145     */
146    void updateChecksumStatus(String filename);
147
148    /**
149     * Method for retrieving the entry in the replicafileinfo table for a given file and replica.
150     *
151     * @param filename The name of the file for the entry.
152     * @param replica The replica of the entry.
153     * @return The replicafileinfo entry corresponding to the given filename and replica.
154     */
155    ReplicaFileInfo getReplicaFileInfo(String filename, Replica replica);
156
157    /**
158     * Method for updating a specific entry in the replicafileinfo table.
159     *
160     * @param filename Name of the file.
161     * @param checksum The checksum of the file.
162     * @param replica The replica where the file exists.
163     */
164    void updateChecksumInformationForFileOnReplica(String filename, String checksum, Replica replica);
165
166    /**
167     * Method for cleaning up when done.
168     */
169    void cleanup();
170}