001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.archive.arcrepositoryadmin;
025
026import java.io.BufferedReader;
027import java.io.File;
028import java.io.FileNotFoundException;
029import java.io.FileReader;
030import java.io.IOException;
031import java.util.ArrayList;
032import java.util.Date;
033import java.util.HashMap;
034import java.util.HashSet;
035import java.util.List;
036import java.util.Map;
037import java.util.Set;
038
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import dk.netarkivet.archive.ArchiveSettings;
043import dk.netarkivet.archive.arcrepository.distribute.StoreMessage;
044import dk.netarkivet.common.distribute.arcrepository.Replica;
045import dk.netarkivet.common.distribute.arcrepository.ReplicaStoreState;
046import dk.netarkivet.common.exceptions.ArgumentNotValid;
047import dk.netarkivet.common.exceptions.IOFailure;
048import dk.netarkivet.common.exceptions.UnknownID;
049import dk.netarkivet.common.utils.ApplicationUtils;
050import dk.netarkivet.common.utils.Settings;
051
052/**
053 * Class for accessing and manipulating the administrative data for the ArcRepository. In the current implementation, it
054 * consists of a file with a number of lines of the form: <filename/> <checksum/> <state/>
055 * <timestamp-for-last-state-change/> [,<bitarchive/> <storestatus/> <timestamp-for-last-state-change/>]*
056 * <p>
057 * This abstract class is overridden to give either a read/write or a readonly version of this class.
058 *
059 * @deprecated Use the DatabaseAdmin instead.
060 */
061@Deprecated
062public abstract class AdminData {
063    private static final Logger log = LoggerFactory.getLogger(AdminData.class);
064
065    /** Admindata version. VersionNumber is the current version. */
066    public static final String VERSION_NUMBER = "0.4";
067    /**
068     * Admindata version. oldVersionNumber is the earlier but still valid version.
069     */
070    private static final String OLD_VERSION_NUMBER = "0.3";
071    /** Map containing a mapping from arcfilename to ArcRepositoryEntry. */
072    protected Map<String, ArcRepositoryEntry> storeEntries = new HashMap<String, ArcRepositoryEntry>();
073    /**
074     * General delimiter. TODO add constants class where these constants are placed.
075     */
076    private static final String GENERAL_DELIMITER = " ";
077
078    /**
079     * The directory where the admin data resides, currently the directory: Settings.DIRS_ARCREPOSITORY_ADMIN.
080     */
081    protected File adminDir;
082
083    /** The name of the admin file. */
084    protected static final String ADMIN_FILE_NAME = "admin.data";
085
    /**
     * List containing the names of all known bitarchives. This list is updated in the setState() method, but is only
     * used in the toString() method.
     */
090    protected List<String> knownBitArchives = new ArrayList<String>();
091
092    /** The File object for the admin data file. */
093    protected final File adminDataFile;
094
095    /**
096     * Common constructor for admin data. Reads current admin data from admin data file.
097     */
098    protected AdminData() {
099        this.adminDir = new File(Settings.get(ArchiveSettings.DIRS_ARCREPOSITORY_ADMIN));
100        ApplicationUtils.dirMustExist(adminDir);
101
102        adminDataFile = new File(adminDir, AdminData.ADMIN_FILE_NAME);
103        log.info("Using admin data file '{}'", adminDataFile.getAbsolutePath());
104
105        if (adminDataFile.exists()) {
106            read(); // Load admindata into StoreEntries Map
107        } else {
108            log.warn("AdminDataFile ({}) was not found.", adminDataFile.getPath());
109        }
110    }
111
112    /**
113     * Returns the one and only AdminData instance.
114     *
115     * @return the one and only AdminData instance.
116     */
117    public static synchronized UpdateableAdminData getUpdateableInstance() {
118        return UpdateableAdminData.getInstance();
119    }
120
121    /**
122     * Returns a read-only AdminData instance.
123     *
124     * @return a read-only AdminData instance.
125     */
126    public static synchronized ReadOnlyAdminData getReadOnlyInstance() {
127        // no Singleton returned
128        return new ReadOnlyAdminData();
129    }
130
131    /**
132     * Check, if there is an entry for a certain arcfile?
133     *
134     * @param arcfileName A given arcfile
135     * @return true, if there is an entry for the given arcfile
136     */
137    public boolean hasEntry(String arcfileName) {
138        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfileName");
139        return storeEntries.containsKey(arcfileName);
140    }
141
142    /**
143     * Return the ArcRepositoryEntry for a certain arcfileName. Returns null, if not found.
144     *
145     * @param arcfileName a certain filename
146     * @return the ArcRepositoryEntry for a certain arcfileName
147     */
148    public ArcRepositoryEntry getEntry(String arcfileName) {
149        return storeEntries.get(arcfileName);
150    }
151
152    /**
153     * Tells whether there is a replyInfo associated with the given arcfile. If the file is not registered, a warning is
154     * logged and false is returned.
155     *
156     * @param arcfileName The arc file we want to reply a store request for.
157     * @return Whether setReplyInfo() has been called (and the replyInfo hasn't been removed since).
158     */
159    public boolean hasReplyInfo(String arcfileName) {
160        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfileName");
161        ArcRepositoryEntry entry = storeEntries.get(arcfileName);
162        if (entry == null) {
163            log.warn("No entry found in storeEntries for arcfilename: {}", arcfileName);
164        }
165        return entry != null && entry.hasReplyInfo();
166    }
167
168    /**
169     * Returns whether or not a BitArchiveStoreState is registered for the given ARC file at the given bit archive.
170     *
171     * @param arcfileName The file to retrieve the state for
172     * @param replicaChannelName The name of the identification channel for the replica the state should be retrieved
173     * for.
174     * @return true if BitArchiveStoreState is registered, false otherwise.
175     */
176    public boolean hasState(String arcfileName, String replicaChannelName) {
177        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "String arcfileName");
178        ArgumentNotValid.checkNotNullOrEmpty(replicaChannelName, "String replicaChannelName");
179        ArcRepositoryEntry entry = storeEntries.get(arcfileName);
180        if (entry == null) {
181            log.warn("No entry found in storeEntries for arcfilename: {}", arcfileName);
182        }
183        return entry != null && entry.hasStoreState(replicaChannelName);
184    }
185
186    /**
187     * Retrieves the storage state of a file for a specific replica.
188     *
189     * @param arcfileName The file to retrieve the state for.
190     * @param replicaChannelName The name of the identification channel for the replica the state should be retrieved
191     * for.
192     * @return The storage state.
193     * @throws UnknownID When no record exists.
194     */
195    public ReplicaStoreState getState(String arcfileName, String replicaChannelName) throws UnknownID {
196        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "String arcfileName");
197        ArgumentNotValid.checkNotNullOrEmpty(replicaChannelName, "String replicaChannelName");
198        if (!hasState(arcfileName, replicaChannelName)) {
199            throw new UnknownID("No store state recorded for '" + arcfileName + "' in '" + replicaChannelName + "'");
200        }
201        return storeEntries.get(arcfileName).getStoreState(replicaChannelName);
202    }
203
204    /**
205     * Get Checksum for a given arcfile.
206     *
207     * @param arcfileName Unique reference to file for which to retrieve checksum
208     * @return checksum the latest registered reference checksum or null, if no reference checksum is available
209     * @throws UnknownID if the file is not registered
210     * @throws ArgumentNotValid If the arcFileName is either null or the empty string.
211     */
212    public String getCheckSum(String arcfileName) throws ArgumentNotValid, UnknownID {
213        ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfileName");
214        if (!hasEntry(arcfileName)) {
215            throw new UnknownID("Don't know anything about file '" + arcfileName + "'");
216        }
217        return storeEntries.get(arcfileName).getChecksum();
218    }
219
220    /**
221     * Reads the admin data from a file. If the data read is a valid old version the it is converted to the new version
222     * and written to disk.
223     *
224     * @throws IOFailure on trouble reading from file
225     */
226    protected void read() throws IOFailure {
227        try {
228            BufferedReader reader = null;
229            try {
230                reader = new BufferedReader(new FileReader(adminDataFile));
231                /*
232                 * Check version. When this check is done, we either have - dataVersion.equals(versionNumber)) &&
233                 * !validOldVersion, or - !dataVersion.equals(versionNumber)) && validOldVersion The latter applies if
234                 * the data file was empty.
235                 */
236                String dataVersion = OLD_VERSION_NUMBER;
237
238                boolean validOldVersion = false;
239                String tempVersion = reader.readLine();
240                if (tempVersion != null) {
241                    dataVersion = tempVersion;
242                }
243                if (dataVersion.equals(OLD_VERSION_NUMBER)) {
244                    log.debug("admindata version: {}", OLD_VERSION_NUMBER);
245                    validOldVersion = true;
246                }
247                if (!dataVersion.equals(VERSION_NUMBER) && !validOldVersion) {
248                    throw new IOFailure("Invalid version" + dataVersion);
249                }
250                // Now read the data file, depending on version.
251                if (dataVersion.equals(VERSION_NUMBER)) {
252                    log.debug("admindata version: {}", VERSION_NUMBER);
253                    readCurrentVersion(reader);
254                } else {
255                    readValidOldVersion(reader);
256                }
257            } finally {
258                if (reader != null) {
259                    reader.close();
260                }
261            }
262        } catch (FileNotFoundException e) {
263            throw new IOFailure("AdminData couldn't find admin data file", e);
264        } catch (IOException e) {
265            throw new IOFailure("AdminData couldn't find admin data file", e);
266        }
267    }
268
269    /**
270     * Read the valid old version (0.3) of the admin data. The valid old version contains lines of the format
271     * <filename/> <checksum/> [<bitarchive/> <storestatus/>]* The same filename may occur multiple times, but must
272     * always have the same checksum. This indicates updates of the storestatus for the file. Updates to checksum happen
273     * only during 'correct' operations and cause the entire file to be written, leaving the changed entry with the new
274     * checksum only. An entry-line is considered corrupt (!valid) if any of the following occur: There is no checksum.
275     * There is a bitarchive with a missing or invalid status The checksum does not match a previously found checksum.
276     * NB: If we come upon a corrupt entry-line, the entry for the filename in question is removed from admin.data
277     *
278     * @param reader The stream to read the input from.
279     */
280    private void readValidOldVersion(BufferedReader reader) {
281        String s;
282        String logMessage;
283        try {
284            while ((s = reader.readLine()) != null) {
285                String[] parts = s.split(" ");
286                boolean valid = true;
287                String filename = parts[0];
288                if (parts.length < 2 || parts.length % 2 != 0) {
289                    logMessage = "Corrupt admin data file:  Too few or not " + "an even number of fields for "
290                            + filename + ": " + s;
291                    log.warn(logMessage);
292                    valid = false;
293                }
294                if (parts.length > 1) {
295                    String checksum = parts[1];
296                    if (hasEntry(filename)) {
297                        if (!checksum.equals(getCheckSum(filename))) {
298                            log.warn("Wrong checksum encountered in admin data for known file '{}': Old={} New={}",
299                                    filename, getCheckSum(filename), checksum);
300                            // this means, that the existing entry is removed
301                            // from admin.data
302                            valid = false;
303                        }
304                    } else {
305                        StoreMessage replyInfo = null;
306                        storeEntries.put(filename, new ArcRepositoryEntry(filename, checksum, replyInfo));
307                    }
308                } else { // parts.length == 1
309                    if (hasEntry(filename)) {
310                        log.debug("Entry is invalid, because no checksumstring found in line: {}", s);
311                        // this means, that the existing entry
312                        // is removed from admin.data
313                        valid = false;
314                    } else {
315                        // Ignore this entry entirely, if not already
316                        // entry for this filename
317                        log.warn("This entry-line is ignored, because no checksumstring found in line: {}", s);
318                        continue;
319                    }
320                }
321                // If the entry is invalid, no reason to try parsing states
322                if (valid) {
323                    ArcRepositoryEntry entry = storeEntries.get(filename);
324                    for (int i = 2; i < parts.length; i += 2) {
325                        try {
326                            entry.setStoreState(parts[i], ReplicaStoreState.valueOf(parts[i + 1]));
327                        } catch (IllegalArgumentException e) {
328                            log.warn("Corrupt admin data entry. ", e);
329                            valid = false;
330                            break;
331                        }
332                    }
333                }
334                // Note that the previous if could set valid to false
335                if (!valid) {
336                    log.warn("Entry for file '{}' with checksum '{}' is invalid and therefore removed after reading "
337                            + "line with inconsistent information: {}", filename, storeEntries.get(filename)
338                            .getChecksum(), s);
339                    storeEntries.remove(filename);
340                }
341            }
342        } catch (IOException e) {
343            final String message = "Failed to read admin data from '" + adminDataFile.getPath() + "'";
344            log.error(message);
345            throw new IOFailure(message, e);
346        }
347    }
348
349    /**
350     * Read the current version (0.4) of the admin data. The current version contains lines of the format <filename/>
351     * <checksum/> <state/> <timestamp-for-last-state-change/> [,<bitarchive/> <storestatus/>
352     * <timestamp-for-last-state-change/>]*
353     * <p>
354     * The same filename may occur multiple times, but must always have the same checksum. This indicates updates of the
355     * storestatus for the file. Updates to checksum happen only during 'correct' operations and cause the entire file
356     * to be written, leaving the changed entry with the new checksum only. An entry is considered corrupt (!valid) if
357     * any of the following occur: - There is no checksum. - There is no state - timestamp-for-last-state-change is
358     * missing - There is a bitarchive with a missing or invalid status - The checksum does not match a previously found
359     * checksum. NB: If we come upon a corrupt entry-line, the entry for the filename in question is removed from
360     * admin.data
361     *
362     * @param reader The stream to read the input from.
363     * @throws ArgumentNotValid If reader is null.
364     * @throws IOFailure If an error occurred with access to the admin.data.
365     */
366    private void readCurrentVersion(BufferedReader reader) throws ArgumentNotValid, IOFailure {
367        ArgumentNotValid.checkNotNull(reader, "reader");
368
369        // The expected number of elements in first part of a line.
370        final int firstPartLength = 4;
371
372        // indices for the different parts in the first line.
373        final int indexFirstPartFilename = 0;
374        final int indexFirstPartChecksum = 1;
375        final int indexFirstPartState = 2;
376        final int indexFirstPartTimestamp = 3;
377
378        // The expected number of elements in the other parts of the line.
379        final int otherPartsLength = 3;
380
381        // The indices for the different parts in the other lines.
382        final int indexOtherPartsReplica = 0;
383        final int indexOtherPartsState = 1;
384        final int indexOtherPartsTimestamp = 2;
385
386        String s;
387        try {
388            while ((s = reader.readLine()) != null) {
389
390                // Split the line up in parts defined by
391                // the ENTRY_COMPONENT_SEPARATOR_STRING
392                String[] parts = s.split(ArcRepositoryEntry.ENTRY_COMPONENT_SEPARATOR_STRING);
393
394                // parts[0] should now contain the <filename> <checksum>
395                // <state> <timestamp-for-last-state-change>
396
397                // For i=0,1.. : parts[1+i] contains the state-information
398                // for the file on our bitarchives.
399
400                String[] firstparts = parts[0].split(GENERAL_DELIMITER);
401
402                if (firstparts.length != firstPartLength) {
403                    String logMessage = "Corrupt admin data file: One of the components '<filename> <checksum> "
404                            + "<state> <timestamp-for-last-state-change>' is missing from this line: " + s
405                            + "\nIgnoring this line";
406                    log.warn(logMessage);
407                    continue; // ignore this linie, and go to next line
408                }
409
410                /**
411                 * Parse the different components of filename> <checksum> <state> <timestamp-for-last-state-change>
412                 */
413                String filename = firstparts[indexFirstPartFilename];
414                String checksumString = firstparts[indexFirstPartChecksum];
415                String stateString = firstparts[indexFirstPartState];
416                String timestampString = firstparts[indexFirstPartTimestamp];
417                log.trace("Found (filename, checksum, state, timestamp): {}, {}, {}, {}", filename, checksumString,
418                        stateString, timestampString);
419
420                ReplicaStoreState state = ReplicaStoreState.valueOf(stateString);
421                Long tempLong = Long.parseLong(timestampString);
422                Date timestampAsDate = new Date(tempLong);
423
424                // Check, if we already have entry for this filename
425                if (hasEntry(filename)) {
426                    // check, if 'checksum' equals checksum-value in
427                    // existing entry
428                    if (!checksumString.equals(getCheckSum(filename))) {
429                        log.warn("Wrong checksum encountered in admin data for known file '{}': Old={} New={}. "
430                                + "Entry removed from admin.data and the remaining line ignored: {}", filename,
431                                getCheckSum(filename), checksumString, s);
432                        storeEntries.remove(filename);
433                        continue; // Stop processing, and go to next line
434                    }
435                } else {
436                    // Add new entry for filename:
437                    StoreMessage replyInfo = null;
438                    storeEntries.put(filename, new ArcRepositoryEntry(filename, checksumString, replyInfo));
439                }
440
441                // Parse the remaining parts[1..] array
442                // Expected format:
443                // <bitarchive> <storestatus> <timestamp-for-last-state-change>
444                ArcRepositoryEntry entry = getEntry(filename);
445                for (int i = 1; i < parts.length; i++) {
446                    String[] bitparts = parts[i].split(GENERAL_DELIMITER);
447                    if (bitparts.length != otherPartsLength) {
448                        final String message = "Line incomplete. Expected 3 elements: <bitarchive> <storestatus> "
449                                + "<timestamp-for-last-state-change>. Found only " + bitparts.length
450                                + " elements in line: " + s;
451                        log.warn(message);
452                    } else {
453                        String bitarchiveString = bitparts[indexOtherPartsReplica];
454                        String storestatusString = bitparts[indexOtherPartsState];
455                        timestampString = bitparts[indexOtherPartsTimestamp];
456                        state = ReplicaStoreState.valueOf(storestatusString);
457                        tempLong = Long.parseLong(timestampString);
458                        timestampAsDate = new Date(tempLong);
459                        entry.setStoreState(bitarchiveString, state, timestampAsDate);
460                    }
461                }
462            }
463        } catch (IOException e) {
464            final String message = "Failed to read admin data from '" + adminDataFile.getPath() + "'";
465            log.error(message);
466            throw new IOFailure(message, e);
467        }
468    }
469
470    /**
471     * Returns a set of the all arcfile names in the repository.
472     *
473     * @return the set of files in the repository
474     */
475    public Set<String> getAllFileNames() {
476        Set<String> knownFiles = new HashSet<String>();
477        for (Map.Entry<String, ArcRepositoryEntry> entry : storeEntries.entrySet()) {
478            knownFiles.add(entry.getKey());
479        }
480        return knownFiles;
481    }
482
483    /**
484     * Returns a set of the arcfile names that are in a given state for a specific bitarchive in the repository.
485     *
486     * @param replica the object representing the BA
487     * @param state the state to look for, e.g. ReplicaStoreState.STATE_COMPLETED
488     * @return the set of files in the repository with the given state
489     */
490    public Set<String> getAllFileNames(Replica replica, ReplicaStoreState state) {
491        ArgumentNotValid.checkNotNull(replica, "Replica replica");
492        ArgumentNotValid.checkNotNull(state, "BitArchiveStoreState state");
493        String replicaKey = replica.getIdentificationChannel().getName();
494        Set<String> completedFiles = new HashSet<String>();
495        for (Map.Entry<String, ArcRepositoryEntry> entry : storeEntries.entrySet()) {
496            if (entry.getValue().getStoreState(replicaKey) == state) {
497                completedFiles.add(entry.getKey());
498            }
499        }
500        return completedFiles;
501    }
502
503    /**
504     * Return info about current object as String.
505     *
506     * @return info about current object as String.
507     */
508    public String toString() {
509        StringBuffer out = new StringBuffer();
510        out.append("\nAdminData:");
511        out.append("\nKnown bitarchives:");
512        out.append(knownBitArchives.toString());
513        out.append(getAllFileNames().toString());
514        return out.toString();
515    }
516
517}