/*
 * #%L
 * Netarchivesuite - common
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
023package dk.netarkivet.common.utils.batch;
024
import java.io.File;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.ChecksumCalculator;
import dk.netarkivet.common.utils.KeyValuePair;
038
039/**
040 * Class responsible for checksumming a list of files.
041 */
042@SuppressWarnings({"serial"})
043public class ChecksumJob extends FileBatchJob {
044
045    /** The log. */
046    protected static final transient Logger log = LoggerFactory.getLogger(ChecksumJob.class);
047
048    /**
049     * Characters used for separating a file identifier from the checksum in the output from a checksum job.
050     */
051    public static final String STRING_FILENAME_SEPARATOR = "##";
052
053    /** The constructor. */
054    public ChecksumJob() {
055        // Keep the batchJobTimeout at default (-1) so it will be overridden
056        // by the settings for default batch timeout.
057    }
058
059    /**
060     * Initialization of a ChecksumJob: a new structure for storing files failed is created.
061     *
062     * @param os The output stream where the output data is written.
063     * @see FileBatchJob#initialize(OutputStream)
064     */
065    public void initialize(OutputStream os) {
066    }
067
068    /**
069     * Generates MD5 checksum for file identified by 'file' and writes the checksum to the given OutputStream. Errors
070     * during checksumming are logged and files on which checksumming fails are stored in filesFailed.
071     *
072     * @param file The file to process.
073     * @param os The outputStream to write the result to
074     * @return false, if errors occurred while processing the file
075     * @see FileBatchJob#processFile(File, OutputStream)
076     */
077    public boolean processFile(File file, OutputStream os) {
078        ArgumentNotValid.checkNotNull(file, "file");
079        try {
080            os.write((file.getName() + STRING_FILENAME_SEPARATOR + ChecksumCalculator.calculateMd5(file) + "\n")
081                    .getBytes());
082        } catch (IOException e) {
083            log.warn("Checksumming of file {} failed: ", file.getName(), e);
084            return false;
085        }
086        return true;
087    }
088
089    /**
090     * Finishing the job requires nothing particular.
091     *
092     * @param os The output stream where the output data is written.
093     * @see FileBatchJob#finish(OutputStream)
094     */
095    public void finish(OutputStream os) {
096    }
097
098    /**
099     * Create a line in checksum job format from a filename and a checksum.
100     *
101     * @param filename A filename (no path)
102     * @param checksum An MD5 checksum
103     * @return A string of the correct format for a checksum job output.
104     */
105    public static String makeLine(String filename, String checksum) {
106        ArgumentNotValid.checkNotNullOrEmpty(filename, "filename");
107        ArgumentNotValid.checkNotNullOrEmpty(checksum, "checksum");
108        return filename + STRING_FILENAME_SEPARATOR + checksum;
109    }
110
111    /**
112     * Parse a line of output into a key-value pair.
113     *
114     * @param line The line to parse, of the form <b>filename</b>##<b>checksum</b>
115     * @return The filename->checksum mapping.
116     * @throws ArgumentNotValid if the line is not on the correct form.
117     */
118    public static KeyValuePair<String, String> parseLine(String line) throws ArgumentNotValid {
119        ArgumentNotValid.checkNotNull(line, "checksum line");
120        String[] parts = line.split(STRING_FILENAME_SEPARATOR);
121        if (parts.length != 2) {
122            throw new ArgumentNotValid("String '" + line + "' is not on checksum output form");
123        }
124        return new KeyValuePair<String, String>(parts[0], parts[1]);
125    }
126
127    /**
128     * Write a human-readily description of this ChecksumJob object. Writes out the name of the ChecksumJob, the number
129     * of files processed, and the number of files that failed during processing.
130     *
131     * @return a human-readily description of this ChecksumJob object
132     */
133    public String toString() {
134        int noOfFailedFiles;
135        if (filesFailed == null) {
136            noOfFailedFiles = 0;
137        } else {
138            noOfFailedFiles = filesFailed.size();
139        }
140        return ("Checksum job " + getClass().getName() + ": [Files Processed = " + noOfFilesProcessed
141                + "; Files  failed = " + noOfFailedFiles + "]");
142    }
143
144    /**
145     * Invoke default method for deserializing object, and reinitialise the logger.
146     *
147     * @param s the InputStream
148     */
149    private void readObject(ObjectInputStream s) {
150        try {
151            s.defaultReadObject();
152        } catch (Exception e) {
153            throw new IOFailure("Unexpected error during deserialization", e);
154        }
155    }
156
157    /**
158     * Invoke default method for serializing object.
159     *
160     * @param s the OutputStream
161     * @throws IOFailure If an exception is caught during writing of the object.
162     */
163    private void writeObject(ObjectOutputStream s) throws IOFailure {
164        try {
165            s.defaultWriteObject();
166        } catch (Exception e) {
167            throw new IOFailure("Unexpected error during serialization", e);
168        }
169    }
170
171}