001/* 002 * #%L 003 * Netarchivesuite - common 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.common.utils.batch; 024 025import java.io.File; 026import java.io.IOException; 027import java.io.ObjectInputStream; 028import java.io.ObjectOutputStream; 029import java.io.OutputStream; 030 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034import dk.netarkivet.common.exceptions.ArgumentNotValid; 035import dk.netarkivet.common.exceptions.IOFailure; 036import dk.netarkivet.common.utils.ChecksumCalculator; 037import dk.netarkivet.common.utils.KeyValuePair; 038 039/** 040 * Class responsible for checksumming a list of files. 041 */ 042@SuppressWarnings({"serial"}) 043public class ChecksumJob extends FileBatchJob { 044 045 /** The log. */ 046 protected static final transient Logger log = LoggerFactory.getLogger(ChecksumJob.class); 047 048 /** 049 * Characters used for separating a file identifier from the checksum in the output from a checksum job. 050 */ 051 public static final String STRING_FILENAME_SEPARATOR = "##"; 052 053 /** The constructor. */ 054 public ChecksumJob() { 055 // Keep the batchJobTimeout at default (-1) so it will be overridden 056 // by the settings for default batch timeout. 057 } 058 059 /** 060 * Initialization of a ChecksumJob: a new structure for storing files failed is created. 061 * 062 * @param os The output stream where the output data is written. 063 * @see FileBatchJob#initialize(OutputStream) 064 */ 065 public void initialize(OutputStream os) { 066 } 067 068 /** 069 * Generates MD5 checksum for file identified by 'file' and writes the checksum to the given OutputStream. Errors 070 * during checksumming are logged and files on which checksumming fails are stored in filesFailed. 071 * 072 * @param file The file to process. 073 * @param os The outputStream to write the result to 074 * @return false, if errors occurred while processing the file 075 * @see FileBatchJob#processFile(File, OutputStream) 076 */ 077 public boolean processFile(File file, OutputStream os) { 078 ArgumentNotValid.checkNotNull(file, "file"); 079 try { 080 os.write((file.getName() + STRING_FILENAME_SEPARATOR + ChecksumCalculator.calculateMd5(file) + "\n") 081 .getBytes()); 082 } catch (IOException e) { 083 log.warn("Checksumming of file {} failed: ", file.getName(), e); 084 return false; 085 } 086 return true; 087 } 088 089 /** 090 * Finishing the job requires nothing particular. 091 * 092 * @param os The output stream where the output data is written. 093 * @see FileBatchJob#finish(OutputStream) 094 */ 095 public void finish(OutputStream os) { 096 } 097 098 /** 099 * Create a line in checksum job format from a filename and a checksum. 100 * 101 * @param filename A filename (no path) 102 * @param checksum An MD5 checksum 103 * @return A string of the correct format for a checksum job output. 104 */ 105 public static String makeLine(String filename, String checksum) { 106 ArgumentNotValid.checkNotNullOrEmpty(filename, "filename"); 107 ArgumentNotValid.checkNotNullOrEmpty(checksum, "checksum"); 108 return filename + STRING_FILENAME_SEPARATOR + checksum; 109 } 110 111 /** 112 * Parse a line of output into a key-value pair. 113 * 114 * @param line The line to parse, of the form <b>filename</b>##<b>checksum</b> 115 * @return The filename->checksum mapping. 116 * @throws ArgumentNotValid if the line is not on the correct form. 117 */ 118 public static KeyValuePair<String, String> parseLine(String line) throws ArgumentNotValid { 119 ArgumentNotValid.checkNotNull(line, "checksum line"); 120 String[] parts = line.split(STRING_FILENAME_SEPARATOR); 121 if (parts.length != 2) { 122 throw new ArgumentNotValid("String '" + line + "' is not on checksum output form"); 123 } 124 return new KeyValuePair<String, String>(parts[0], parts[1]); 125 } 126 127 /** 128 * Write a human-readily description of this ChecksumJob object. Writes out the name of the ChecksumJob, the number 129 * of files processed, and the number of files that failed during processing. 130 * 131 * @return a human-readily description of this ChecksumJob object 132 */ 133 public String toString() { 134 int noOfFailedFiles; 135 if (filesFailed == null) { 136 noOfFailedFiles = 0; 137 } else { 138 noOfFailedFiles = filesFailed.size(); 139 } 140 return ("Checksum job " + getClass().getName() + ": [Files Processed = " + noOfFilesProcessed 141 + "; Files failed = " + noOfFailedFiles + "]"); 142 } 143 144 /** 145 * Invoke default method for deserializing object, and reinitialise the logger. 146 * 147 * @param s the InputStream 148 */ 149 private void readObject(ObjectInputStream s) { 150 try { 151 s.defaultReadObject(); 152 } catch (Exception e) { 153 throw new IOFailure("Unexpected error during deserialization", e); 154 } 155 } 156 157 /** 158 * Invoke default method for serializing object. 159 * 160 * @param s the OutputStream 161 * @throws IOFailure If an exception is caught during writing of the object. 162 */ 163 private void writeObject(ObjectOutputStream s) throws IOFailure { 164 try { 165 s.defaultWriteObject(); 166 } catch (Exception e) { 167 throw new IOFailure("Unexpected error during serialization", e); 168 } 169 } 170 171}