001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.utils;
024
025import java.io.File;
026import java.io.FileInputStream;
027import java.io.FileNotFoundException;
028import java.io.IOException;
029import java.io.InputStream;
030import java.security.MessageDigest;
031import java.security.NoSuchAlgorithmException;
032
033import org.apache.commons.io.IOUtils;
034
035import dk.netarkivet.common.Constants;
036import dk.netarkivet.common.exceptions.ArgumentNotValid;
037import dk.netarkivet.common.exceptions.IOFailure;
038import dk.netarkivet.common.exceptions.IllegalState;
039
040/**
041 * Calculates MD5 or SHA1 checksums on files using the built-in Java methods.
042 */
043public final class ChecksumCalculator {
044
045    /** Defines the MD5 checksum algorithm */
046    public static final String MD5 = "MD5";
047    /** Defines the SHA1 checksum algorithm */
048    public static final String SHA1 = "SHA1";
049
050    /**
051     * Calculate MD5 for a file.
052     *
053     * @param src The file to calculate MD5 for.
054     * @return The MD5 sum of a file as a 32 characters long Hex string.
055     */
056    public static String calculateMd5(final File src) {
057        ArgumentNotValid.checkNotNull(src, "File src");
058        ArgumentNotValid.checkTrue(src.isFile(), "Argument should be a file");
059        // Get the MD5 and return it
060        try {
061            final FileInputStream fileInputStream = new FileInputStream(src);
062            try {
063                return calculateMd5(fileInputStream);
064            } finally {
065                IOUtils.closeQuietly(fileInputStream);
066            }
067        } catch (FileNotFoundException e) {
068            throw new IOFailure("Could not read file '" + src.getAbsolutePath() + "'", e);
069        }
070    }
071
072    /**
073     * Calculate the SHA-1 DIGEST for a file.
074     *
075     * @param src The file to calculate SHA-1 for.
076     * @return The SHA-1 sum of a file as a 32 characters long Hex string.
077     */
078    public static String calculateSha1(final File src) {
079        ArgumentNotValid.checkNotNull(src, "File src");
080        ArgumentNotValid.checkTrue(src.isFile(), "Argument should be a file");
081        // Get the SHA-1 digest and return it
082        try {
083            final FileInputStream fileInputStream = new FileInputStream(src);
084            try {
085                return calculateSha1(fileInputStream);
086            } finally {
087                IOUtils.closeQuietly(fileInputStream);
088            }
089        } catch (FileNotFoundException e) {
090            throw new IOFailure("Could not read file '" + src.getAbsolutePath() + "'", e);
091        }
092    }
093
094    /**
095     * Calculates an MD5 digest on an InputStream, throwing away the data itself. Throws Alert if there is an error
096     * reading from the stream
097     *
098     * @param instream An <code>InputStream</code> to calculate the MD5 digest on. The contents of the stream will be
099     * consumed by this call, but the stream will not be closed.
100     * @return The calculated MD5 digest as a string.
101     */
102    public static String calculateMd5(final InputStream instream) {
103        return calculateDigest(instream, MD5);
104    }
105
106    /**
107     * Calculates an SHA-1 digest on an InputStream, throwing away the data itself. Throws Alert if there is an error
108     * reading from the stream
109     *
110     * @param instream An <code>InputStream</code> to calculate the SHA-1 digest on. The contents of the stream will be
111     * consumed by this call, but the stream will not be closed.
112     * @return The calculated SHA-1 digest as a string.
113     */
114    public static String calculateSha1(final InputStream instream) {
115        return calculateDigest(instream, SHA1);
116    }
117
118    /**
119     * Generate an MD5 for a byte array.
120     *
121     * @param msg The given bytearray
122     * @return the MD5 for a byte array
123     */
124    public static String calculateMd5(final byte[] msg) {
125        return toHex(getMessageDigest(MD5).digest(msg));
126    }
127
128    /**
129     * Calculates a digest on an InputStream, throwing away the data itself. Throws Alert if there is an error reading
130     * from the stream
131     *
132     * @param instream An <code>InputStream</code> to calculate the digest on. The contents of the stream will be
133     * consumed by this call, but the stream will not be closed.
134     * @param algorithm digest algorithm to use
135     * @return The calculated digest as a string.
136     */
137    private static String calculateDigest(final InputStream instream, final String algorithm) {
138        final byte[] buffer = new byte[Constants.IO_BUFFER_SIZE];
139        final MessageDigest messageDigest = getMessageDigest(algorithm);
140        messageDigest.reset();
141        int bytesRead;
142        try {
143            while ((bytesRead = instream.read(buffer)) != -1) {
144                messageDigest.update(buffer, 0, bytesRead);
145            }
146        } catch (IOException e) {
147            throw new IOFailure("Error making a '" + algorithm + "' digest on the inputstream", e);
148        }
149        return toHex(messageDigest.digest());
150    }
151
152    private static final char[] hexdigit = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e',
153            'f'};
154
155    /**
156     * Converts a byte array to a hexstring.
157     *
158     * @param ba the bytearray to be converted
159     * @return ba converted to a hexstring
160     */
161    public static String toHex(final byte[] ba) {
162        int baLen = ba.length;
163        char[] hexchars = new char[baLen * 2];
164        int cIdx = 0;
165        for (int i = 0; i < baLen; ++i) {
166            hexchars[cIdx++] = hexdigit[(ba[i] >> 4) & 0x0F];
167            hexchars[cIdx++] = hexdigit[ba[i] & 0x0F];
168        }
169        return new String(hexchars);
170    }
171
172    /**
173     * Get a MessageDigest for a specific algorithm.
174     *
175     * @param algorithm a specific MessageDigest algorithm.
176     * @return a MessageDigest for a specific algorithm
177     */
178    public static MessageDigest getMessageDigest(final String algorithm) {
179        MessageDigest messageDigest;
180        try {
181            messageDigest = MessageDigest.getInstance(algorithm);
182        } catch (NoSuchAlgorithmException e) {
183            throw new IllegalState("The '" + algorithm + "' algorithm is not available", e);
184        }
185        return messageDigest;
186    }
187
188}