001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.distribute;
024
025import java.io.ByteArrayInputStream;
026import java.io.File;
027import java.io.FileInputStream;
028import java.io.FilterInputStream;
029import java.io.IOException;
030import java.io.InputStream;
031import java.net.URL;
032import java.net.URLConnection;
033import java.security.DigestInputStream;
034
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import dk.netarkivet.common.exceptions.ArgumentNotValid;
039import dk.netarkivet.common.exceptions.IOFailure;
040import dk.netarkivet.common.utils.ChecksumCalculator;
041import dk.netarkivet.common.utils.Settings;
042import dk.netarkivet.common.utils.SystemUtils;
043
044/**
045 * A remote file implemented with point-to-point HTTP communication. Optimised to communicate locally, if file is on the
046 * same host. Optimised to transfer 0 byte files inline.
047 */
048@SuppressWarnings({"serial"})
049public class HTTPRemoteFile extends AbstractRemoteFile {
050
051    /** The logger for this class. */
052    private static final Logger log = LoggerFactory.getLogger(HTTPRemoteFile.class);
053
054    /** The default place in classpath where the settings file can be found. */
055    private static String DEFAULT_SETTINGS_CLASSPATH = "dk/netarkivet/common/distribute/HTTPRemoteFileSettings.xml";
056
057    /*
058     * The static initialiser is called when the class is loaded. It will add default values for all settings defined in
059     * this class, by loading them from a settings.xml file in classpath.
060     */
061    static {
062        Settings.addDefaultClasspathSettings(DEFAULT_SETTINGS_CLASSPATH);
063    }
064
065    /** The name of the host this file originated on. */
066    protected final String hostname;
067    /** The url that exposes this remote file. */
068    protected final URL url;
069    /** If useChecksums is true, contains the file checksum. */
070    protected final String checksum;
071
072    // NOTE: The constants defining setting names below are left non-final on
073    // purpose! Otherwise, the static initialiser that loads default values
074    // will not run.
075
076    /**
077     * <b>settings.common.remoteFile.port</b>: <br>
078     * The setting for the HTTP remotefile port number used.
079     */
080    public static String HTTPREMOTEFILE_PORT_NUMBER = "settings.common.remoteFile.port";
081
082    /**
083     * Initialises a remote file implemented by point-to-point HTTP communication.
084     *
085     * @param file The file to make a remote file for
086     * @param useChecksums Whether communications are checksummed. If true, getChecksum will also return the checksum.
087     * @param fileDeletable if true, the file given to this method is deletable, once it is transferred.
088     * @param multipleDownloads if true, the file may be transferred more than once. Otherwise, all file handles are
089     * attempted to be made invalid after the first transfer, although no guarantees are made.
090     * @throws ArgumentNotValid if file is null, or not a readable file.
091     * @throws IOFailure if checksums are requested, but i/o errors occur while checksumming.
092     */
093    protected HTTPRemoteFile(File file, boolean useChecksums, boolean fileDeletable, boolean multipleDownloads) {
094        super(file, useChecksums, fileDeletable, multipleDownloads);
095        this.hostname = SystemUtils.getLocalHostName();
096        if (filesize > 0) {
097            this.url = getRegistry().registerFile(this.file, this.fileDeletable);
098        } else {
099            this.url = null;
100        }
101        if (useChecksums) {
102            this.checksum = ChecksumCalculator.calculateMd5(file);
103        } else {
104            this.checksum = null;
105        }
106    }
107
108    /**
109     * Initialises a remote file implemented by point-to-point HTTP communication.
110     *
111     * @param f The file to make a remote file for
112     * @param useChecksums Whether communications are checksummed. If true, getChecksum will also return the checksum.
113     * @param fileDeletable if true, the file given to this method is deletable, once it is transferred.
114     * @param multipleDownloads if true, the file may be transferred more than once. Otherwise, all file handles are
115     * attempted to be made invalid after the first transfer, although no guarantees are made.
116     * @throws ArgumentNotValid if file is null, or not a readable file.
117     * @throws IOFailure if checksums are requested, but i/o errors occur while checksumming.
118     */
119    public static RemoteFile getInstance(File f, Boolean useChecksums, Boolean fileDeletable, Boolean multipleDownloads) {
120        return new HTTPRemoteFile(f, useChecksums, fileDeletable, multipleDownloads);
121    }
122
123    /**
124     * Get the webserver registry for this class of files. Meant to be subclassed for specialised versions of this file.
125     *
126     * @return The reigstry.
127     */
128    protected HTTPRemoteFileRegistry getRegistry() {
129        return HTTPRemoteFileRegistry.getInstance();
130    }
131
132    /**
133     * Copy this remote file to the given file. If the file resides on the current machine, remote file transfer is done
134     * locally. Otherwise, the remote file is transferred over http. If the file is not set to be able to be transferred
135     * multiple times, it is cleaned up after the transfer.
136     *
137     * @param destFile The file to write the remote file to.
138     * @throws ArgumentNotValid on null destFile, or parent to destfile is not a writeable directory, or destfile exists
139     * and cannot be overwritten.
140     * @throws IOFailure on I/O trouble writing remote file to destination.
141     */
142    public void copyTo(File destFile) {
143        ArgumentNotValid.checkNotNull(destFile, "File destFile");
144        destFile = destFile.getAbsoluteFile();
145        if ((!destFile.isFile() || !destFile.canWrite())
146                && (!destFile.getParentFile().isDirectory() || !destFile.getParentFile().canWrite())) {
147            throw new ArgumentNotValid("Destfile '" + destFile + "' does not point to a writable file for "
148                    + "remote file '" + file + "'");
149        }
150        if (isLocal() && fileDeletable && !multipleDownloads && !useChecksums) {
151            if (file.renameTo(destFile)) {
152                cleanup();
153                return;
154            }
155            // if rename fails we fall back to normal usage.
156        }
157        super.copyTo(destFile);
158    }
159
160    /**
161     * Get an input stream representing the remote file. If the file resides on the current machine, the input stream is
162     * to the local file. Otherwise, the remote file is transferred over http. The close method of the input stream will
163     * cleanup this handle, and if checksums are requested, will check the checksums on close. If the file is not set to
164     * be able to be transferred multiple times, it is cleaned up after the transfer.
165     *
166     * @return An input stream for the remote file.
167     * @throws IOFailure on I/O trouble generating inputstream for remote file. Also, the returned remote file will
168     * throw IOFailure on close, if checksums are requested, but do not match.
169     */
170    public InputStream getInputStream() {
171        if (filesize == 0) {
172            return new ByteArrayInputStream(new byte[] {});
173        }
174        try {
175            InputStream is = null;
176            if (isLocal()) {
177                is = new FileInputStream(file);
178            } else {
179                URLConnection urlConnection = getRegistry().openConnection(url);
180                // ensure not getting some cached version
181                urlConnection.setUseCaches(false);
182                is = urlConnection.getInputStream();
183            }
184            if (useChecksums) {
185                is = new DigestInputStream(is, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
186            }
187            return new FilterInputStream(is) {
188                public void close() {
189                    if (useChecksums) {
190                        String newChecksum = ChecksumCalculator.toHex(((DigestInputStream) in).getMessageDigest()
191                                .digest());
192                        if (!newChecksum.equals(checksum)) {
193                            throw new IOFailure("Checksum mismatch! Expected '" + checksum + "' but was '"
194                                    + newChecksum + "'");
195                        }
196                    }
197                    if (!multipleDownloads) {
198                        cleanup();
199                    }
200                }
201            };
202        } catch (IOException e) {
203            throw new IOFailure("Unable to get inputstream for '" + file + "' from '" + url + "'", e);
204        }
205    }
206
207    /**
208     * Invalidate all file handles, by asking the remote registry to remove the url for this remote file from the list
209     * of shared files. Invalidating a file handle may delete the original files, if deletable. This method does not
210     * throw exceptions, but will warn on errors.
211     */
212    public void cleanup() {
213        if (filesize == 0) {
214            return;
215        }
216        try {
217            URLConnection urlConnection = getRegistry().openConnection(getRegistry().getCleanupUrl(url));
218            urlConnection.setUseCaches(false);
219            urlConnection.connect();
220            urlConnection.getInputStream();
221        } catch (IOException e) {
222            log.warn("Unable to cleanup file '{}' with URL'{}'", file.getAbsolutePath(), url, e);
223        }
224    }
225
226    /**
227     * Get checksum for file, or null if checksums were not requested.
228     *
229     * @return checksum for file, or null if checksums were not requested.
230     */
231    public String getChecksum() {
232        return checksum;
233    }
234
235    /**
236     * Helper method to determine if file resides on local machine.
237     *
238     * @return true if the file is on the local machine, false otherwise.
239     */
240    protected boolean isLocal() {
241        return SystemUtils.getLocalHostName().equals(hostname) && file.isFile() && file.canRead();
242    }
243
244    /**
245     * Retrieval of the number of retries for retrieving a file from a HTTP server. TODO define a setting for HTTP
246     * retries, just like for the FTP retries.
247     *
248     * @return The number of retries. Currently a constant: 1.
249     */
250    @Override
251    public int getNumberOfRetries() {
252        // TODO make settings for this.
253        return 1;
254    }
255
256}