001/* 002 * #%L 003 * Netarchivesuite - common 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.common.distribute; 024 025import java.io.ByteArrayInputStream; 026import java.io.File; 027import java.io.FileInputStream; 028import java.io.FilterInputStream; 029import java.io.IOException; 030import java.io.InputStream; 031import java.net.URL; 032import java.net.URLConnection; 033import java.security.DigestInputStream; 034 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037 038import dk.netarkivet.common.exceptions.ArgumentNotValid; 039import dk.netarkivet.common.exceptions.IOFailure; 040import dk.netarkivet.common.utils.ChecksumCalculator; 041import dk.netarkivet.common.utils.Settings; 042import dk.netarkivet.common.utils.SystemUtils; 043 044/** 045 * A remote file implemented with point-to-point HTTP communication. Optimised to communicate locally, if file is on the 046 * same host. Optimised to transfer 0 byte files inline. 047 */ 048@SuppressWarnings({"serial"}) 049public class HTTPRemoteFile extends AbstractRemoteFile { 050 051 /** The logger for this class. */ 052 private static final Logger log = LoggerFactory.getLogger(HTTPRemoteFile.class); 053 054 /** The default place in classpath where the settings file can be found. */ 055 private static String DEFAULT_SETTINGS_CLASSPATH = "dk/netarkivet/common/distribute/HTTPRemoteFileSettings.xml"; 056 057 /* 058 * The static initialiser is called when the class is loaded. It will add default values for all settings defined in 059 * this class, by loading them from a settings.xml file in classpath. 060 */ 061 static { 062 Settings.addDefaultClasspathSettings(DEFAULT_SETTINGS_CLASSPATH); 063 } 064 065 /** The name of the host this file originated on. */ 066 protected final String hostname; 067 /** The url that exposes this remote file. */ 068 protected final URL url; 069 /** If useChecksums is true, contains the file checksum. */ 070 protected final String checksum; 071 072 // NOTE: The constants defining setting names below are left non-final on 073 // purpose! Otherwise, the static initialiser that loads default values 074 // will not run. 075 076 /** 077 * <b>settings.common.remoteFile.port</b>: <br> 078 * The setting for the HTTP remotefile port number used. 079 */ 080 public static String HTTPREMOTEFILE_PORT_NUMBER = "settings.common.remoteFile.port"; 081 082 /** 083 * Initialises a remote file implemented by point-to-point HTTP communication. 084 * 085 * @param file The file to make a remote file for 086 * @param useChecksums Whether communications are checksummed. If true, getChecksum will also return the checksum. 087 * @param fileDeletable if true, the file given to this method is deletable, once it is transferred. 088 * @param multipleDownloads if true, the file may be transferred more than once. Otherwise, all file handles are 089 * attempted to be made invalid after the first transfer, although no guarantees are made. 090 * @throws ArgumentNotValid if file is null, or not a readable file. 091 * @throws IOFailure if checksums are requested, but i/o errors occur while checksumming. 092 */ 093 protected HTTPRemoteFile(File file, boolean useChecksums, boolean fileDeletable, boolean multipleDownloads) { 094 super(file, useChecksums, fileDeletable, multipleDownloads); 095 this.hostname = SystemUtils.getLocalHostName(); 096 if (filesize > 0) { 097 this.url = getRegistry().registerFile(this.file, this.fileDeletable); 098 } else { 099 this.url = null; 100 } 101 if (useChecksums) { 102 this.checksum = ChecksumCalculator.calculateMd5(file); 103 } else { 104 this.checksum = null; 105 } 106 } 107 108 /** 109 * Initialises a remote file implemented by point-to-point HTTP communication. 110 * 111 * @param f The file to make a remote file for 112 * @param useChecksums Whether communications are checksummed. If true, getChecksum will also return the checksum. 113 * @param fileDeletable if true, the file given to this method is deletable, once it is transferred. 114 * @param multipleDownloads if true, the file may be transferred more than once. Otherwise, all file handles are 115 * attempted to be made invalid after the first transfer, although no guarantees are made. 116 * @throws ArgumentNotValid if file is null, or not a readable file. 117 * @throws IOFailure if checksums are requested, but i/o errors occur while checksumming. 118 */ 119 public static RemoteFile getInstance(File f, Boolean useChecksums, Boolean fileDeletable, Boolean multipleDownloads) { 120 return new HTTPRemoteFile(f, useChecksums, fileDeletable, multipleDownloads); 121 } 122 123 /** 124 * Get the webserver registry for this class of files. Meant to be subclassed for specialised versions of this file. 125 * 126 * @return The reigstry. 127 */ 128 protected HTTPRemoteFileRegistry getRegistry() { 129 return HTTPRemoteFileRegistry.getInstance(); 130 } 131 132 /** 133 * Copy this remote file to the given file. If the file resides on the current machine, remote file transfer is done 134 * locally. Otherwise, the remote file is transferred over http. If the file is not set to be able to be transferred 135 * multiple times, it is cleaned up after the transfer. 136 * 137 * @param destFile The file to write the remote file to. 138 * @throws ArgumentNotValid on null destFile, or parent to destfile is not a writeable directory, or destfile exists 139 * and cannot be overwritten. 140 * @throws IOFailure on I/O trouble writing remote file to destination. 141 */ 142 public void copyTo(File destFile) { 143 ArgumentNotValid.checkNotNull(destFile, "File destFile"); 144 destFile = destFile.getAbsoluteFile(); 145 if ((!destFile.isFile() || !destFile.canWrite()) 146 && (!destFile.getParentFile().isDirectory() || !destFile.getParentFile().canWrite())) { 147 throw new ArgumentNotValid("Destfile '" + destFile + "' does not point to a writable file for " 148 + "remote file '" + file + "'"); 149 } 150 if (isLocal() && fileDeletable && !multipleDownloads && !useChecksums) { 151 if (file.renameTo(destFile)) { 152 cleanup(); 153 return; 154 } 155 // if rename fails we fall back to normal usage. 156 } 157 super.copyTo(destFile); 158 } 159 160 /** 161 * Get an input stream representing the remote file. If the file resides on the current machine, the input stream is 162 * to the local file. Otherwise, the remote file is transferred over http. The close method of the input stream will 163 * cleanup this handle, and if checksums are requested, will check the checksums on close. If the file is not set to 164 * be able to be transferred multiple times, it is cleaned up after the transfer. 165 * 166 * @return An input stream for the remote file. 167 * @throws IOFailure on I/O trouble generating inputstream for remote file. Also, the returned remote file will 168 * throw IOFailure on close, if checksums are requested, but do not match. 169 */ 170 public InputStream getInputStream() { 171 if (filesize == 0) { 172 return new ByteArrayInputStream(new byte[] {}); 173 } 174 try { 175 InputStream is = null; 176 if (isLocal()) { 177 is = new FileInputStream(file); 178 } else { 179 URLConnection urlConnection = getRegistry().openConnection(url); 180 // ensure not getting some cached version 181 urlConnection.setUseCaches(false); 182 is = urlConnection.getInputStream(); 183 } 184 if (useChecksums) { 185 is = new DigestInputStream(is, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5)); 186 } 187 return new FilterInputStream(is) { 188 public void close() { 189 if (useChecksums) { 190 String newChecksum = ChecksumCalculator.toHex(((DigestInputStream) in).getMessageDigest() 191 .digest()); 192 if (!newChecksum.equals(checksum)) { 193 throw new IOFailure("Checksum mismatch! Expected '" + checksum + "' but was '" 194 + newChecksum + "'"); 195 } 196 } 197 if (!multipleDownloads) { 198 cleanup(); 199 } 200 } 201 }; 202 } catch (IOException e) { 203 throw new IOFailure("Unable to get inputstream for '" + file + "' from '" + url + "'", e); 204 } 205 } 206 207 /** 208 * Invalidate all file handles, by asking the remote registry to remove the url for this remote file from the list 209 * of shared files. Invalidating a file handle may delete the original files, if deletable. This method does not 210 * throw exceptions, but will warn on errors. 211 */ 212 public void cleanup() { 213 if (filesize == 0) { 214 return; 215 } 216 try { 217 URLConnection urlConnection = getRegistry().openConnection(getRegistry().getCleanupUrl(url)); 218 urlConnection.setUseCaches(false); 219 urlConnection.connect(); 220 urlConnection.getInputStream(); 221 } catch (IOException e) { 222 log.warn("Unable to cleanup file '{}' with URL'{}'", file.getAbsolutePath(), url, e); 223 } 224 } 225 226 /** 227 * Get checksum for file, or null if checksums were not requested. 228 * 229 * @return checksum for file, or null if checksums were not requested. 230 */ 231 public String getChecksum() { 232 return checksum; 233 } 234 235 /** 236 * Helper method to determine if file resides on local machine. 237 * 238 * @return true if the file is on the local machine, false otherwise. 239 */ 240 protected boolean isLocal() { 241 return SystemUtils.getLocalHostName().equals(hostname) && file.isFile() && file.canRead(); 242 } 243 244 /** 245 * Retrieval of the number of retries for retrieving a file from a HTTP server. TODO define a setting for HTTP 246 * retries, just like for the FTP retries. 247 * 248 * @return The number of retries. Currently a constant: 1. 249 */ 250 @Override 251 public int getNumberOfRetries() { 252 // TODO make settings for this. 253 return 1; 254 } 255 256}