001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.viewerproxy; 025 026import java.io.File; 027import java.io.IOException; 028import java.io.OutputStream; 029import java.io.Serializable; 030import java.util.Arrays; 031 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import dk.netarkivet.common.CommonSettings; 036import dk.netarkivet.common.Constants; 037import dk.netarkivet.common.distribute.arcrepository.BatchStatus; 038import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord; 039import dk.netarkivet.common.distribute.arcrepository.Replica; 040import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient; 041import dk.netarkivet.common.exceptions.ArgumentNotValid; 042import dk.netarkivet.common.exceptions.IOFailure; 043import dk.netarkivet.common.utils.FileUtils; 044import dk.netarkivet.common.utils.Settings; 045import dk.netarkivet.common.utils.batch.FileBatchJob; 046import dk.netarkivet.harvester.HarvesterSettings; 047 048 049/** 050 * Wrapper for an URIResolver, which retrieves raw data on given specific URLs, and forwards all others to the wrapped 051 * handler. This allows you to get metadata, individual files, and individual records. 052 */ 053@SuppressWarnings({"serial", "unused"}) 054public class GetDataResolver extends CommandResolver { 055 /** Logger for this class. */ 056 private static final Logger log = LoggerFactory.getLogger(GetDataResolver.class); 057 058 /** The client for the arc repository. */ 059 ViewerArcRepositoryClient client; 060 061 /** Command for getting a single file from the bitarchive. */ 062 public static final String GET_FILE_COMMAND = "/getFile"; 063 /** 064 * Command for getting a specific record (file+offset) from an (W)ARC file in the bitarchive. 065 */ 066 public static final String GET_RECORD_COMMAND = "/getRecord"; 067 /** Command for getting all metadata for a single job. */ 068 public static final String GET_METADATA_COMMAND = "/getMetadata"; 069 070 /** Parameter defining the file to return the getting files or records. */ 071 public static final String FILE_NAME_PARAMETER = "arcFile"; 072 /** Parameter defining the offset into an ARC file for getting a record. */ 073 public static final String FILE_OFFSET_PARAMETER = "arcOffset"; 074 /** Parameter for ids of jobs to get metadata for. */ 075 public static final String JOB_ID_PARAMETER = "jobID"; 076 077 /** HTTP response code for OK. */ 078 private static final int OK_RESPONSE_CODE = 200; 079 080 /** HTTP response code for failed. */ 081 private static final int FAILED_RESPONSE_CODE = 500; 082 083 /** 084 * Make a new GetDataResolver, which calls commands on the arcrepository, and forwards all other requests to the 085 * given URIResolver. 086 * 087 * @param ur The URIResolver to handle all other uris. 088 * @param client the arcrepository client 089 * @throws ArgumentNotValid if either argument is null. 090 */ 091 public GetDataResolver(URIResolver ur, ViewerArcRepositoryClient client) { 092 super(ur); 093 ArgumentNotValid.checkNotNull(client, "ArcRepositoryClient client"); 094 this.client = client; 095 } 096 097 /** 098 * Handles parsing of the URL and delegating to relevant methods for known commands. Commands are: getFile - params: 099 * fileName - effect: get the full file specified by the parameter from the bitarchive. getRecord - params: 100 * fileName,offset - effect: get a single ARC record from the bitarchive. getMetadata - params: jobID - effect: get 101 * all metadata for a single job from the bitarchive. 102 * 103 * @param request The request to check 104 * @param response The response to give command results to if it is a command 105 * @return Whether this was a command URL 106 * @throws IOFailure in any trouble. 107 */ 108 protected boolean executeCommand(Request request, Response response) { 109 // If the url is for this host (potential command) 110 if (isCommandHostRequest(request)) { 111 log.debug("Executing command " + request.getURI()); 112 // get path 113 String path = request.getURI().getPath(); 114 if (path.equals(GetDataResolver.GET_FILE_COMMAND)) { 115 doGetFile(request, response); 116 return true; 117 } 118 if (path.equals(GetDataResolver.GET_RECORD_COMMAND)) { 119 doGetRecord(request, response); 120 return true; 121 } 122 if (path.equals(GetDataResolver.GET_METADATA_COMMAND)) { 123 doGetMetadata(request, response); 124 return true; 125 } 126 } 127 return false; 128 } 129 130 /** 131 * Get all metadata for a given job id, and write it to response. Multiple metadata files will be concatenated. 132 * 133 * @param request A get metadata request; a parameter jobID is expected to be set. 134 * @param response Metadata will be written to this response. 135 * @throws IOFailure in case of missing or bad parameters. 136 */ 137 private void doGetMetadata(Request request, Response response) { 138 String idString = getParameterOrThrowException(request, JOB_ID_PARAMETER); 139 try { 140 Long id = Long.parseLong(idString); 141 FileBatchJob job = new GetFileBatchJob(); 142 job.processOnlyFilesMatching(".*" + id + ".*" + Settings.get(CommonSettings.METADATAFILE_REGEX_SUFFIX)); 143 BatchStatus b = client.batch(job, Settings.get(CommonSettings.USE_REPLICA_ID)); 144 if (b.getNoOfFilesProcessed() > b.getFilesFailed().size() && b.hasResultFile()) { 145 b.appendResults(response.getOutputStream()); 146 response.setStatus(OK_RESPONSE_CODE); 147 } else { 148 if (b.getNoOfFilesProcessed() > 0) { 149 throw new IOFailure("Error finding metadata for job " + id + ": Processed " 150 + b.getNoOfFilesProcessed() + ", failed on files " + b.getFilesFailed()); 151 } else { 152 throw new IOFailure("No metadata found for job " + id + " or error while fetching metadata"); 153 } 154 } 155 } catch (NumberFormatException e) { 156 String errMsg = "The value '" + idString + "' of Parameter jobID is not a parsable job id"; 157 log.warn(errMsg, e); 158 throw new IOFailure(errMsg, e); 159 } 160 } 161 162 /** 163 * Get a record from an ARC file, and write it to response. If the record has size greater than 164 * settings.viewerproxy.maxSizeInBrowser then a header is added to turn the response into a file-download. 165 * 166 * @param request A getRecord request; parameters arcFile and arcOffset are expected to be set. 167 * @param response Metadata will be written to this response. 168 * @throws IOFailure in case of missing or bad parameters. 169 */ 170 private void doGetRecord(Request request, Response response) { 171 String fileName = getParameterOrThrowException(request, FILE_NAME_PARAMETER); 172 String offsetString = getParameterOrThrowException(request, FILE_OFFSET_PARAMETER); 173 try { 174 Long offset = Long.parseLong(offsetString); 175 BitarchiveRecord record = client.get(fileName, offset); 176 if (record == null) { 177 throw new IOFailure("Null record returned by " + "ViewerArcRepositoryClient.get(" + fileName + "," 178 + offset + "),"); 179 } 180 long maxSize = Settings.getLong(HarvesterSettings.MAXIMUM_OBJECT_IN_BROWSER); 181 // TODO: what happens if the record already has these headers defined? 182 if (record.getLength() > maxSize) { 183 response.addHeaderField("Content-Disposition", "Attachment; filename=record.txt"); 184 response.addHeaderField("Content-Type", "application/octet-stream"); 185 } 186 record.getData(response.getOutputStream()); 187 response.setStatus(OK_RESPONSE_CODE); 188 } catch (NumberFormatException e) { 189 String errMsg = "Unable to parse offsetstring '" + offsetString + "' as long"; 190 log.warn(errMsg, e); 191 throw new IOFailure(errMsg, e); 192 } 193 } 194 195 /** 196 * Get a file from bitarchive, and write it to response. 197 * 198 * @param request A getFile request; parameter arcFile is expected to be set. 199 * @param response File will be written to this response. 200 * @throws IOFailure in any trouble. 201 */ 202 private void doGetFile(Request request, Response response) { 203 String fileName = getParameterOrThrowException(request, FILE_NAME_PARAMETER); 204 long maxSize = Settings.getLong(HarvesterSettings.MAXIMUM_OBJECT_IN_BROWSER); 205 try { 206 File tempFile = null; 207 try { 208 tempFile = File.createTempFile(fileName, "download", FileUtils.getTempDir()); 209 client.getFile(fileName, Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID)), 210 tempFile); 211 long size = tempFile.length(); 212 response.addHeaderField("Content-Disposition", "Attachment; filename=" + fileName); 213 if (size > maxSize) { 214 log.info("Requested file {} of size {} is larger than maximum object in browser. Forcing browser to save file to disk"); 215 response.addHeaderField("Content-Type", "application/octet-stream"); 216 } 217 response.setStatus(OK_RESPONSE_CODE); 218 response.getOutputStream().flush(); 219 FileUtils.writeFileToStream(tempFile, response.getOutputStream()); 220 } finally { 221 if (tempFile != null) { 222 FileUtils.remove(tempFile); 223 } 224 } 225 } catch (IOException e) { 226 String errMsg = "Failure to getFile '" + fileName + "': "; 227 log.warn(errMsg, e); 228 throw new IOFailure(errMsg, e); 229 } 230 } 231 232 /** 233 * Get a single parameter out of a parameter-map, checking for errors, including empty string parameter value. 234 * 235 * @param request The request with the parameters 236 * @param name The name of the parameter 237 * @return The single value found trimmed 238 * @throws IOFailure if an error was encountered. 239 */ 240 private String getParameterOrThrowException(Request request, String name) { 241 String[] values = request.getParameterMap().get(name); 242 if (values == null || values.length == 0) { 243 throw new IOFailure("Missing parameter '" + name + "'"); 244 } 245 if (values.length > 1) { 246 throw new IOFailure("Multiple parameters for '" + name + "': " + Arrays.asList(values)); 247 } 248 // Check that trimmed value is not empty string 249 String returnValue = values[0].trim(); 250 if (returnValue.isEmpty()) { 251 throw new IOFailure("Trimmed value of parameter '" + name + "' is empty string!"); 252 } 253 return returnValue; 254 } 255 256 /** 257 * The trivial batch job: simply concatenate batched files to output. 258 */ 259 private static class GetFileBatchJob extends FileBatchJob implements Serializable { 260 261 public GetFileBatchJob() { 262 batchJobTimeout = 10 * Constants.ONE_MIN_IN_MILLIES; 263 } 264 265 /** Does nothing. */ 266 public void initialize(OutputStream os) { 267 } 268 269 /** 270 * Simply write file to output. 271 * 272 * @param file File to write to output. 273 * @param os Outputstream to write to. 274 * @return true always 275 */ 276 public boolean processFile(File file, OutputStream os) { 277 FileUtils.writeFileToStream(file, os); 278 return true; 279 } 280 281 /** does nothing. */ 282 public void finish(OutputStream os) { 283 } 284 } 285}