001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.viewerproxy; 025 026import java.io.File; 027import java.io.IOException; 028import java.io.OutputStream; 029import java.io.Serializable; 030import java.util.Arrays; 031 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import dk.netarkivet.common.CommonSettings; 036import dk.netarkivet.common.Constants; 037import dk.netarkivet.common.distribute.arcrepository.BatchStatus; 038import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord; 039import dk.netarkivet.common.distribute.arcrepository.Replica; 040import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient; 041import dk.netarkivet.common.exceptions.ArgumentNotValid; 042import dk.netarkivet.common.exceptions.IOFailure; 043import dk.netarkivet.common.utils.FileUtils; 044import dk.netarkivet.common.utils.Settings; 045import dk.netarkivet.common.utils.batch.FileBatchJob; 046import dk.netarkivet.harvester.HarvesterSettings; 047 048 049/** 050 * Wrapper for an URIResolver, which retrieves raw data on given specific URLs, and forwards all others to the wrapped 051 * handler. This allows you to get metadata, individual files, and individual records. 052 */ 053@SuppressWarnings({"serial", "unused"}) 054public class GetDataResolver extends CommandResolver { 055 /** Logger for this class. */ 056 private static final Logger log = LoggerFactory.getLogger(GetDataResolver.class); 057 058 /** The client for the arc repository. */ 059 ViewerArcRepositoryClient client; 060 061 /** Command for getting a single file from the bitarchive. */ 062 public static final String GET_FILE_COMMAND = "/getFile"; 063 /** 064 * Command for getting a specific record (file+offset) from an ARC file in the bitarchive. 065 */ 066 public static final String GET_RECORD_COMMAND = "/getRecord"; 067 /** Command for getting all metadata for a single job. */ 068 public static final String GET_METADATA_COMMAND = "/getMetadata"; 069 070 /** Parameter defining the file to return the getting files or records. */ 071 public static final String FILE_NAME_PARAMETER = "arcFile"; 072 /** Parameter defining the offset into an ARC file for getting a record. */ 073 public static final String FILE_OFFSET_PARAMETER = "arcOffset"; 074 /** Parameter for ids of jobs to get metadata for. */ 075 public static final String JOB_ID_PARAMETER = "jobID"; 076 077 /** HTTP response code for OK. */ 078 private static final int OK_RESPONSE_CODE = 200; 079 080 /** HTTP response code for failed. */ 081 private static final int FAILED_RESPONSE_CODE = 500; 082 083 /** 084 * Make a new GetDataResolver, which calls commands on the arcrepository, and forwards all other requests to the 085 * given URIResolver. 086 * 087 * @param ur The URIResolver to handle all other uris. 088 * @param client the arcrepository client 089 * @throws ArgumentNotValid if either argument is null. 090 */ 091 public GetDataResolver(URIResolver ur, ViewerArcRepositoryClient client) { 092 super(ur); 093 ArgumentNotValid.checkNotNull(client, "ArcRepositoryClient client"); 094 this.client = client; 095 } 096 097 /** 098 * Handles parsing of the URL and delegating to relevant methods for known commands. Commands are: getFile - params: 099 * fileName - effect: get the full file specified by the parameter from the bitarchive. getRecord - params: 100 * fileName,offset - effect: get a single ARC record from the bitarchive. getMetadata - params: jobID - effect: get 101 * all metadata for a single job from the bitarchive. 102 * 103 * @param request The request to check 104 * @param response The response to give command results to if it is a command 105 * @return Whether this was a command URL 106 * @throws IOFailure in any trouble. 107 */ 108 protected boolean executeCommand(Request request, Response response) { 109 // If the url is for this host (potential command) 110 if (isCommandHostRequest(request)) { 111 log.debug("Executing command " + request.getURI()); 112 // get path 113 String path = request.getURI().getPath(); 114 if (path.equals(GetDataResolver.GET_FILE_COMMAND)) { 115 doGetFile(request, response); 116 return true; 117 } 118 if (path.equals(GetDataResolver.GET_RECORD_COMMAND)) { 119 doGetRecord(request, response); 120 return true; 121 } 122 if (path.equals(GetDataResolver.GET_METADATA_COMMAND)) { 123 doGetMetadata(request, response); 124 return true; 125 } 126 } 127 return false; 128 } 129 130 /** 131 * Get all metadata for a given job id, and write it to response. Multiple metadata files will be concatenated. 132 * 133 * @param request A get metadata request; a parameter jobID is expected to be set. 134 * @param response Metadata will be written to this response. 135 * @throws IOFailure in any trouble. 136 */ 137 private void doGetMetadata(Request request, Response response) { 138 String idString = getParameter(request, JOB_ID_PARAMETER); 139 // TODO in which case will getParameter return null (if ever) 140 // if yes, handle the case: idString==null 141 if (idString != null) { 142 try { 143 Long id = Long.parseLong(idString); 144 FileBatchJob job = new GetFileBatchJob(); 145 job.processOnlyFilesMatching(id + Constants.METADATA_FILE_PATTERN_SUFFIX); 146 BatchStatus b = client.batch(job, Settings.get(CommonSettings.USE_REPLICA_ID)); 147 if (b.getNoOfFilesProcessed() > b.getFilesFailed().size() && b.hasResultFile()) { 148 b.appendResults(response.getOutputStream()); 149 response.setStatus(OK_RESPONSE_CODE); 150 } else { 151 if (b.getNoOfFilesProcessed() > 0) { 152 throw new IOFailure("Error finding metadata for job " + id + ": Processed " 153 + b.getNoOfFilesProcessed() + ", failed on files " + b.getFilesFailed()); 154 } else { 155 throw new IOFailure("No metadata found for job " + id + " or error while fetching metadata"); 156 } 157 } 158 } catch (NumberFormatException e) { 159 String errMsg = "The value '" + idString + "' of Parameter jobID is not a parsable job id"; 160 log.warn(errMsg, e); 161 throw new IOFailure(errMsg, e); 162 } 163 } 164 } 165 166 /** 167 * Get a record from an ARC file, and write it to response. If the record has size greater than 168 * settings.viewerproxy.maxSizeInBrowser then a header is added to turn the response into a file-download. 169 * 170 * @param request A get metadata request; parameters arcFile and arcOffset are expected to be set. 171 * @param response Metadata will be written to this response. 172 * @throws IOFailure in any trouble. 173 */ 174 private void doGetRecord(Request request, Response response) { 175 String fileName = getParameter(request, FILE_NAME_PARAMETER); 176 String offsetString = getParameter(request, FILE_OFFSET_PARAMETER); 177 // TODO in which case will getParameter return null if ever? 178 // If yes, handle the else case 179 if (fileName != null && offsetString != null) { 180 try { 181 Long offset = Long.parseLong(offsetString); 182 BitarchiveRecord record = client.get(fileName, offset); 183 if (record == null) { 184 throw new IOFailure("Null record returned by " + "ViewerArcRepositoryClient.get(" + fileName + "," 185 + offset + "),"); 186 } 187 long maxSize = Settings.getLong(HarvesterSettings.MAXIMUM_OBJECT_IN_BROWSER); 188 // TODO: what happens if the record already has these headers defined? 189 if (record.getLength() > maxSize) { 190 response.addHeaderField("Content-Disposition", "Attachment; filename=record.txt"); 191 response.addHeaderField("Content-Type", "application/octet-stream"); 192 } 193 record.getData(response.getOutputStream()); 194 response.setStatus(OK_RESPONSE_CODE); 195 } catch (NumberFormatException e) { 196 String errMsg = "Unable to parse offsetstring '" + offsetString + "' as long"; 197 log.warn(errMsg, e); 198 throw new IOFailure(errMsg, e); 199 } 200 } 201 } 202 203 /** 204 * Get a file from bitarchive, and write it to response. 205 * 206 * @param request A get metadata request; parameter arcFile is expected to be set. 207 * @param response File will be written to this response. 208 * @throws IOFailure in any trouble. 209 */ 210 private void doGetFile(Request request, Response response) { 211 String fileName = getParameter(request, FILE_NAME_PARAMETER); 212 // TODO in which case will getParameter return null? 213 if (fileName != null) { 214 try { 215 File tempFile = null; 216 try { 217 tempFile = File.createTempFile("getFile", "download", FileUtils.getTempDir()); 218 client.getFile(fileName, Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID)), 219 tempFile); 220 FileUtils.writeFileToStream(tempFile, response.getOutputStream()); 221 response.setStatus(OK_RESPONSE_CODE); 222 } finally { 223 if (tempFile != null) { 224 FileUtils.remove(tempFile); 225 } 226 } 227 } catch (IOException e) { 228 String errMsg = "Failure to getFile '" + fileName + "': "; 229 log.warn(errMsg, e); 230 throw new IOFailure(errMsg, e); 231 } 232 } 233 } 234 235 /** 236 * Get a single parameter out of a parametermap, checking for errors. 237 * 238 * @param request The request with the parameters 239 * @param name The name of the parameter 240 * @return The single value found 241 * @throws IOFailure if an error was encountered. 242 */ 243 private String getParameter(Request request, String name) { 244 String[] values = request.getParameterMap().get(name); 245 if (values == null || values.length == 0) { 246 throw new IOFailure("Missing parameter '" + name + "'"); 247 } 248 if (values.length > 1) { 249 throw new IOFailure("Multiple parameters for '" + name + "': " + Arrays.asList(values)); 250 } 251 return values[0]; 252 } 253 254 /** 255 * The trivial batch job: simply concatenate batched files to output. 256 */ 257 private static class GetFileBatchJob extends FileBatchJob implements Serializable { 258 259 public GetFileBatchJob() { 260 batchJobTimeout = 10 * Constants.ONE_MIN_IN_MILLIES; 261 } 262 263 /** Does nothing. */ 264 public void initialize(OutputStream os) { 265 } 266 267 /** 268 * Simply write file to output. 269 * 270 * @param file File to write to output. 271 * @param os Outputstream to write to. 272 * @return true. 273 */ 274 public boolean processFile(File file, OutputStream os) { 275 FileUtils.writeFileToStream(file, os); 276 return true; 277 } 278 279 /** does nothing. */ 280 public void finish(OutputStream os) { 281 } 282 } 283}