001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.viewerproxy;
025
026import java.io.File;
027import java.io.IOException;
028import java.io.OutputStream;
029import java.io.Serializable;
030import java.util.Arrays;
031
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import dk.netarkivet.common.CommonSettings;
036import dk.netarkivet.common.Constants;
037import dk.netarkivet.common.distribute.arcrepository.BatchStatus;
038import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
039import dk.netarkivet.common.distribute.arcrepository.Replica;
040import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient;
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IOFailure;
043import dk.netarkivet.common.utils.FileUtils;
044import dk.netarkivet.common.utils.Settings;
045import dk.netarkivet.common.utils.batch.FileBatchJob;
046import dk.netarkivet.harvester.HarvesterSettings;
047
048
049/**
050 * Wrapper for an URIResolver, which retrieves raw data on given specific URLs, and forwards all others to the wrapped
051 * handler. This allows you to get metadata, individual files, and individual records.
052 */
053@SuppressWarnings({"serial", "unused"})
054public class GetDataResolver extends CommandResolver {
055    /** Logger for this class. */
056         private static final Logger log = LoggerFactory.getLogger(GetDataResolver.class);
057
058    /** The client for the arc repository. */
059    ViewerArcRepositoryClient client;
060
061    /** Command for getting a single file from the bitarchive. */
062    public static final String GET_FILE_COMMAND = "/getFile";
063    /**
064     * Command for getting a specific record (file+offset) from an (W)ARC file in the bitarchive.
065     */
066    public static final String GET_RECORD_COMMAND = "/getRecord";
067    /** Command for getting all metadata for a single job. */
068    public static final String GET_METADATA_COMMAND = "/getMetadata";
069
070    /** Parameter defining the file to return the getting files or records. */
071    public static final String FILE_NAME_PARAMETER = "arcFile";
072    /** Parameter defining the offset into an ARC file for getting a record. */
073    public static final String FILE_OFFSET_PARAMETER = "arcOffset";
074    /** Parameter for ids of jobs to get metadata for. */
075    public static final String JOB_ID_PARAMETER = "jobID";
076
077    /** HTTP response code for OK. */
078    private static final int OK_RESPONSE_CODE = 200;
079
080    /** HTTP response code for failed. */
081    private static final int FAILED_RESPONSE_CODE = 500;
082
083    /**
084     * Make a new GetDataResolver, which calls commands on the arcrepository, and forwards all other requests to the
085     * given URIResolver.
086     *
087     * @param ur The URIResolver to handle all other uris.
088     * @param client the arcrepository client
089     * @throws ArgumentNotValid if either argument is null.
090     */
091    public GetDataResolver(URIResolver ur, ViewerArcRepositoryClient client) {
092        super(ur);
093        ArgumentNotValid.checkNotNull(client, "ArcRepositoryClient client");
094        this.client = client;
095    }
096
097    /**
098     * Handles parsing of the URL and delegating to relevant methods for known commands. Commands are: getFile - params:
099     * fileName - effect: get the full file specified by the parameter from the bitarchive. getRecord - params:
100     * fileName,offset - effect: get a single ARC record from the bitarchive. getMetadata - params: jobID - effect: get
101     * all metadata for a single job from the bitarchive.
102     *
103     * @param request The request to check
104     * @param response The response to give command results to if it is a command
105     * @return Whether this was a command URL
106     * @throws IOFailure in any trouble.
107     */
108    protected boolean executeCommand(Request request, Response response) {
109        // If the url is for this host (potential command)
110        if (isCommandHostRequest(request)) {
111            log.debug("Executing command " + request.getURI());
112            // get path
113            String path = request.getURI().getPath();
114            if (path.equals(GetDataResolver.GET_FILE_COMMAND)) {
115                doGetFile(request, response);
116                return true;
117            }
118            if (path.equals(GetDataResolver.GET_RECORD_COMMAND)) {
119                doGetRecord(request, response);
120                return true;
121            }
122            if (path.equals(GetDataResolver.GET_METADATA_COMMAND)) {
123                doGetMetadata(request, response);
124                return true;
125            }
126        }
127        return false;
128    }
129
130    /**
131     * Get all metadata for a given job id, and write it to response. Multiple metadata files will be concatenated.
132     *
133     * @param request A get metadata request; a parameter jobID is expected to be set.
134     * @param response Metadata will be written to this response.
135     * @throws IOFailure in case of missing or bad parameters.
136     */
137    private void doGetMetadata(Request request, Response response) {
138        String idString = getParameterOrThrowException(request, JOB_ID_PARAMETER);
139        try {
140                Long id = Long.parseLong(idString);
141                FileBatchJob job = new GetFileBatchJob();
142                job.processOnlyFilesMatching(".*" + id + ".*" + Settings.get(CommonSettings.METADATAFILE_REGEX_SUFFIX));
143                BatchStatus b = client.batch(job, Settings.get(CommonSettings.USE_REPLICA_ID));
144                if (b.getNoOfFilesProcessed() > b.getFilesFailed().size() && b.hasResultFile()) {
145                        b.appendResults(response.getOutputStream());
146                        response.setStatus(OK_RESPONSE_CODE);
147                } else {
148                        if (b.getNoOfFilesProcessed() > 0) {
149                                throw new IOFailure("Error finding metadata for job " + id + ": Processed "
150                                                + b.getNoOfFilesProcessed() + ", failed on files " + b.getFilesFailed());
151                        } else {
152                                throw new IOFailure("No metadata found for job " + id + " or error while fetching metadata");
153                        }
154                }
155        } catch (NumberFormatException e) {
156                String errMsg = "The value '" + idString + "' of Parameter jobID is not a parsable job id";
157                log.warn(errMsg, e);
158                throw new IOFailure(errMsg, e);
159        }
160    }
161
162    /**
163     * Get a record from an ARC file, and write it to response. If the record has size greater than
164     * settings.viewerproxy.maxSizeInBrowser then a header is added to turn the response into a file-download.
165     *
166     * @param request A getRecord request; parameters arcFile and arcOffset are expected to be set.
167     * @param response Metadata will be written to this response.
168     * @throws IOFailure in case of missing or bad parameters.
169     */
170    private void doGetRecord(Request request, Response response) {
171        String fileName = getParameterOrThrowException(request, FILE_NAME_PARAMETER);
172        String offsetString = getParameterOrThrowException(request, FILE_OFFSET_PARAMETER);
173        try {
174                Long offset = Long.parseLong(offsetString);
175                BitarchiveRecord record = client.get(fileName, offset);
176                if (record == null) {
177                        throw new IOFailure("Null record returned by " + "ViewerArcRepositoryClient.get(" + fileName + ","
178                                        + offset + "),");
179                }
180                long maxSize = Settings.getLong(HarvesterSettings.MAXIMUM_OBJECT_IN_BROWSER);
181                // TODO: what happens if the record already has these headers defined?
182                if (record.getLength() > maxSize) {
183                        response.addHeaderField("Content-Disposition", "Attachment; filename=record.txt");
184                        response.addHeaderField("Content-Type", "application/octet-stream");
185                }
186                record.getData(response.getOutputStream());
187                response.setStatus(OK_RESPONSE_CODE);
188        } catch (NumberFormatException e) {
189                String errMsg = "Unable to parse offsetstring '" + offsetString + "' as long";
190                log.warn(errMsg, e);
191                throw new IOFailure(errMsg, e);
192        }
193    }
194
195    /**
196     * Get a file from bitarchive, and write it to response.
197     *
198     * @param request A getFile request; parameter arcFile is expected to be set.
199     * @param response File will be written to this response.
200     * @throws IOFailure in any trouble.
201     */
202    private void doGetFile(Request request, Response response) {
203        String fileName = getParameterOrThrowException(request, FILE_NAME_PARAMETER);
204        long maxSize = Settings.getLong(HarvesterSettings.MAXIMUM_OBJECT_IN_BROWSER);
205        try {
206                File tempFile = null;
207                try {
208                        tempFile = File.createTempFile(fileName, "download", FileUtils.getTempDir());
209                        client.getFile(fileName, Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID)),
210                                        tempFile);
211                        long size = tempFile.length();
212                response.addHeaderField("Content-Disposition", "Attachment; filename=" + fileName);
213                        if (size > maxSize) {
214                                log.info("Requested file {} of size {} is larger than maximum object in browser. Forcing browser to save file to disk");
215                                response.addHeaderField("Content-Type", "application/octet-stream");
216                        }
217                        response.setStatus(OK_RESPONSE_CODE);
218                response.getOutputStream().flush();
219                FileUtils.writeFileToStream(tempFile, response.getOutputStream());
220                } finally {
221                        if (tempFile != null) {
222                                FileUtils.remove(tempFile);
223                        }
224                }
225        } catch (IOException e) {
226                String errMsg = "Failure to getFile '" + fileName + "': ";
227                log.warn(errMsg, e);
228                throw new IOFailure(errMsg, e);
229        }
230    }
231
232    /**
233     * Get a single parameter out of a parameter-map, checking for errors, including empty string parameter value.
234     *
235     * @param request The request with the parameters
236     * @param name The name of the parameter
237     * @return The single value found trimmed
238     * @throws IOFailure if an error was encountered.
239     */
240    private String getParameterOrThrowException(Request request, String name) {
241        String[] values = request.getParameterMap().get(name);
242        if (values == null || values.length == 0) {
243            throw new IOFailure("Missing parameter '" + name + "'");
244        }
245        if (values.length > 1) {
246            throw new IOFailure("Multiple parameters for '" + name + "': " + Arrays.asList(values));
247        }
248        // Check that trimmed value is not empty string
249        String returnValue = values[0].trim();
250        if (returnValue.isEmpty()) {
251                throw new IOFailure("Trimmed value of parameter '" + name + "' is empty string!");
252        }
253        return returnValue;
254    }
255
256    /**
257     * The trivial batch job: simply concatenate batched files to output.
258     */
259    private static class GetFileBatchJob extends FileBatchJob implements Serializable {
260
261        public GetFileBatchJob() {
262            batchJobTimeout = 10 * Constants.ONE_MIN_IN_MILLIES;
263        }
264
265        /** Does nothing. */
266        public void initialize(OutputStream os) {
267        }
268
269        /**
270         * Simply write file to output.
271         *
272         * @param file File to write to output.
273         * @param os Outputstream to write to.
274         * @return true always
275         */
276        public boolean processFile(File file, OutputStream os) {
277            FileUtils.writeFileToStream(file, os);
278            return true;
279        }
280
281        /** does nothing. */
282        public void finish(OutputStream os) {
283        }
284    }
285}