001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.viewerproxy;
025
026import java.io.File;
027import java.io.IOException;
028import java.io.OutputStream;
029import java.io.Serializable;
030import java.util.Arrays;
031
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import dk.netarkivet.common.CommonSettings;
036import dk.netarkivet.common.Constants;
037import dk.netarkivet.common.distribute.arcrepository.BatchStatus;
038import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
039import dk.netarkivet.common.distribute.arcrepository.Replica;
040import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient;
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IOFailure;
043import dk.netarkivet.common.utils.FileUtils;
044import dk.netarkivet.common.utils.Settings;
045import dk.netarkivet.common.utils.batch.FileBatchJob;
046import dk.netarkivet.harvester.HarvesterSettings;
047
048
049/**
050 * Wrapper for an URIResolver, which retrieves raw data on given specific URLs, and forwards all others to the wrapped
051 * handler. This allows you to get metadata, individual files, and individual records.
052 */
053@SuppressWarnings({"serial", "unused"})
054public class GetDataResolver extends CommandResolver {
055    /** Logger for this class. */
056         private static final Logger log = LoggerFactory.getLogger(GetDataResolver.class);
057
058    /** The client for the arc repository. */
059    ViewerArcRepositoryClient client;
060
061    /** Command for getting a single file from the bitarchive. */
062    public static final String GET_FILE_COMMAND = "/getFile";
063    /**
064     * Command for getting a specific record (file+offset) from an ARC file in the bitarchive.
065     */
066    public static final String GET_RECORD_COMMAND = "/getRecord";
067    /** Command for getting all metadata for a single job. */
068    public static final String GET_METADATA_COMMAND = "/getMetadata";
069
070    /** Parameter defining the file to return the getting files or records. */
071    public static final String FILE_NAME_PARAMETER = "arcFile";
072    /** Parameter defining the offset into an ARC file for getting a record. */
073    public static final String FILE_OFFSET_PARAMETER = "arcOffset";
074    /** Parameter for ids of jobs to get metadata for. */
075    public static final String JOB_ID_PARAMETER = "jobID";
076
077    /** HTTP response code for OK. */
078    private static final int OK_RESPONSE_CODE = 200;
079
080    /** HTTP response code for failed. */
081    private static final int FAILED_RESPONSE_CODE = 500;
082
083    /**
084     * Make a new GetDataResolver, which calls commands on the arcrepository, and forwards all other requests to the
085     * given URIResolver.
086     *
087     * @param ur The URIResolver to handle all other uris.
088     * @param client the arcrepository client
089     * @throws ArgumentNotValid if either argument is null.
090     */
091    public GetDataResolver(URIResolver ur, ViewerArcRepositoryClient client) {
092        super(ur);
093        ArgumentNotValid.checkNotNull(client, "ArcRepositoryClient client");
094        this.client = client;
095    }
096
097    /**
098     * Handles parsing of the URL and delegating to relevant methods for known commands. Commands are: getFile - params:
099     * fileName - effect: get the full file specified by the parameter from the bitarchive. getRecord - params:
100     * fileName,offset - effect: get a single ARC record from the bitarchive. getMetadata - params: jobID - effect: get
101     * all metadata for a single job from the bitarchive.
102     *
103     * @param request The request to check
104     * @param response The response to give command results to if it is a command
105     * @return Whether this was a command URL
106     * @throws IOFailure in any trouble.
107     */
108    protected boolean executeCommand(Request request, Response response) {
109        // If the url is for this host (potential command)
110        if (isCommandHostRequest(request)) {
111            log.debug("Executing command " + request.getURI());
112            // get path
113            String path = request.getURI().getPath();
114            if (path.equals(GetDataResolver.GET_FILE_COMMAND)) {
115                doGetFile(request, response);
116                return true;
117            }
118            if (path.equals(GetDataResolver.GET_RECORD_COMMAND)) {
119                doGetRecord(request, response);
120                return true;
121            }
122            if (path.equals(GetDataResolver.GET_METADATA_COMMAND)) {
123                doGetMetadata(request, response);
124                return true;
125            }
126        }
127        return false;
128    }
129
130    /**
131     * Get all metadata for a given job id, and write it to response. Multiple metadata files will be concatenated.
132     *
133     * @param request A get metadata request; a parameter jobID is expected to be set.
134     * @param response Metadata will be written to this response.
135     * @throws IOFailure in any trouble.
136     */
137    private void doGetMetadata(Request request, Response response) {
138        String idString = getParameter(request, JOB_ID_PARAMETER);
139        // TODO in which case will getParameter return null (if ever)
140        // if yes, handle the case: idString==null
141        if (idString != null) {
142            try {
143                Long id = Long.parseLong(idString);
144                FileBatchJob job = new GetFileBatchJob();
145                job.processOnlyFilesMatching(id + Constants.METADATA_FILE_PATTERN_SUFFIX);
146                BatchStatus b = client.batch(job, Settings.get(CommonSettings.USE_REPLICA_ID));
147                if (b.getNoOfFilesProcessed() > b.getFilesFailed().size() && b.hasResultFile()) {
148                    b.appendResults(response.getOutputStream());
149                    response.setStatus(OK_RESPONSE_CODE);
150                } else {
151                    if (b.getNoOfFilesProcessed() > 0) {
152                        throw new IOFailure("Error finding metadata for job " + id + ": Processed "
153                                + b.getNoOfFilesProcessed() + ", failed on files " + b.getFilesFailed());
154                    } else {
155                        throw new IOFailure("No metadata found for job " + id + " or error while fetching metadata");
156                    }
157                }
158            } catch (NumberFormatException e) {
159                String errMsg = "The value '" + idString + "' of Parameter jobID is not a parsable job id";
160                log.warn(errMsg, e);
161                throw new IOFailure(errMsg, e);
162            }
163        }
164    }
165
166    /**
167     * Get a record from an ARC file, and write it to response. If the record has size greater than
168     * settings.viewerproxy.maxSizeInBrowser then a header is added to turn the response into a file-download.
169     *
170     * @param request A get metadata request; parameters arcFile and arcOffset are expected to be set.
171     * @param response Metadata will be written to this response.
172     * @throws IOFailure in any trouble.
173     */
174    private void doGetRecord(Request request, Response response) {
175        String fileName = getParameter(request, FILE_NAME_PARAMETER);
176        String offsetString = getParameter(request, FILE_OFFSET_PARAMETER);
177        // TODO in which case will getParameter return null if ever?
178        // If yes, handle the else case
179        if (fileName != null && offsetString != null) {
180            try {
181                Long offset = Long.parseLong(offsetString);
182                BitarchiveRecord record = client.get(fileName, offset);
183                if (record == null) {
184                    throw new IOFailure("Null record returned by " + "ViewerArcRepositoryClient.get(" + fileName + ","
185                            + offset + "),");
186                }
187                long maxSize = Settings.getLong(HarvesterSettings.MAXIMUM_OBJECT_IN_BROWSER);
188                // TODO: what happens if the record already has these headers defined?
189                if (record.getLength() > maxSize) {
190                    response.addHeaderField("Content-Disposition", "Attachment; filename=record.txt");
191                    response.addHeaderField("Content-Type", "application/octet-stream");
192                }
193                record.getData(response.getOutputStream());
194                response.setStatus(OK_RESPONSE_CODE);
195            } catch (NumberFormatException e) {
196                String errMsg = "Unable to parse offsetstring '" + offsetString + "' as long";
197                log.warn(errMsg, e);
198                throw new IOFailure(errMsg, e);
199            }
200        }
201    }
202
203    /**
204     * Get a file from bitarchive, and write it to response.
205     *
206     * @param request A get metadata request; parameter arcFile is expected to be set.
207     * @param response File will be written to this response.
208     * @throws IOFailure in any trouble.
209     */
210    private void doGetFile(Request request, Response response) {
211        String fileName = getParameter(request, FILE_NAME_PARAMETER);
212        // TODO in which case will getParameter return null?
213        if (fileName != null) {
214            try {
215                File tempFile = null;
216                try {
217                    tempFile = File.createTempFile("getFile", "download", FileUtils.getTempDir());
218                    client.getFile(fileName, Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID)),
219                            tempFile);
220                    FileUtils.writeFileToStream(tempFile, response.getOutputStream());
221                    response.setStatus(OK_RESPONSE_CODE);
222                } finally {
223                    if (tempFile != null) {
224                        FileUtils.remove(tempFile);
225                    }
226                }
227            } catch (IOException e) {
228                String errMsg = "Failure to getFile '" + fileName + "': ";
229                log.warn(errMsg, e);
230                throw new IOFailure(errMsg, e);
231            }
232        }
233    }
234
235    /**
236     * Get a single parameter out of a parametermap, checking for errors.
237     *
238     * @param request The request with the parameters
239     * @param name The name of the parameter
240     * @return The single value found
241     * @throws IOFailure if an error was encountered.
242     */
243    private String getParameter(Request request, String name) {
244        String[] values = request.getParameterMap().get(name);
245        if (values == null || values.length == 0) {
246            throw new IOFailure("Missing parameter '" + name + "'");
247        }
248        if (values.length > 1) {
249            throw new IOFailure("Multiple parameters for '" + name + "': " + Arrays.asList(values));
250        }
251        return values[0];
252    }
253
254    /**
255     * The trivial batch job: simply concatenate batched files to output.
256     */
257    private static class GetFileBatchJob extends FileBatchJob implements Serializable {
258
259        public GetFileBatchJob() {
260            batchJobTimeout = 10 * Constants.ONE_MIN_IN_MILLIES;
261        }
262
263        /** Does nothing. */
264        public void initialize(OutputStream os) {
265        }
266
267        /**
268         * Simply write file to output.
269         *
270         * @param file File to write to output.
271         * @param os Outputstream to write to.
272         * @return true.
273         */
274        public boolean processFile(File file, OutputStream os) {
275            FileUtils.writeFileToStream(file, os);
276            return true;
277        }
278
279        /** does nothing. */
280        public void finish(OutputStream os) {
281        }
282    }
283}