001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.viewerproxy;
024
025import java.io.IOException;
026import java.io.OutputStream;
027import java.net.URI;
028import java.net.URISyntaxException;
029import java.util.Map;
030
031import javax.servlet.http.HttpServletRequest;
032import javax.servlet.http.HttpServletResponse;
033
034import org.eclipse.jetty.server.Server;
035import org.eclipse.jetty.server.handler.DefaultHandler;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039import dk.netarkivet.common.CommonSettings;
040import dk.netarkivet.common.exceptions.ArgumentNotValid;
041import dk.netarkivet.common.exceptions.IOFailure;
042import dk.netarkivet.common.utils.ExceptionUtils;
043import dk.netarkivet.common.utils.Settings;
044
045/**
046 * The WebProxy is the ONLY viewerproxy class that interfaces with the Jetty classes. This class packages all requests
047 * up nicely as calls to uriResolver.lookup().
048 * <p>
049 * In particular, it handles the control of the Jetty server that the Proxy server builds on.
050 */
051@SuppressWarnings({"deprecation", "unchecked"})
052public class WebProxy extends DefaultHandler implements URIResolverHandler {
053    /** The URI resolver which handles URI lookups. */
054    private URIResolver uriResolver;
055    /** Logger used for reporting. */
056    private static final Logger log = LoggerFactory.getLogger(WebProxy.class);
057
058    /** The actual web server that we're the link to. */
059    private Server jettyServer;
060
061    /** HTTP header. */
062    private static final String CONTENT_TYPE_NAME = "Content-type";
063    /** Content-type header value. */
064    private static final String CONTENT_TYPE_VALUE = "text/html";
065    /** Inserted before error response to browser. */
066    private static final String HTML_HEADER = "<html><head><title>" + "Internal Server Error" + "</title><body>";
067    /** Inserted after error response to browser. */
068    private static final String HTML_FOOTER = "</body></html>";
069    
070    private final int portNo;
071
072    /**
073     * Initialises a new web proxy, which delegates lookups to the given uri resolver. The WebProxy will start listening
074     * on port given in settings.
075     *
076     * @param uriResolver The uriResolver used to handle lookups in the proxy.
077     * @throws IOFailure on trouble starting the proxy server.
078     * @throws ArgumentNotValid on null uriResolver.
079     */
080    public WebProxy(URIResolver uriResolver) {
081        setURIResolver(uriResolver);
082        portNo = Settings.getInt(CommonSettings.HTTP_PORT_NUMBER);
083        jettyServer = new Server(portNo);
084        jettyServer.setHandler(this);
085        log.info("Starting viewerproxy jetty on port {}", portNo);
086        try {
087            jettyServer.start();
088        } catch (Exception e) {
089            throw new IOFailure("Error while starting jetty server on port " + portNo, e);
090        }
091    }
092
093    /**
094     * Sets the current URIResolver.
095     *
096     * @param ur The resolver to handle lookups.
097     * @throws ArgumentNotValid on null uriResolver.
098     */
099    public void setURIResolver(URIResolver ur) {
100        ArgumentNotValid.checkNotNull(ur, "URIResolver ur");
101        this.uriResolver = ur;
102    }
103
104    /**
105     * Handle an HTTP request. Overrides default behaviour of Jetty. This will forward the URI and response to the
106     * wrapped URI resolver. Note that the server will NOT force the return value to be the one returned by the uri
107     * resolver, rather it will use the one the uri resolver has set in the response object.
108     * <p>
109     * Exceptions will generate an internal server error-page with the details.
110     *
111     * @param target URL or name for request. Not used
112     * @param request The original request, including URL
113     * @param response The object that receives the result
114     */
115    @Override
116    public void handle(String target, org.eclipse.jetty.server.Request baseRequest, HttpServletRequest request,
117            HttpServletResponse response) {
118        HttpResponse netarkivetResponse = new HttpResponse(response);
119        HttpRequest netarkivetRequest = new HttpRequest(request);
120        try {
121            // The following is a bad idea because it hides where the
122            // failure actually happens in the code
123            // Generate URI to enforce fail-early of illegal URIs
124            // uri = new URI(request.getRequestURL().toString());
125            uriResolver.lookup(netarkivetRequest, netarkivetResponse);
126            ((org.eclipse.jetty.server.Request) request).setHandled(true);
127        } catch (Exception e) {
128            createErrorResponse(netarkivetRequest.getURI(), netarkivetResponse, e);
129        }
130    }
131
132    /**
133     * Generate an appropriate error response when a URI generates an exception. If this fails, it is logged, but
134     * otherwise ignored.
135     *
136     * @param uri The URI attempted read that could not be found
137     * @param response The Response object to write the error response into.
138     * @param e the exception generated by the URI
139     */
140    private void createErrorResponse(URI uri, Response response, Throwable e) {
141        try {
142            // first write a header telling the browser to expect text/html
143            response.addHeaderField(CONTENT_TYPE_NAME, CONTENT_TYPE_VALUE);
144            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
145            // Now flush an errorscreen to the browser
146            OutputStream browserOut = response.getOutputStream();
147            browserOut.write((HTML_HEADER + "Internal server error for: " + uri + "\n<pre>"
148                    + ExceptionUtils.getStackTrace(e) + "</pre>" + HTML_FOOTER).getBytes());
149            browserOut.flush();
150            log.warn("Exception for : " + uri, e);
151        } catch (Exception e1) {
152            log.warn(
153                    "Error writing error response to browser " + "for '" + uri + "' with exception "
154                            + ExceptionUtils.getStackTrace(e) + ". Giving up!", e1);
155        }
156        // Do not close stream! That is left to the servlet.
157    }
158
159    /** Shut down this server. */
160    public void kill() {
161        try {
162                log.info("Shutting down viewerproxy jetty listening on port {}", portNo);
163            jettyServer.stop();
164            jettyServer.destroy();
165        } catch (Exception ie) {
166            log.warn("Error shutting down server", ie);
167        }
168    }
169
170    /**
171     * A wrapper around the Jetty HttpResponse, giving the simple Response interface used in our URIResolvers. Also
172     * Collects and remembers status code for a response.
173     */
174    public static class HttpResponse implements Response {
175        /** The Jetty http response object. */
176        private HttpServletResponse hr;
177        /** The HTTP status code. */
178        private int status;
179
180        /**
181         * Constructs a new HttpResponse based on the given Jetty response.
182         *
183         * @param htResp A response object to wrap.
184         */
185        private HttpResponse(HttpServletResponse htResp) {
186            hr = htResp;
187        }
188
189        /**
190         * Getter for the data output stream.
191         *
192         * @return An open output stream.
193         * @throws IOFailure if an outputstream can not be obtained (on invalidated response).
194         */
195        public OutputStream getOutputStream() {
196            try {
197                return hr.getOutputStream();
198            } catch (IOException e) {
199                throw new IOFailure("Outputstream not available", e);
200            }
201        }
202
203        /**
204         * Setter for the status code (e.g. 200, 404)
205         *
206         * @param statusCode An HTTP status code.
207         */
208        public void setStatus(int statusCode) {
209            this.status = statusCode;
210            hr.setStatus(statusCode);
211        }
212
213        /**
214         * Set status code and explanatory text string describing the status.
215         *
216         * @param statusCode should be valid http status ie. 200, 404,
217         * @param reason text string explaining status ie. OK, not found,
218         */
219        public void setStatus(int statusCode, String reason) {
220                log.debug("Calling setStatus with statusCode {} and reason {} using deprecated API", statusCode, reason);
221            this.status = statusCode;
222            // Note: This uses deprecated method.
223            // We still use this, because in the proxying we need to set both
224            // status, reason, and body, and this is the only possible way to do
225            // this
226            hr.setStatus(statusCode, reason);
227        }
228
229        /**
230         * Add an HTTP header to the response.
231         *
232         * @param name Name of the header, e.g. Last-Modified-Date
233         * @param value The value of the header
234         */
235        public void addHeaderField(String name, String value) {
236            hr.addHeader(name, value);
237        }
238
239        /**
240         * Get the HTTP status of this response.
241         *
242         * @return The HTTP status.
243         */
244        public int getStatus() {
245            return status;
246        }
247    }
248
249    /**
250     * A wrapper around the Jetty HttpRequest, giving the simple Request interface used in our URIResolvers. Gives
251     * access to URI and posted parameters.
252     */
253    public static class HttpRequest implements Request {
254        /** The Jetty http response object. */
255        private HttpServletRequest hr;
256
257        /**
258         * Constructs a new HttpRequest based on the given Jetty request.
259         *
260         * @param htReq A request object to wrap.
261         */
262        protected HttpRequest(HttpServletRequest htReq) {
263            hr = htReq;
264        }
265
266        /**
267         * Get the URI from this request. In contrast to javax.servlet.HttpServletResponse this includes the query
268         * string.
269         *
270         * @return The URI from this request.
271         * @throws IOFailure if the URI is invalid. This should never happen.
272         */
273        public URI getURI() {
274            String uriString;
275            if (hr.getQueryString() != null) {
276                uriString = hr.getRequestURL().toString() + "?" + uriEncode(hr.getQueryString());
277            } else {
278                uriString = hr.getRequestURL().toString();
279            }
280            try {
281                return new URI(uriString);
282            } catch (URISyntaxException e) {
283                throw new IOFailure("Could not construct URI from '" + uriString + "'", e);
284            }
285        }
286
287        /**
288         * We here replace what should be standard API functionality with an apparent kludge. We do this because the
289         * constructor java.net.URI(String s) violates its own documentation. It should encode all "other" characters in the
290         * query part of the URI. These "other" characters include curly brackets, but actually the escaping is never
291         * done. Hence we do it here.
292         *
293         * @param s the String to be encoded
294         * @return the encoded String
295         */
296        public static String uriEncode(String s) {
297            return s.replaceAll("\\{", "%7B").replaceAll("\\}", "%7D");
298        }
299
300        /**
301         * Get parameters from this request. Note that this method is invalidated when the request is replied to.
302         *
303         * @return The parameters from this request.
304         */
305        public Map<String, String[]> getParameterMap() {
306            return (Map<String, String[]>) hr.getParameterMap();
307        }
308    }
309}