001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.viewerproxy;
024
025import java.io.IOException;
026import java.io.OutputStream;
027import java.net.URI;
028import java.net.URISyntaxException;
029import java.util.Map;
030
031import javax.servlet.http.HttpServletRequest;
032import javax.servlet.http.HttpServletResponse;
033
034import org.mortbay.jetty.Server;
035import org.mortbay.jetty.handler.DefaultHandler;
036//import org.eclipse.jetty.server.Server;
037//import org.eclipse.jetty.server.handler.DefaultHandler;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import dk.netarkivet.common.CommonSettings;
042import dk.netarkivet.common.exceptions.ArgumentNotValid;
043import dk.netarkivet.common.exceptions.IOFailure;
044import dk.netarkivet.common.utils.ExceptionUtils;
045import dk.netarkivet.common.utils.Settings;
046
047/**
048 * The WebProxy is the ONLY viewerproxy class that interfaces with the Jetty classes. This class packages all requests
049 * up nicely as calls to uriResolver.lookup().
050 * <p>
051 * In particular, it handles the control of the Jetty server that the Proxy server builds on.
052 */
053@SuppressWarnings({"deprecation", "unchecked"})
054public class WebProxy extends DefaultHandler implements URIResolverHandler {
055    /** The URI resolver which handles URI lookups. */
056    private URIResolver uriResolver;
057    /** Logger used for reporting. */
058    private static final Logger log = LoggerFactory.getLogger(WebProxy.class);
059
060    /** The actual web server that we're the link to. */
061    private Server jettyServer;
062
063    /** HTTP header. */
064    private static final String CONTENT_TYPE_NAME = "Content-type";
065    /** Content-type header value. */
066    private static final String CONTENT_TYPE_VALUE = "text/html";
067    /** Inserted before error response to browser. */
068    private static final String HTML_HEADER = "<html><head><title>" + "Internal Server Error" + "</title><body>";
069    /** Inserted after error response to browser. */
070    private static final String HTML_FOOTER = "</body></html>";
071    
072    private final int portNo;
073
074    /**
075     * Initialises a new web proxy, which delegates lookups to the given uri resolver. The WebProxy will start listening
076     * on port given in settings.
077     *
078     * @param uriResolver The uriResolver used to handle lookups in the proxy.
079     * @throws IOFailure on trouble starting the proxy server.
080     * @throws ArgumentNotValid on null uriResolver.
081     */
082    public WebProxy(URIResolver uriResolver) {
083        setURIResolver(uriResolver);
084        portNo = Settings.getInt(CommonSettings.HTTP_PORT_NUMBER);
085        jettyServer = new Server(portNo);
086        jettyServer.setHandler(this);
087        log.info("Starting viewerproxy jetty on port {}", portNo);
088        try {
089            jettyServer.start();
090        } catch (Exception e) {
091            throw new IOFailure("Error while starting jetty server on port " + portNo, e);
092        }
093    }
094
095    /**
096     * Sets the current URIResolver.
097     *
098     * @param ur The resolver to handle lookups.
099     * @throws ArgumentNotValid on null uriResolver.
100     */
101    public void setURIResolver(URIResolver ur) {
102        ArgumentNotValid.checkNotNull(ur, "URIResolver ur");
103        this.uriResolver = ur;
104    }
105
106    /**
107     * Handle an HTTP request. Overrides default behaviour of Jetty. This will forward the URI and response to the
108     * wrapped URI resolver. Note that the server will NOT force the return value to be the one returned by the uri
109     * resolver, rather it will use the one the uri resolver has set in the response object.
110     * <p>
111     * Exceptions will generate an internal server error-page with the details.
112     *
113     * @param target URL or name for request. Not used
114     * @param request The original request, including URL
115     * @param response The object that receives the result
116     */
117    @Override
118    /*
119    public void handle(String target, org.eclipse.jetty.server.Request baseRequest, HttpServletRequest request,
120            HttpServletResponse response) {
121        HttpResponse netarkivetResponse = new HttpResponse(response);
122        HttpRequest netarkivetRequest = new HttpRequest(request);
123        try {
124            // The following is a bad idea because it hides where the
125            // failure actually happens in the code
126            // Generate URI to enforce fail-early of illegal URIs
127            // uri = new URI(request.getRequestURL().toString());
128            uriResolver.lookup(netarkivetRequest, netarkivetResponse);
129            ((org.eclipse.jetty.server.Request) request).setHandled(true);
130        } catch (Exception e) {
131            createErrorResponse(netarkivetRequest.getURI(), netarkivetResponse, e);
132        }
133    }
134    */
135    public void handle(String target, HttpServletRequest request,
136                HttpServletResponse response, int dispatch) {
137        HttpResponse netarkivetResponse = new HttpResponse(response);
138        HttpRequest netarkivetRequest = new HttpRequest(request);
139        try {
140                //The following is a bad idea because it hides where the
141                //failure actually happens in the code
142                //Generate URI to enforce fail-early of illegal URIs 
143                //uri = new URI(request.getRequestURL().toString());
144                uriResolver.lookup(netarkivetRequest, netarkivetResponse);
145                ((org.mortbay.jetty.Request) request).setHandled(true);
146        } catch (Exception e) {
147                createErrorResponse(netarkivetRequest.getURI(),
148                                netarkivetResponse, e);
149        }
150    }
151    
152
153    /**
154     * Generate an appropriate error response when a URI generates an exception. If this fails, it is logged, but
155     * otherwise ignored.
156     *
157     * @param uri The URI attempted read that could not be found
158     * @param response The Response object to write the error response into.
159     * @param e the exception generated by the URI
160     */
161    private void createErrorResponse(URI uri, Response response, Throwable e) {
162        try {
163            // first write a header telling the browser to expect text/html
164            response.addHeaderField(CONTENT_TYPE_NAME, CONTENT_TYPE_VALUE);
165            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
166            // Now flush an errorscreen to the browser
167            OutputStream browserOut = response.getOutputStream();
168            browserOut.write((HTML_HEADER + "Internal server error for: " + uri + "\n<pre>"
169                    + ExceptionUtils.getStackTrace(e) + "</pre>" + HTML_FOOTER).getBytes());
170            browserOut.flush();
171            log.warn("Exception for : " + uri, e);
172        } catch (Exception e1) {
173            log.warn(
174                    "Error writing error response to browser " + "for '" + uri + "' with exception "
175                            + ExceptionUtils.getStackTrace(e) + ". Giving up!", e1);
176        }
177        // Do not close stream! That is left to the servlet.
178    }
179
180    /** Shut down this server. */
181    public void kill() {
182        try {
183                log.info("Shutting down viewerproxy jetty listening on port {}", portNo);
184            jettyServer.stop();
185            jettyServer.destroy();
186        } catch (Exception ie) {
187            log.warn("Error shutting down server", ie);
188        }
189    }
190
191    /**
192     * A wrapper around the Jetty HttpResponse, giving the simple Response interface used in our URIResolvers. Also
193     * Collects and remembers status code for a response.
194     */
195    public static class HttpResponse implements Response {
196        /** The Jetty http response object. */
197        private HttpServletResponse hr;
198        /** The HTTP status code. */
199        private int status;
200
201        /**
202         * Constructs a new HttpResponse based on the given Jetty response.
203         *
204         * @param htResp A response object to wrap.
205         */
206        private HttpResponse(HttpServletResponse htResp) {
207            hr = htResp;
208        }
209
210        /**
211         * Getter for the data output stream.
212         *
213         * @return An open output stream.
214         * @throws IOFailure if an outputstream can not be obtained (on invalidated response).
215         */
216        public OutputStream getOutputStream() {
217            try {
218                return hr.getOutputStream();
219            } catch (IOException e) {
220                throw new IOFailure("Outputstream not available", e);
221            }
222        }
223
224        /**
225         * Setter for the status code (e.g. 200, 404)
226         *
227         * @param statusCode An HTTP status code.
228         */
229        public void setStatus(int statusCode) {
230            this.status = statusCode;
231            hr.setStatus(statusCode);
232        }
233
234        /**
235         * Set status code and explanatory text string describing the status.
236         *
237         * @param statusCode should be valid http status ie. 200, 404,
238         * @param reason text string explaining status ie. OK, not found,
239         */
240        public void setStatus(int statusCode, String reason) {
241                log.debug("Calling setStatus with statusCode {} and reason {} using deprecated API", statusCode, reason);
242            this.status = statusCode;
243            // Note: This uses deprecated method.
244            // We still use this, because in the proxying we need to set both
245            // status, reason, and body, and this is the only possible way to do
246            // this
247            hr.setStatus(statusCode, reason);
248        }
249
250        /**
251         * Add an HTTP header to the response.
252         *
253         * @param name Name of the header, e.g. Last-Modified-Date
254         * @param value The value of the header
255         */
256        public void addHeaderField(String name, String value) {
257            if (hr.isCommitted()) {
258                log.warn("Writing a header {}:{} after the http response is committed.", name, value);
259            }
260            hr.addHeader(name, value);
261        }
262
263        /**
264         * Get the HTTP status of this response.
265         *
266         * @return The HTTP status.
267         */
268        public int getStatus() {
269            return status;
270        }
271    }
272
273    /**
274     * A wrapper around the Jetty HttpRequest, giving the simple Request interface used in our URIResolvers. Gives
275     * access to URI and posted parameters.
276     */
277    public static class HttpRequest implements Request {
278        /** The Jetty http response object. */
279        private HttpServletRequest hr;
280
281        /**
282         * Constructs a new HttpRequest based on the given Jetty request.
283         *
284         * @param htReq A request object to wrap.
285         */
286        protected HttpRequest(HttpServletRequest htReq) {
287            hr = htReq;
288        }
289
290        /**
291         * Get the URI from this request. In contrast to javax.servlet.HttpServletResponse this includes the query
292         * string.
293         *
294         * @return The URI from this request.
295         * @throws IOFailure if the URI is invalid. This should never happen.
296         */
297        public URI getURI() {
298            String uriString;
299            if (hr.getQueryString() != null) {
300                uriString = hr.getRequestURL().toString() + "?" + uriEncode(hr.getQueryString());
301            } else {
302                uriString = hr.getRequestURL().toString();
303            }
304            try {
305                return new URI(uriString);
306            } catch (URISyntaxException e) {
307                throw new IOFailure("Could not construct URI from '" + uriString + "'", e);
308            }
309        }
310
311        /**
312         * We here replace what should be standard API functionality with an apparent kludge. We do this because the
313         * constructor java.net.URI(String s) violates its own documentation. It should encode all "other" characters in the
314         * query part of the URI. These "other" characters include curly brackets, but actually the escaping is never
315         * done. Hence we do it here.
316         *
317         * @param s the String to be encoded
318         * @return the encoded String
319         */
320        public static String uriEncode(String s) {
321            return s.replaceAll("\\{", "%7B").replaceAll("\\}", "%7D");
322        }
323
324        /**
325         * Get parameters from this request. Note that this method is invalidated when the request is replied to.
326         *
327         * @return The parameters from this request.
328         */
329        public Map<String, String[]> getParameterMap() {
330            return (Map<String, String[]>) hr.getParameterMap();
331        }
332    }
333}