001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.viewerproxy; 025 026import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory; 027import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient; 028import dk.netarkivet.common.distribute.indexserver.IndexClientFactory; 029import dk.netarkivet.common.distribute.indexserver.JobIndexCache; 030import dk.netarkivet.common.utils.CleanupIF; 031import dk.netarkivet.viewerproxy.distribute.HTTPControllerServer; 032 033/** 034 * Singleton of a viewerproxy. The viewerproxy consists of: - A JobIndexCache, which is able to retrieve a Lucene index 035 * file for a list of jobs - An ArcRepositoryClient used by ARCArchiveAccess - An ARCArchiveAccess, which retrieves 036 * objects from an ARC repository - A MissingURIRecorder, which records missing urls - A DelegatingController, which 037 * delegates commands to change index and handle missing url collection - An NotifyingURLResolver, which looks up URLs 038 * in an ARCArchiveAccess, and notifies Observers about missing URLs, - An UnknownCommandResolver, which generates an 039 * error for unknown command urls and pass on non-command urls to the NotifyingURLResolver - A GetDataResolver, which 040 * handles certain command urls for getting raw data and pass on the rest to the UnknownCommandResolver, - A 041 * HTTPControllerServer, which delegates certain command urls to a controller and pass on the rest to the 042 * GetDataResolver, - A WebProxy, which listens for http requests, and sends them to the HTTPControllerServer 043 */ 044public class ViewerProxy implements CleanupIF { 045 /** The singleton instance of this class. */ 046 private static ViewerProxy instance; 047 /** this setups the Jetty server behind our proxy. */ 048 private WebProxy webProxy; 049 /** The HTTPControllerServer. */ 050 private HTTPControllerServer controllerServer; 051 /** The unknown Command resolver. */ 052 private UnknownCommandResolver unknownCommandResolver; 053 /** 054 * The data resolver handling the different operations available here, {@link GetDataResolver#GET_FILE_COMMAND}, 055 * {@link GetDataResolver#GET_METADATA_COMMAND}, {@link GetDataResolver#GET_RECORD_COMMAND}. 056 */ 057 private GetDataResolver getDataResolver; 058 /** The NotifyingURIResolver. */ 059 private NotifyingURIResolver notifyingURIResolver; 060 /** 061 * The indexcache used. Trying to retrieve an index forces this index to be generated, if it doesn't exist. 062 */ 063 private JobIndexCache luceneIndexCache; 064 /** The object responsible for retrieving ARC data. */ 065 private ARCArchiveAccess arcArchiveAccess; 066 /** The Control object for the viewerProxy. See {@link Controller}. */ 067 private Controller controller; 068 /** 069 * This enables us to record missing URL when accessing data through this proxy. 070 */ 071 private MissingURIRecorder missingURIRecorder; 072 /** This provides read-only access to the data in the archive. */ 073 private ViewerArcRepositoryClient arcRepositoryClient; 074 075 /** Initiates the viewer proxy as described in class comment. */ 076 private ViewerProxy() { 077 arcRepositoryClient = ArcRepositoryClientFactory.getViewerInstance(); 078 // The Lucene index covers all items, not just non-text 079 luceneIndexCache = IndexClientFactory.getFullCrawllogInstance(); 080 arcArchiveAccess = new ARCArchiveAccess(arcRepositoryClient); 081 missingURIRecorder = new MissingURIRecorder(); 082 controller = new DelegatingController(missingURIRecorder, luceneIndexCache, arcArchiveAccess); 083 notifyingURIResolver = new NotifyingURIResolver(arcArchiveAccess, missingURIRecorder); 084 unknownCommandResolver = new UnknownCommandResolver(notifyingURIResolver); 085 getDataResolver = new GetDataResolver(unknownCommandResolver, arcRepositoryClient); 086 controllerServer = new HTTPControllerServer(controller, getDataResolver); 087 webProxy = new WebProxy(controllerServer); 088 } 089 090 /** 091 * Get singleton instance of viewerproxy. See class comment for details. 092 * 093 * @return The viewerproxy instance. 094 */ 095 public static ViewerProxy getInstance() { 096 if (instance == null) { 097 instance = new ViewerProxy(); 098 } 099 return instance; 100 } 101 102 /** Shuts down webproxy and arcrepositoryclient, and resets singleton. */ 103 public void cleanup() { 104 instance = null; 105 webProxy.kill(); 106 arcRepositoryClient.close(); 107 } 108}