001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.harvester.indexserver; 024 025import java.util.HashSet; 026import java.util.Set; 027 028import dk.netarkivet.common.distribute.indexserver.RequestType; 029import dk.netarkivet.common.utils.CleanupIF; 030 031/** 032 * Index server. Handles request for lucene indexes of crawl logs and cdx indexes of jobs, using two multifilebasedcache 033 * objects as handlers. 034 * <p> 035 * The server sets up handlers for three kinds of indexes (as defined by RequestType): A CDX index, where each index is 036 * one file, gzip-compressed. A DEDUP_CRAWL_LOG index, where each index is multiple files, gzip-compressed, making up a 037 * Lucene index of non-HTML files. A FULL_CRAWL_LOG index, where each index is multiple files, gzip-compressed, making 038 * up a Lucene index of all files. 039 */ 040public class IndexServer implements CleanupIF { 041 042 /** The remote server that hands us indexes. */ 043 private IndexRequestServerInterface remoteServer; 044 /** The singleton instance of this class. */ 045 private static IndexServer instance; 046 047 /** Instantiates the two handlers, and starts listening for requests. */ 048 protected IndexServer() { 049 FileBasedCache<Set<Long>> cdxCache = new CDXIndexCache(); 050 FileBasedCache<Set<Long>> dedupCrawlLogCache = new DedupCrawlLogIndexCache(); 051 FileBasedCache<Set<Long>> fullCrawlLogCache = new FullCrawlLogIndexCache(); 052 // prompt the empty indices to pre-generated 053 Set<Long> emptySet = new HashSet<Long>(); 054 cdxCache.getIndex(emptySet); 055 dedupCrawlLogCache.getIndex(emptySet); 056 fullCrawlLogCache.getIndex(emptySet); 057 058 remoteServer = IndexRequestServerFactory.getInstance(); 059 060 remoteServer.setHandler(RequestType.CDX, cdxCache); 061 remoteServer.setHandler(RequestType.DEDUP_CRAWL_LOG, dedupCrawlLogCache); 062 remoteServer.setHandler(RequestType.FULL_CRAWL_LOG, fullCrawlLogCache); 063 remoteServer.start(); 064 } 065 066 /** 067 * Get the unique index server instance. 068 * 069 * @return The instance; 070 */ 071 public static synchronized IndexServer getInstance() { 072 if (instance == null) { 073 instance = new IndexServer(); 074 } 075 return instance; 076 } 077 078 /** 079 * Close the server. 080 */ 081 public void cleanup() { 082 remoteServer.close(); 083 instance = null; 084 } 085 086}