001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.indexserver;
024
025import java.util.HashSet;
026import java.util.Set;
027
028import dk.netarkivet.common.distribute.indexserver.RequestType;
029import dk.netarkivet.common.utils.CleanupIF;
030
031/**
032 * Index server. Handles request for lucene indexes of crawl logs and cdx indexes of jobs, using two multifilebasedcache
033 * objects as handlers.
034 * <p>
035 * The server sets up handlers for three kinds of indexes (as defined by RequestType): A CDX index, where each index is
036 * one file, gzip-compressed. A DEDUP_CRAWL_LOG index, where each index is multiple files, gzip-compressed, making up a
037 * Lucene index of non-HTML files. A FULL_CRAWL_LOG index, where each index is multiple files, gzip-compressed, making
038 * up a Lucene index of all files.
039 */
040public class IndexServer implements CleanupIF {
041
042    /** The remote server that hands us indexes. */
043    private IndexRequestServerInterface remoteServer;
044    /** The singleton instance of this class. */
045    private static IndexServer instance;
046
047    /** Instantiates the two handlers, and starts listening for requests. */
048    protected IndexServer() {
049        FileBasedCache<Set<Long>> cdxCache = new CDXIndexCache();
050        FileBasedCache<Set<Long>> dedupCrawlLogCache = new DedupCrawlLogIndexCache();
051        FileBasedCache<Set<Long>> fullCrawlLogCache = new FullCrawlLogIndexCache();
052        // prompt the empty indices to pre-generated
053        Set<Long> emptySet = new HashSet<Long>();
054        cdxCache.getIndex(emptySet);
055        dedupCrawlLogCache.getIndex(emptySet);
056        fullCrawlLogCache.getIndex(emptySet);
057
058        remoteServer = IndexRequestServerFactory.getInstance();
059
060        remoteServer.setHandler(RequestType.CDX, cdxCache);
061        remoteServer.setHandler(RequestType.DEDUP_CRAWL_LOG, dedupCrawlLogCache);
062        remoteServer.setHandler(RequestType.FULL_CRAWL_LOG, fullCrawlLogCache);
063        remoteServer.start();
064    }
065
066    /**
067     * Get the unique index server instance.
068     *
069     * @return The instance;
070     */
071    public static synchronized IndexServer getInstance() {
072        if (instance == null) {
073            instance = new IndexServer();
074        }
075        return instance;
076    }
077
078    /**
079     * Close the server.
080     */
081    public void cleanup() {
082        remoteServer.close();
083        instance = null;
084    }
085
086}