/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.harvester.tools;

import java.util.HashSet;
import java.util.Set;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.MissingOptionException;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;

import dk.netarkivet.common.distribute.JMSConnectionFactory;
import dk.netarkivet.common.distribute.indexserver.Index;
import dk.netarkivet.common.distribute.indexserver.IndexClientFactory;
import dk.netarkivet.common.distribute.indexserver.JobIndexCache;

/**
 * A tool to ask indices from indexserver on demand.
 * <p>
 * Usage: java dk.netarkivet.harvester.tools.CreateIndex --type cdx|dedup|crawllog --jobids jobid[,jobid]*
 * <p>
 * Both options are required. Any invalid argument (missing option, unknown index type, or a job id that is not a
 * whole number) prints an error followed by the usage text on stderr and terminates the JVM with exit code 1.
 */
public class CreateIndex {
    /**
     * Private constructor to avoid instantiation of this class.
     */
    private CreateIndex() {
    }

    /** Option for selecting the type of index required. */
    private static final String INDEXTYPE_OPTION = "t";

    /** Option for selecting the jobids to be used in the index. */
    private static final String JOBIDS_OPTION = "l";

    /**
     * The main method that does the parsing of the commandline, and makes the actual index request.
     *
     * @param args the arguments
     */
    public static void main(String[] args) {
        Options options = new Options();
        // Use the shared constants so that the option definitions and the lookups below cannot drift apart.
        Option indexType = new Option(INDEXTYPE_OPTION, "type", true, "Type of index");
        Option jobList = new Option(JOBIDS_OPTION, "jobids", true, "list of jobids");
        indexType.setRequired(true);
        jobList.setRequired(true);
        options.addOption(indexType);
        options.addOption(jobList);

        CommandLineParser parser = new PosixParser();
        CommandLine cmd;
        try {
            // parse the command line arguments
            cmd = parser.parse(options, args);
        } catch (MissingOptionException e) {
            System.err.println("Some of the required parameters are missing: " + e.getMessage());
            dieWithUsage();
            // dieWithUsage() exits the JVM; the return only tells the compiler that flow stops here.
            return;
        } catch (ParseException exp) {
            System.err.println("Parsing of parameters failed: " + exp.getMessage());
            dieWithUsage();
            return;
        }

        String typeValue = cmd.getOptionValue(INDEXTYPE_OPTION);
        String jobidsValue = cmd.getOptionValue(JOBIDS_OPTION);

        Set<Long> jobIDs;
        try {
            jobIDs = parseJobIds(jobidsValue);
        } catch (NumberFormatException e) {
            // Treat a malformed job id like any other bad argument instead of dying with a stack trace.
            System.err.println("Invalid jobid in '" + jobidsValue + "': " + e.getMessage());
            dieWithUsage();
            return;
        }

        JobIndexCache cache;
        String indexTypeAstring;
        if (typeValue.equalsIgnoreCase("CDX")) {
            indexTypeAstring = "CDX";
            cache = IndexClientFactory.getCDXInstance();
        } else if (typeValue.equalsIgnoreCase("DEDUP")) {
            indexTypeAstring = "DEDUP";
            cache = IndexClientFactory.getDedupCrawllogInstance();
        } else if (typeValue.equalsIgnoreCase("CRAWLLOG")) {
            indexTypeAstring = "CRAWLLOG";
            cache = IndexClientFactory.getFullCrawllogInstance();
        } else {
            System.err.println("Unknown indextype '" + typeValue + "' requested.");
            dieWithUsage();
            return;
        }

        System.out.println("Creating " + indexTypeAstring + " index for ids: " + jobIDs);
        try {
            // The returned index is not used by this tool; making the request is the whole point of the call.
            @SuppressWarnings("unused")
            Index<Set<Long>> index = cache.getIndex(jobIDs);
        } finally {
            // Run the JMS cleanup even when the index request fails.
            JMSConnectionFactory.getInstance().cleanup();
        }
    }

    /**
     * Converts a comma-separated list of job ids into a set of numbers. Whitespace around each id is ignored.
     *
     * @param jobidsValue the raw value of the jobids option, e.g. "1,2,3"
     * @return the distinct job ids found in the list
     * @throws NumberFormatException if any element of the list is not a valid long (including empty elements)
     */
    private static Set<Long> parseJobIds(String jobidsValue) {
        Set<Long> jobIDs = new HashSet<Long>();
        for (String idAsString : jobidsValue.split(",")) {
            jobIDs.add(Long.valueOf(idAsString.trim()));
        }
        return jobIDs;
    }

    /**
     * Method for terminating this instance, with writing out the usage. This is used when the arguments are incorrect.
     * Prints the usage text to stderr and exits the JVM with status 1; it never returns normally.
     */
    private static void dieWithUsage() {
        System.err.println("Usage: java " + CreateIndex.class.getName()
                + " -type cdx|dedup|crawllog -jobids jobid[,jobid]*");
        System.exit(1);
    }
}