/* CommandLineParser
 *
 * Created on 10.04.2006
 *
 * Copyright (C) 2006 National and University Library of Iceland
 *
 * This file is part of the DeDuplicator (Heritrix add-on module).
 *
 * DeDuplicator is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * DeDuplicator is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with DeDuplicator; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
package is.hi.bok.deduplicator;

import java.io.PrintWriter;
import java.util.List;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.cli.UnrecognizedOptionException;

/**
 * Parses the DigestIndexer command line and prints its usage message.
 * <p>
 * The set of supported options is fixed and is registered when an instance is
 * constructed. The {@code usage} and {@code message} methods terminate the JVM
 * via {@link System#exit(int)} after writing their output.
 *
 * @author Kristinn Sigurðsson
 */
@SuppressWarnings({"rawtypes", "unused"})
public class CommandLineParser {
    private static final String USAGE = "Usage: ";
    private static final String NAME = "DigestIndexer";

    /** Options understood by DigestIndexer. */
    private Options options = null;
    /** Result of parsing the arguments handed to the constructor. */
    private CommandLine commandLine = null;
    /** Writer that messages and usage text are printed to. */
    private PrintWriter out = null;

    /**
     * Block default construction.
     */
    private CommandLineParser() {
        super();
    }

    /**
     * Registers the DigestIndexer options and parses {@code args} against them.
     * <p>
     * An unrecognized option is not reported to the caller as an exception:
     * the error message and the usage text are printed to {@code out} and the
     * JVM exits with code 1.
     *
     * @param args Command-line arguments to process.
     * @param out PrintWriter to write messages and usage text on.
     * @throws ParseException Failed parse of command line (for any reason
     *             other than an unrecognized option).
     */
    public CommandLineParser(String[] args, PrintWriter out) throws ParseException {
        super();

        this.out = out;
        this.options = buildOptions();

        PosixParser parser = new PosixParser();
        try {
            this.commandLine = parser.parse(this.options, args, false);
        } catch (UnrecognizedOptionException e) {
            usage(e.getMessage(), 1);
        }
    }

    /**
     * Builds the fixed table of options accepted by DigestIndexer.
     *
     * @return A new, fully populated {@link Options} instance.
     */
    private static Options buildOptions() {
        Options result = new Options();

        result.addOption(new Option("h", "help", false, "Prints this message and exits."));

        Option opt = new Option("o", "mode", true, "Index by URL, HASH or BOTH. Default: BOTH.");
        opt.setArgName("type");
        result.addOption(opt);

        result.addOption(new Option("s", "equivalent", false,
                "Include a stripped URL in the index for equivalent URL " + "matches."));

        result.addOption(new Option("t", "timestamp", false, "Include the time of fetch in the index."));

        result.addOption(new Option("e", "etag", false,
                "Include etags in the index (if available in the source)."));

        opt = new Option("m", "mime", true, "A filter on what mime types are added into the index "
                + "(blacklist). Default: ^text/.*");
        opt.setArgName("reg.expr.");
        result.addOption(opt);

        result.addOption(new Option("w", "whitelist", false,
                "Make the --mime filter a whitelist instead of blacklist."));

        opt = new Option("i", "iterator", true, "An iterator suitable for the source data (default iterator "
                + "works on Heritrix's crawl.log).");
        opt.setArgName("classname");
        result.addOption(opt);

        result.addOption(new Option("a", "add", false, "Add source data to existing index."));

        opt = new Option("r", "origin", true, "If set, the 'origin' of each URI will be added to the index."
                + " If no origin is provided by the source data then the " + "argument provided here will be used.");
        opt.setArgName("origin");
        result.addOption(opt);

        result.addOption(new Option("d", "skip-duplicates", false,
                "If set, URIs marked as duplicates will not be added to the " + "index."));

        return result;
    }

    /**
     * Print usage then exit with exit code 0.
     */
    public void usage() {
        usage(0);
    }

    /**
     * Print usage then exit.
     *
     * @param exitCode Exit code to use in call to System.exit.
     */
    public void usage(int exitCode) {
        usage(null, exitCode);
    }

    /**
     * Print message then usage then exit.
     * <p>
     * The JVM exits inside this method.
     *
     * @param message Message to print before we do usage; skipped when null.
     * @param exitCode Exit code to use in call to System.exit.
     */
    public void usage(String message, int exitCode) {
        outputAndExit(message, true, exitCode);
    }

    /**
     * Print message and then exit, without printing the usage text.
     * <p>
     * The JVM exits inside this method.
     *
     * @param message Message to print; skipped when null.
     * @param exitCode Exit code to use in call to System.exit.
     */
    public void message(String message, int exitCode) {
        outputAndExit(message, false, exitCode);
    }

    /**
     * Print out an optional message and an optional usage and then exit.
     * <p>
     * Private utility method. The JVM exits from inside this method.
     *
     * @param message Message to print before we do usage; skipped when null.
     * @param doUsage True if we are to print out the usage message.
     * @param exitCode Exit code to use in call to System.exit.
     */
    private void outputAndExit(String message, boolean doUsage, int exitCode) {
        if (message != null) {
            this.out.println(message);
        }

        if (doUsage) {
            HelpFormatter formatter = new DigestHelpFormatter();
            formatter.printHelp(this.out, 80, NAME, "Options:", this.options, 1, 2, "Arguments:", false);
            // The positional arguments are not registered as options, so their
            // descriptions are printed by hand after the "Arguments:" footer.
            this.out.println(" source Data to iterate " + "over (typically a crawl.log). If");
            this.out.println(" using a non-standard " + "iterator, consult relevant.");
            this.out.println(" documentation");
            this.out.println(" target Target directory " + "for index output. Directory need not");
            this.out.println(" exist, but " + "unless --add should be empty.");
        }

        // Close printwriter so stream gets flushed.
        this.out.close();
        System.exit(exitCode);
    }

    /**
     * @return Options passed on the command line.
     */
    public Option[] getCommandLineOptions() {
        return this.commandLine.getOptions();
    }

    /**
     * @return Arguments passed on the command line.
     */
    public List getCommandLineArguments() {
        return this.commandLine.getArgList();
    }

    /**
     * @return Command line.
     */
    public CommandLine getCommandLine() {
        return this.commandLine;
    }

    /**
     * Override so can customize usage output.
     */
    public class DigestHelpFormatter extends HelpFormatter {
        public DigestHelpFormatter() {
            super();
        }

        /**
         * Prints the two fixed DigestIndexer usage lines.
         *
         * @param pw Writer the usage lines are printed to.
         * @param width Ignored; the usage lines are never wrapped.
         * @param cmdLineSyntax Ignored; the program name is fixed.
         */
        @Override
        public void printUsage(PrintWriter pw, int width, String cmdLineSyntax) {
            // Write to the writer we were handed rather than the enclosing
            // parser's writer, so the output lands where the caller asked.
            pw.println(USAGE + NAME + " --help");
            pw.println(USAGE + NAME + " [options] source target");
        }

        /**
         * Prints the same fixed usage lines as
         * {@link #printUsage(PrintWriter, int, String)}.
         *
         * @param pw Writer the usage lines are printed to.
         * @param width Ignored.
         * @param app Ignored.
         * @param options Ignored; no usage string is generated from the options.
         */
        @Override
        public void printUsage(PrintWriter pw, int width, String app, Options options) {
            this.printUsage(pw, width, app);
        }
    }
}