001/* CommandLineParser
002 * 
003 * Created on 10.04.2006
004 *
005 * Copyright (C) 2006 National and University Library of Iceland
006 * 
007 * This file is part of the DeDuplicator (Heritrix add-on module).
008 * 
009 * DeDuplicator is free software; you can redistribute it and/or modify
010 * it under the terms of the GNU Lesser Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * any later version.
013 * 
014 * DeDuplicator is distributed in the hope that it will be useful, 
015 * but WITHOUT ANY WARRANTY; without even the implied warranty of
016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
017 * GNU Lesser Public License for more details.
018 * 
019 * You should have received a copy of the GNU Lesser Public License
020 * along with DeDuplicator; if not, write to the Free Software
021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
022 */
023package is.hi.bok.deduplicator;
024
025import java.io.PrintWriter;
026import java.util.List;
027
028import org.apache.commons.cli.CommandLine;
029import org.apache.commons.cli.HelpFormatter;
030import org.apache.commons.cli.Option;
031import org.apache.commons.cli.Options;
032import org.apache.commons.cli.ParseException;
033import org.apache.commons.cli.PosixParser;
034import org.apache.commons.cli.UnrecognizedOptionException;
035
036/**
037 * Print DigestIndexer command-line usage message.
038 *
039 * @author Kristinn Sigurðsson
040 */
041@SuppressWarnings({"rawtypes", "unused"})
042public class CommandLineParser {
043    private static final String USAGE = "Usage: ";
044    private static final String NAME = "DigestIndexer";
045    private Options options = null;
046    private CommandLine commandLine = null;
047    private PrintWriter out = null;
048
049    /**
050     * Block default construction.
051     */
052    private CommandLineParser() {
053        super();
054    }
055
056    /**
057     * Constructor.
058     *
059     * @param args Command-line arguments to process.
060     * @param out PrintStream to write on.
061     * @throws ParseException Failed parse of command line.
062     */
063    public CommandLineParser(String[] args, PrintWriter out) throws ParseException {
064        super();
065
066        this.out = out;
067
068        this.options = new Options();
069        this.options.addOption(new Option("h", "help", false, "Prints this message and exits."));
070
071        Option opt = new Option("o", "mode", true, "Index by URL, HASH or BOTH. Default: BOTH.");
072        opt.setArgName("type");
073        this.options.addOption(opt);
074
075        this.options.addOption(new Option("s", "equivalent", false,
076                "Include a stripped URL in the index for equivalent URL " + "matches."));
077
078        this.options.addOption(new Option("t", "timestamp", false, "Include the time of fetch in the index."));
079
080        this.options.addOption(new Option("e", "etag", false,
081                "Include etags in the index (if available in the source)."));
082
083        opt = new Option("m", "mime", true, "A filter on what mime types are added into the index "
084                + "(blacklist). Default: ^text/.*");
085        opt.setArgName("reg.expr.");
086        this.options.addOption(opt);
087
088        this.options.addOption(new Option("w", "whitelist", false,
089                "Make the --mime filter a whitelist instead of blacklist."));
090
091        opt = new Option("i", "iterator", true, "An iterator suitable for the source data (default iterator "
092                + "works on Heritrix's crawl.log).");
093        opt.setArgName("classname");
094        this.options.addOption(opt);
095
096        this.options.addOption(new Option("a", "add", false, "Add source data to existing index."));
097
098        opt = new Option("r", "origin", true, "If set, the 'origin' of each URI will be added to the index."
099                + " If no origin is provided by the source data then the " + "argument provided here will be used.");
100        opt.setArgName("origin");
101        this.options.addOption(opt);
102
103        this.options.addOption(new Option("d", "skip-duplicates", false,
104                "If set, URIs marked as duplicates will not be added to the " + "index."));
105
106        PosixParser parser = new PosixParser();
107        try {
108            this.commandLine = parser.parse(this.options, args, false);
109        } catch (UnrecognizedOptionException e) {
110            usage(e.getMessage(), 1);
111        }
112    }
113
114    /**
115     * Print usage then exit.
116     */
117    public void usage() {
118        usage(0);
119    }
120
121    /**
122     * Print usage then exit.
123     *
124     * @param exitCode
125     */
126    public void usage(int exitCode) {
127        usage(null, exitCode);
128    }
129
130    /**
131     * Print message then usage then exit.
132     * <p>
133     * The JVM exits inside in this method.
134     *
135     * @param message Message to print before we do usage.
136     * @param exitCode Exit code to use in call to System.exit.
137     */
138    public void usage(String message, int exitCode) {
139        outputAndExit(message, true, exitCode);
140    }
141
142    /**
143     * Print message and then exit.
144     * <p>
145     * The JVM exits inside in this method.
146     *
147     * @param message Message to print before we do usage.
148     * @param exitCode Exit code to use in call to System.exit.
149     */
150    public void message(String message, int exitCode) {
151        outputAndExit(message, false, exitCode);
152    }
153
154    /**
155     * Print out optional message an optional usage and then exit.
156     * <p>
157     * Private utility method. JVM exits from inside in this method.
158     *
159     * @param message Message to print before we do usage.
160     * @param doUsage True if we are to print out the usage message.
161     * @param exitCode Exit code to use in call to System.exit.
162     */
163    private void outputAndExit(String message, boolean doUsage, int exitCode) {
164        if (message != null) {
165            this.out.println(message);
166        }
167
168        if (doUsage) {
169            HelpFormatter formatter = new DigestHelpFormatter();
170            formatter.printHelp(this.out, 80, NAME, "Options:", this.options, 1, 2, "Arguments:", false);
171            this.out.println(" source                     Data to iterate " + "over (typically a crawl.log). If");
172            this.out.println("                            using a non-standard " + "iterator, consult relevant.");
173            this.out.println("                            documentation");
174            this.out.println(" target                     Target directory " + "for index output. Directory need not");
175            this.out.println("                            exist, but " + "unless --add should be empty.");
176        }
177
178        // Close printwriter so stream gets flushed.
179        this.out.close();
180        System.exit(exitCode);
181    }
182
183    /**
184     * @return Options passed on the command line.
185     */
186    public Option[] getCommandLineOptions() {
187        return this.commandLine.getOptions();
188    }
189
190    /**
191     * @return Arguments passed on the command line.
192     */
193    public List getCommandLineArguments() {
194        return this.commandLine.getArgList();
195    }
196
197    /**
198     * @return Command line.
199     */
200    public CommandLine getCommandLine() {
201        return this.commandLine;
202    }
203
204    /**
205     * Override so can customize usage output.
206     */
207    public class DigestHelpFormatter extends HelpFormatter {
208        public DigestHelpFormatter() {
209            super();
210        }
211
212        public void printUsage(PrintWriter pw, int width, String cmdLineSyntax) {
213            out.println(USAGE + NAME + " --help");
214            out.println(USAGE + NAME + " [options] source target");
215        }
216
217        public void printUsage(PrintWriter pw, int width, String app, Options options) {
218            this.printUsage(pw, width, app);
219        }
220    }
221}