001/*
002 * #%L
003 * Netarchivesuite - archive
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.archive.tools;
025
026import java.io.File;
027import java.io.FileNotFoundException;
028import java.io.IOException;
029import java.io.PrintStream;
030import java.util.ArrayList;
031import java.util.Collection;
032import java.util.List;
033import java.util.regex.Pattern;
034import java.util.regex.PatternSyntaxException;
035
036import org.apache.commons.cli.CommandLine;
037import org.apache.commons.cli.CommandLineParser;
038import org.apache.commons.cli.Option;
039import org.apache.commons.cli.Options;
040import org.apache.commons.cli.ParseException;
041import org.apache.commons.cli.PosixParser;
042
043import dk.netarkivet.common.CommonSettings;
044import dk.netarkivet.common.distribute.JMSConnectionFactory;
045import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
046import dk.netarkivet.common.distribute.arcrepository.BatchStatus;
047import dk.netarkivet.common.distribute.arcrepository.Replica;
048import dk.netarkivet.common.distribute.arcrepository.ReplicaType;
049import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient;
050import dk.netarkivet.common.tools.SimpleCmdlineTool;
051import dk.netarkivet.common.tools.ToolRunnerBase;
052import dk.netarkivet.common.utils.Settings;
053import dk.netarkivet.common.utils.batch.FileBatchJob;
054import dk.netarkivet.common.utils.batch.FileBatchJob.ExceptionOccurrence;
055import dk.netarkivet.common.utils.batch.LoadableFileBatchJob;
056import dk.netarkivet.common.utils.batch.LoadableJarBatchJob;
057
058/**
059 * A command-line tool to run batch jobs in the bitarchive.
060 * <p>
061 * Usage: java dk.netarkivet.archive.tools.RunBatch with arguments as defined in local class BatchParameters
062 * <p>
063 * where: <br/>
064 * -J&lt;jarfile&gt; is a file containing all the classes needed by a BatchJob <br/>
065 * -C&lt;classfile&gt; is a file containing a FileBatchJob implementation <br/>
066 * -R&lt;regexp&gt; is a regular expression that will be matched against file names in the archive, by default .* <br/>
067 * -B&lt;replica&gt; is the name of the bitarchive replica this should be run on, by default taken from settings. <br/>
068 * -O&lt;outputfile&lt; is a file where the output from the batch job will be written. By default, it goes to stdout. <br/>
069 * -E&lt;errorFile&gt; is a file where the errors from the batch job will be written. By default, it goes to stderr. <br/>
070 * -N&lt;className&gt; is the name of the primary class to be loaded when doing a LoadableJarBatchJob <br/>
071 * -A&lt;Arguments&gt; The arguments for the batchjob, separated by '##', e.g. -Aarg1##arg2##... <br/>
072 * Examples: <br/>
073 * java dk.netarkivet.archive.tools.RunBatch -CFindMime.class \ -R10-*.arc -BReplicaOne -Omimes <br/>
074 * java dk.netarkivet.archive.tools.RunBatch -JFindMime.jar -NFindMime \ -R10-*.arc -BReplicaOne -Omimes <br/>
075 * Note that you probably want to set the application instance id setting (
076 * {@literal CommonSettings#APPLICATION_INSTANCE_ID}) to something other than its default value to avoid clashing with
077 * other channel listeners.
078 */
079public class RunBatch extends ToolRunnerBase {
080    /**
081     * Main method. Runs a batch job in the bitarchive. Setup, teardown and run is delegated to the RunBatchTool class.
082     * Management of this, exception handling etc. is delegated to ToolRunnerBase class.
083     *
084     * @param argv command line parameters as defined in local class BatchParameters required: The name of a class-file
085     * containing an implementation of FileBatchJob Name of jar file which includes the class file, and the className
086     */
087    public static void main(String[] argv) {
088        RunBatch instance = new RunBatch();
089        instance.runTheTool(argv);
090    }
091
092    /**
093     * Create an instance of the actual RunBatchTool.
094     *
095     * @return an instance of RunBatchTool.
096     */
097    protected SimpleCmdlineTool makeMyTool() {
098        return new RunBatchTool();
099    }
100
101    /** The implementation of SimpleCmdlineTool for RunBatch. */
102    private static class RunBatchTool implements SimpleCmdlineTool {
103        /**
104         * This instance is declared outside of run method to ensure reliable teardown in case of exceptions during
105         * execution.
106         */
107        private ViewerArcRepositoryClient arcrep;
108
109        /** Default regexp that matches everything. */
110        private static final String DEFAULT_REGEXP = ".*";
111
112        /** Character to separate jarfiles with option J. */
113        private static final String JARFILELIST_SEPARATOR = ",";
114
115        /**
116         * The regular expression that will be matched against file names in the archive, by default ".*".
117         */
118        private String regexp = DEFAULT_REGEXP;
119
120        /**
121         * Bitarchive replica where batchjob is to be run. Set to setting use replica is as default
122         */
123        private Replica batchReplica = Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID));
124
125        /**
126         * The outputfile, if any was given.
127         */
128        private File outputFile;
129
130        /** The errorfile, if any was given. */
131        private File errorFile;
132
133        /** The list of arguments for the batchjob. */
134        private List<String> argumentList = new ArrayList<String>();
135
136        /** File types in input parameter. */
137        private enum FileType {
138            OTHER, JAR, CLASS
139        }
140
141        ;
142
143        /** File suffix denoting FileType.CLASS. */
144        private static final String CLASS_FILE_SUFFIX = ".class";
145
146        /** File suffix denoting FileType.JAR. */
147        private static final String JAR_FILE_SUFFIX = ".jar";
148
149        /** The jarfile option key. */
150        private static final String JARFILE_OPTION_KEY = "J";
151        /** The classfile option key. */
152        private static final String CLASSFILE_OPTION_KEY = "C";
153        /** The regexp option key. */
154        private static final String REGEXP_OPTION_KEY = "R";
155        /** The replica option key. */
156        private static final String REPLICA_OPTION_KEY = "B";
157        /** The outputfile option key. */
158        private static final String OUTPUTFILE_OPTION_KEY = "O";
159        /** The errorfile option key. */
160        private static final String ERRORFILE_OPTION_KEY = "E";
161        /** The classname option key. */
162        private static final String CLASSNAME_OPTION_KEY = "N";
163        /** The arguments option key. */
164        private static final String ARGUMENTS_OPTION_KEY = "A";
165
166        /** To contain parameters defined by options to batchjob. */
167        private BatchParameters parms = new BatchParameters();
168
169        /**
170         * String to separate the arguments for the batchjob. TODO make into global constant.
171         */
172        private static final String ARGUMENT_SEPARATOR = "##";
173
174        /**
175         * Getting FileType from given file name.
176         *
177         * @param fileName The file name to get file type from
178         * @return FileType found from extension of file name
179         */
180        private FileType getFileType(String fileName) {
181            int i = fileName.lastIndexOf(".");
182            if (i > 0) { // Does fileName have a suffix?
183                String s = fileName.substring(i).toLowerCase();
184                if (s.equals(CLASS_FILE_SUFFIX)) {
185                    return FileType.CLASS;
186                } else {
187                    if (s.equals(JAR_FILE_SUFFIX)) {
188                        return FileType.JAR;
189                    } else {
190                        return FileType.OTHER;
191                    }
192                }
193            } else {
194                return FileType.OTHER;
195            }
196        }
197
198        /**
199         * Check, if you can write a file named fileName to current working directory.
200         *
201         * @param fileName The file name
202         * @param fileTag a tag for the fileName
203         * @return true, if you can write such a file; False, if the file already exists, or you cannot create the file
204         */
205        private boolean checkWriteFile(String fileName, String fileTag) {
206            if (new File(fileName).exists()) {
207                System.err.println(fileTag + " '" + fileName + "' does already exist");
208                return false;
209            } else {
210                try {
211                    File tmpFile = new File(fileName);
212                    tmpFile.createNewFile();
213                    if (!tmpFile.canWrite()) {
214                        System.err.println(fileTag + " '" + fileName + "' cannot be written to");
215                        return false;
216                    } else {
217                        return true;
218                    }
219                } catch (IOException e) {
220                    System.err.println(fileTag + " '" + fileName + "' cannot be created.");
221                    return false;
222                }
223            }
224        }
225
226        /**
227         * Type to encapsulate parameters defined by options to batchjob based on apache.commons.cli.
228         */
229        private class BatchParameters {
230            /**
231             * Options object for parameters.
232             */
233            protected Options options = new Options();
234            /** The parser. */
235            private CommandLineParser parser = new PosixParser();
236            /** The command line. */
237            protected CommandLine cmd;
238
239            /**
240             * Initialize options by setting legal parameters for batch jobs. Note that all our options has arguments.
241             */
242            public BatchParameters() {
243                final boolean hasArg = true;
244                options.addOption(CLASSFILE_OPTION_KEY, hasArg, "Class file to be run");
245                options.addOption(JARFILE_OPTION_KEY, hasArg, "Jar file to be run (required if class file "
246                        + "is in jar file)");
247                options.addOption(CLASSNAME_OPTION_KEY, hasArg, "Name of the primary class to be run. Only "
248                        + "needed when using the Jar-file option");
249
250                options.addOption(REGEXP_OPTION_KEY, hasArg, "Regular expression for files to be processed "
251                        + "(default: '" + regexp + "')");
252                options.addOption(REPLICA_OPTION_KEY, hasArg,
253                        "Name of bitarchive replica where batch must " + "be run " + "(default: '"
254                                + Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID)).getName()
255                                + "')");
256                options.addOption(OUTPUTFILE_OPTION_KEY, hasArg, "Output file to contain result (default is "
257                        + "stdout)");
258                options.addOption(ERRORFILE_OPTION_KEY, hasArg, "Error file to contain errors from run "
259                        + "(default is stderr)");
260                options.addOption(ARGUMENTS_OPTION_KEY, hasArg, "Arguments for the batchjob. If several arguments, "
261                        + "then separate with '##'. Default no arguments.");
262            }
263
264            /**
265             * Method for parsing the arguments.
266             *
267             * @param args The arguments.
268             * @return The empty string, or an error message.
269             */
270            public String parseParameters(String[] args) {
271                try {
272                    // parse the command line arguments
273                    cmd = parser.parse(options, args);
274                } catch (ParseException exp) {
275                    return "Parsing parameters failed.  Reason is: " + exp.getMessage();
276                }
277                return "";
278            }
279
280            /**
281             * Method for retrieving the arguments of this instance.
282             *
283             * @return The list of arguments, ready to be printed to system out.
284             */
285            public String listArguments() {
286                String s = "\nwith arguments:\n";
287                // add options
288                for (Object o : options.getOptions()) {
289                    Option op = (Option) o;
290                    s += "-" + op.getOpt() + " " + op.getDescription() + "\n";
291                }
292                // delete last delimiter
293                if (s.length() > 0) {
294                    s = s.substring(0, s.length() - 1);
295                }
296                return s;
297            }
298        }
299
300        /**
301         * Accept parameters and checks them for validity.
302         *
303         * @param args the arguments
304         * @return true, if given arguments are valid returns false otherwise
305         */
306        public boolean checkArgs(String... args) {
307            // Parse arguments to check that the options are valid
308            String msg = parms.parseParameters(args);
309            if (msg.length() > 0) {
310                System.err.println(msg);
311                return false;
312            }
313
314            // Check number of arguments
315            if (args.length < 1) {
316                System.err.println("Missing required argument: jar or class " + "file");
317                return false;
318            }
319            if (args.length > parms.options.getOptions().size()) {
320                System.err.println("Too many arguments");
321                return false;
322            }
323
324            // Check class file argument
325            String jars = parms.cmd.getOptionValue(JARFILE_OPTION_KEY);
326            String className = parms.cmd.getOptionValue(CLASSNAME_OPTION_KEY);
327            String classFileName = parms.cmd.getOptionValue(CLASSFILE_OPTION_KEY);
328
329            if (classFileName == null && jars == null) {
330                msg = "Missing required class file argument ";
331                msg += "(-C) or Jarfile argument (-J)";
332                System.err.println(msg);
333                return false;
334            }
335            // Check, that option -C and -J is not used simultaneously
336            if (classFileName != null && jars != null) {
337                msg = "Cannot use option -J and -C at the same time";
338                System.err.println(msg);
339                return false;
340            }
341
342            // Validate the situation where -C is used and not -J
343            if (classFileName != null && jars == null) {
344                if (!getFileType(classFileName).equals(FileType.CLASS)) {
345                    System.err.println("Argument '" + classFileName + "' is not denoting a class file");
346                    return false;
347                }
348                if (!new File(classFileName).canRead()) {
349                    System.err.println("Cannot read class file: '" + classFileName + "'");
350                    return false;
351                }
352            }
353
354            // Check jar file arguments
355            if (jars != null) {
356                if (className == null) {
357                    msg = "Using option -J also requires" + "option -N (the name of the class).";
358                    System.err.println(msg);
359                    return false;
360                }
361
362                String[] jarList = jars.split(JARFILELIST_SEPARATOR);
363                File[] jarFiles = new File[jarList.length];
364                for (int i = 0; i < jarList.length; i++) {
365                    String jar = jarList[i];
366
367                    // check extension
368                    if (!getFileType(jar).equals(FileType.JAR)) {
369                        System.err.println("Argument '" + jar + "' is not denoting a jar file");
370                        return false;
371                    }
372
373                    File jarFile = new File(jar);
374                    jarFiles[i] = jarFile;
375
376                    // Check if file is readable.
377                    if (!jarFile.canRead()) {
378                        System.err.println("Cannot read jar file: '" + jar + "'");
379                        return false;
380                    }
381                }
382
383                // Try to load the jar batch job.
384                try {
385                    new LoadableJarBatchJob(className, argumentList, jarFiles);
386                } catch (Throwable e) {
387                    System.err.println("Cannot create batchjob '" + className + "' from the jarfiles '" + jars + "'");
388                    e.printStackTrace();
389                    return false;
390                }
391            }
392
393            // Check regular expression argument
394            String reg = parms.cmd.getOptionValue(REGEXP_OPTION_KEY);
395            if (reg != null) {
396                try {
397                    Pattern.compile(reg);
398                } catch (PatternSyntaxException e) {
399                    System.err.println("Illegal pattern syntax: '" + reg + "'");
400                    e.printStackTrace();
401                    return false;
402                }
403            }
404            // Check replica argument
405            if (!isReplicaArgumentsValid()) {
406                return false;
407            }
408
409            // Check output- and errorfile arguments
410            if (!isOutputAndErrorFileArgsValid()) {
411                return false;
412            }
413
414            // check arguments for the batchjob.
415            String arguments = parms.cmd.getOptionValue(ARGUMENTS_OPTION_KEY);
416            if (arguments != null) {
417                // go through all the arguments and put them into the list.
418                for (String arg : arguments.split(ARGUMENT_SEPARATOR)) {
419                    argumentList.add(arg);
420                }
421            }
422
423            return true;
424        }
425
426        /**
427         * @return true, if replica arguments is valid (or not set at all)
428         */
429        private boolean isReplicaArgumentsValid() {
430            String repName = parms.cmd.getOptionValue(REPLICA_OPTION_KEY);
431            if (repName != null) {
432                // Is the replica known
433                if (!Replica.isKnownReplicaName(repName)) {
434                    System.err.println("Unknown replica name '" + repName + "', known replicas are "
435                            + Replica.getKnownNamesAsSet());
436                    return false;
437                }
438                // Is it a bitarchive replica.
439                if (!Replica.getReplicaFromName(repName).getType().equals(ReplicaType.BITARCHIVE)) {
440                    System.err.println("Can only send a batchjob to a " + "bitarchive replica, and '"
441                            + Replica.getReplicaFromName(repName) + "' is of the type '"
442                            + Replica.getReplicaFromName(repName).getType() + "'");
443                    return false;
444                }
445            }
446            return true;
447        }
448
449        /**
450         * @return true, if both arguments are valid, otherwise returns false
451         */
452        private boolean isOutputAndErrorFileArgsValid() {
453            // Check output file argument
454            String oFile = parms.cmd.getOptionValue(OUTPUTFILE_OPTION_KEY);
455            if (oFile != null && !checkWriteFile(oFile, "Output file")) {
456                return false;
457            }
458
459            // Check error file argument
460            String eFile = parms.cmd.getOptionValue(ERRORFILE_OPTION_KEY);
461            if (eFile != null && !checkWriteFile(eFile, "Error file")) {
462                return false;
463            }
464            return true;
465        }
466
467        /**
468         * Create the ArcRepositoryClient instance here for reliable execution of close method in tearDown.
469         *
470         * @param args the arguments (not used)
471         */
472        public void setUp(String... args) {
473            arcrep = ArcRepositoryClientFactory.getViewerInstance();
474        }
475
476        /**
477         * Ensure reliable execution of the ArcRepositoryClient.close() method. Remember to check if arcrep was actually
478         * created. Also reliably cleans up the JMSConnection.
479         */
480        public void tearDown() {
481            if (arcrep != null) {
482                arcrep.close();
483            }
484            JMSConnectionFactory.getInstance().cleanup();
485        }
486
487        /**
488         * Perform the actual work. Procure the necessary information from command line parameters and system settings
489         * required to run the ViewerArcRepositoryClient.batch(), and perform the operation. Creating and closing the
490         * ArcRepositoryClient (arcrep) is done in the setUp and tearDown methods.
491         *
492         * @param args the arguments
493         */
494        public void run(String... args) {
495            // Arguments are allready checked by checkArgs
496            String jarArgs = parms.cmd.getOptionValue(JARFILE_OPTION_KEY);
497            String classFileName = parms.cmd.getOptionValue(CLASSFILE_OPTION_KEY);
498            String className = parms.cmd.getOptionValue(CLASSNAME_OPTION_KEY);
499
500            FileBatchJob job;
501
502            if (jarArgs == null) {
503                LoadableFileBatchJob classJob = new LoadableFileBatchJob(new File(classFileName), argumentList);
504                job = classJob;
505            } else {
506                // split jar argument into jar file names
507                String[] jarNames = jarArgs.split(",");
508
509                // get jar files and put them into an array
510                File[] jarFiles = new File[jarNames.length];
511                for (int i = 0; i < jarNames.length; i++) {
512                    jarFiles[i] = new File(jarNames[i]);
513                }
514                job = new LoadableJarBatchJob(className, argumentList, jarFiles);
515            }
516
517            String reg = parms.cmd.getOptionValue(REGEXP_OPTION_KEY);
518            if (reg != null) {
519                regexp = reg;
520                job.processOnlyFilesMatching(regexp);
521            }
522
523            String repName = parms.cmd.getOptionValue(REPLICA_OPTION_KEY);
524            if (repName != null) {
525                batchReplica = Replica.getReplicaFromName(repName);
526            }
527
528            // Note: if no filename is given, output will be written to stdout
529            String oFile = parms.cmd.getOptionValue(OUTPUTFILE_OPTION_KEY);
530            if (oFile != null) {
531                outputFile = new File(oFile);
532            }
533
534            // Note: if no filename is given, errors will be written to stderr
535            String eFile = parms.cmd.getOptionValue(ERRORFILE_OPTION_KEY);
536            if (eFile != null) {
537                errorFile = new File(eFile);
538            }
539
540            System.out.println("Running batch job '" + ((classFileName == null) ? "" : classFileName + "' ")
541                    + ((jarArgs == null) ? "" : className + "' from jar-file '" + jarArgs + "' ")
542                    + "on files matching '" + regexp + "' " + "on replica '" + batchReplica.getName() + "', "
543                    + "output written to " + ((oFile == null) ? "stdout " : "file '" + oFile + "', ")
544                    + "errors written to " + ((eFile == null) ? "stderr " : "file '" + eFile + "' "));
545
546            BatchStatus status = arcrep.batch(job, batchReplica.getId());
547            final Collection<File> failedFiles = status.getFilesFailed();
548            Collection<ExceptionOccurrence> exceptions = status.getExceptions();
549
550            System.out.println("Processed " + status.getNoOfFilesProcessed() + " files with " + failedFiles.size()
551                    + " failures");
552
553            // Write to output file or stdout
554            if (outputFile == null) {
555                status.appendResults(System.out);
556            } else {
557                status.copyResults(outputFile);
558            }
559
560            // Write to error file or stderr
561            PrintStream errorOutput = System.err;
562            if (errorFile != null) {
563                try {
564                    System.err.println("Writing errors to file: " + errorFile.getAbsolutePath());
565                    errorOutput = new PrintStream(errorFile);
566                } catch (FileNotFoundException e) {
567                    // Should not occur since argument is checked
568                    System.err.println("Unable to to create errorfile for writing: " + e);
569                    System.err.println("Writing errors to stdout instead!");
570                }
571            }
572
573            if (!failedFiles.isEmpty()) {
574                errorOutput.println("Failed files:");
575                for (File f : failedFiles) {
576                    errorOutput.println(f.getName());
577                }
578            }
579
580            if (!exceptions.isEmpty()) {
581                errorOutput.println("Failed files that produced exceptions(" + exceptions.size() + "):");
582                for (ExceptionOccurrence occurrence : exceptions) {
583                    errorOutput.println("File: " + occurrence.getFileName());
584                    errorOutput.println("Offset: " + occurrence.getFileOffset());
585                    errorOutput.println("OutputOffset: " + occurrence.getOutputOffset());
586                    errorOutput.println("Class name: " + occurrence.getClass().getName());
587                    errorOutput.println("Was exception during initialize: " + occurrence.isInitializeException());
588                    errorOutput.println("Was exception during finish: " + occurrence.isFinishException());
589                    errorOutput.println("Exception w/stacktrace: ");
590                    occurrence.getException().printStackTrace(errorOutput);
591                }
592            }
593            errorOutput.close();
594        }
595
596        /**
597         * Return the list of parameters accepted by the RunBatchTool class.
598         *
599         * @return the list of parameters accepted.
600         */
601        public String listParameters() {
602            return parms.listArguments();
603        }
604    }
605}