001/* 002 * #%L 003 * Netarchivesuite - archive 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.archive.tools; 025 026import java.io.File; 027import java.io.FileNotFoundException; 028import java.io.IOException; 029import java.io.PrintStream; 030import java.util.ArrayList; 031import java.util.Collection; 032import java.util.List; 033import java.util.regex.Pattern; 034import java.util.regex.PatternSyntaxException; 035 036import org.apache.commons.cli.CommandLine; 037import org.apache.commons.cli.CommandLineParser; 038import org.apache.commons.cli.Option; 039import org.apache.commons.cli.Options; 040import org.apache.commons.cli.ParseException; 041import org.apache.commons.cli.PosixParser; 042 043import dk.netarkivet.common.CommonSettings; 044import dk.netarkivet.common.distribute.JMSConnectionFactory; 045import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory; 046import dk.netarkivet.common.distribute.arcrepository.BatchStatus; 047import dk.netarkivet.common.distribute.arcrepository.Replica; 048import dk.netarkivet.common.distribute.arcrepository.ReplicaType; 049import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient; 050import dk.netarkivet.common.tools.SimpleCmdlineTool; 051import dk.netarkivet.common.tools.ToolRunnerBase; 052import dk.netarkivet.common.utils.Settings; 053import dk.netarkivet.common.utils.batch.FileBatchJob; 054import dk.netarkivet.common.utils.batch.FileBatchJob.ExceptionOccurrence; 055import dk.netarkivet.common.utils.batch.LoadableFileBatchJob; 056import dk.netarkivet.common.utils.batch.LoadableJarBatchJob; 057 058/** 059 * A command-line tool to run batch jobs in the bitarchive. 060 * <p> 061 * Usage: java dk.netarkivet.archive.tools.RunBatch with arguments as defined in local class BatchParameters 062 * <p> 063 * where: <br/> 064 * -J<jarfile> is a file containing all the classes needed by a BatchJob <br/> 065 * -C<classfile> is a file containing a FileBatchJob implementation <br/> 066 * -R<regexp> is a regular expression that will be matched against file names in the archive, by default .* <br/> 067 * -B<replica> is the name of the bitarchive replica this should be run on, by default taken from settings. <br/> 068 * -O<outputfile< is a file where the output from the batch job will be written. By default, it goes to stdout. <br/> 069 * -E<errorFile> is a file where the errors from the batch job will be written. By default, it goes to stderr. <br/> 070 * -N<className> is the name of the primary class to be loaded when doing a LoadableJarBatchJob <br/> 071 * -A<Arguments> The arguments for the batchjob, separated by '##', e.g. -Aarg1##arg2##... <br/> 072 * Examples: <br/> 073 * java dk.netarkivet.archive.tools.RunBatch -CFindMime.class \ -R10-*.arc -BReplicaOne -Omimes <br/> 074 * java dk.netarkivet.archive.tools.RunBatch -JFindMime.jar -NFindMime \ -R10-*.arc -BReplicaOne -Omimes <br/> 075 * Note that you probably want to set the application instance id setting ( 076 * {@literal CommonSettings#APPLICATION_INSTANCE_ID}) to something other than its default value to avoid clashing with 077 * other channel listeners. 078 */ 079public class RunBatch extends ToolRunnerBase { 080 /** 081 * Main method. Runs a batch job in the bitarchive. Setup, teardown and run is delegated to the RunBatchTool class. 082 * Management of this, exception handling etc. is delegated to ToolRunnerBase class. 083 * 084 * @param argv command line parameters as defined in local class BatchParameters required: The name of a class-file 085 * containing an implementation of FileBatchJob Name of jar file which includes the class file, and the className 086 */ 087 public static void main(String[] argv) { 088 RunBatch instance = new RunBatch(); 089 instance.runTheTool(argv); 090 } 091 092 /** 093 * Create an instance of the actual RunBatchTool. 094 * 095 * @return an instance of RunBatchTool. 096 */ 097 protected SimpleCmdlineTool makeMyTool() { 098 return new RunBatchTool(); 099 } 100 101 /** The implementation of SimpleCmdlineTool for RunBatch. */ 102 private static class RunBatchTool implements SimpleCmdlineTool { 103 /** 104 * This instance is declared outside of run method to ensure reliable teardown in case of exceptions during 105 * execution. 106 */ 107 private ViewerArcRepositoryClient arcrep; 108 109 /** Default regexp that matches everything. */ 110 private static final String DEFAULT_REGEXP = ".*"; 111 112 /** Character to separate jarfiles with option J. */ 113 private static final String JARFILELIST_SEPARATOR = ","; 114 115 /** 116 * The regular expression that will be matched against file names in the archive, by default ".*". 117 */ 118 private String regexp = DEFAULT_REGEXP; 119 120 /** 121 * Bitarchive replica where batchjob is to be run. Set to setting use replica is as default 122 */ 123 private Replica batchReplica = Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID)); 124 125 /** 126 * The outputfile, if any was given. 127 */ 128 private File outputFile; 129 130 /** The errorfile, if any was given. */ 131 private File errorFile; 132 133 /** The list of arguments for the batchjob. */ 134 private List<String> argumentList = new ArrayList<String>(); 135 136 /** File types in input parameter. */ 137 private enum FileType { 138 OTHER, JAR, CLASS 139 } 140 141 ; 142 143 /** File suffix denoting FileType.CLASS. */ 144 private static final String CLASS_FILE_SUFFIX = ".class"; 145 146 /** File suffix denoting FileType.JAR. */ 147 private static final String JAR_FILE_SUFFIX = ".jar"; 148 149 /** The jarfile option key. */ 150 private static final String JARFILE_OPTION_KEY = "J"; 151 /** The classfile option key. */ 152 private static final String CLASSFILE_OPTION_KEY = "C"; 153 /** The regexp option key. */ 154 private static final String REGEXP_OPTION_KEY = "R"; 155 /** The replica option key. */ 156 private static final String REPLICA_OPTION_KEY = "B"; 157 /** The outputfile option key. */ 158 private static final String OUTPUTFILE_OPTION_KEY = "O"; 159 /** The errorfile option key. */ 160 private static final String ERRORFILE_OPTION_KEY = "E"; 161 /** The classname option key. */ 162 private static final String CLASSNAME_OPTION_KEY = "N"; 163 /** The arguments option key. */ 164 private static final String ARGUMENTS_OPTION_KEY = "A"; 165 166 /** To contain parameters defined by options to batchjob. */ 167 private BatchParameters parms = new BatchParameters(); 168 169 /** 170 * String to separate the arguments for the batchjob. TODO make into global constant. 171 */ 172 private static final String ARGUMENT_SEPARATOR = "##"; 173 174 /** 175 * Getting FileType from given file name. 176 * 177 * @param fileName The file name to get file type from 178 * @return FileType found from extension of file name 179 */ 180 private FileType getFileType(String fileName) { 181 int i = fileName.lastIndexOf("."); 182 if (i > 0) { // Does fileName have a suffix? 183 String s = fileName.substring(i).toLowerCase(); 184 if (s.equals(CLASS_FILE_SUFFIX)) { 185 return FileType.CLASS; 186 } else { 187 if (s.equals(JAR_FILE_SUFFIX)) { 188 return FileType.JAR; 189 } else { 190 return FileType.OTHER; 191 } 192 } 193 } else { 194 return FileType.OTHER; 195 } 196 } 197 198 /** 199 * Check, if you can write a file named fileName to current working directory. 200 * 201 * @param fileName The file name 202 * @param fileTag a tag for the fileName 203 * @return true, if you can write such a file; False, if the file already exists, or you cannot create the file 204 */ 205 private boolean checkWriteFile(String fileName, String fileTag) { 206 if (new File(fileName).exists()) { 207 System.err.println(fileTag + " '" + fileName + "' does already exist"); 208 return false; 209 } else { 210 try { 211 File tmpFile = new File(fileName); 212 tmpFile.createNewFile(); 213 if (!tmpFile.canWrite()) { 214 System.err.println(fileTag + " '" + fileName + "' cannot be written to"); 215 return false; 216 } else { 217 return true; 218 } 219 } catch (IOException e) { 220 System.err.println(fileTag + " '" + fileName + "' cannot be created."); 221 return false; 222 } 223 } 224 } 225 226 /** 227 * Type to encapsulate parameters defined by options to batchjob based on apache.commons.cli. 228 */ 229 private class BatchParameters { 230 /** 231 * Options object for parameters. 232 */ 233 protected Options options = new Options(); 234 /** The parser. */ 235 private CommandLineParser parser = new PosixParser(); 236 /** The command line. */ 237 protected CommandLine cmd; 238 239 /** 240 * Initialize options by setting legal parameters for batch jobs. Note that all our options has arguments. 241 */ 242 public BatchParameters() { 243 final boolean hasArg = true; 244 options.addOption(CLASSFILE_OPTION_KEY, hasArg, "Class file to be run"); 245 options.addOption(JARFILE_OPTION_KEY, hasArg, "Jar file to be run (required if class file " 246 + "is in jar file)"); 247 options.addOption(CLASSNAME_OPTION_KEY, hasArg, "Name of the primary class to be run. Only " 248 + "needed when using the Jar-file option"); 249 250 options.addOption(REGEXP_OPTION_KEY, hasArg, "Regular expression for files to be processed " 251 + "(default: '" + regexp + "')"); 252 options.addOption(REPLICA_OPTION_KEY, hasArg, 253 "Name of bitarchive replica where batch must " + "be run " + "(default: '" 254 + Replica.getReplicaFromId(Settings.get(CommonSettings.USE_REPLICA_ID)).getName() 255 + "')"); 256 options.addOption(OUTPUTFILE_OPTION_KEY, hasArg, "Output file to contain result (default is " 257 + "stdout)"); 258 options.addOption(ERRORFILE_OPTION_KEY, hasArg, "Error file to contain errors from run " 259 + "(default is stderr)"); 260 options.addOption(ARGUMENTS_OPTION_KEY, hasArg, "Arguments for the batchjob. If several arguments, " 261 + "then separate with '##'. Default no arguments."); 262 } 263 264 /** 265 * Method for parsing the arguments. 266 * 267 * @param args The arguments. 268 * @return The empty string, or an error message. 269 */ 270 public String parseParameters(String[] args) { 271 try { 272 // parse the command line arguments 273 cmd = parser.parse(options, args); 274 } catch (ParseException exp) { 275 return "Parsing parameters failed. Reason is: " + exp.getMessage(); 276 } 277 return ""; 278 } 279 280 /** 281 * Method for retrieving the arguments of this instance. 282 * 283 * @return The list of arguments, ready to be printed to system out. 284 */ 285 public String listArguments() { 286 String s = "\nwith arguments:\n"; 287 // add options 288 for (Object o : options.getOptions()) { 289 Option op = (Option) o; 290 s += "-" + op.getOpt() + " " + op.getDescription() + "\n"; 291 } 292 // delete last delimiter 293 if (s.length() > 0) { 294 s = s.substring(0, s.length() - 1); 295 } 296 return s; 297 } 298 } 299 300 /** 301 * Accept parameters and checks them for validity. 302 * 303 * @param args the arguments 304 * @return true, if given arguments are valid returns false otherwise 305 */ 306 public boolean checkArgs(String... args) { 307 // Parse arguments to check that the options are valid 308 String msg = parms.parseParameters(args); 309 if (msg.length() > 0) { 310 System.err.println(msg); 311 return false; 312 } 313 314 // Check number of arguments 315 if (args.length < 1) { 316 System.err.println("Missing required argument: jar or class " + "file"); 317 return false; 318 } 319 if (args.length > parms.options.getOptions().size()) { 320 System.err.println("Too many arguments"); 321 return false; 322 } 323 324 // Check class file argument 325 String jars = parms.cmd.getOptionValue(JARFILE_OPTION_KEY); 326 String className = parms.cmd.getOptionValue(CLASSNAME_OPTION_KEY); 327 String classFileName = parms.cmd.getOptionValue(CLASSFILE_OPTION_KEY); 328 329 if (classFileName == null && jars == null) { 330 msg = "Missing required class file argument "; 331 msg += "(-C) or Jarfile argument (-J)"; 332 System.err.println(msg); 333 return false; 334 } 335 // Check, that option -C and -J is not used simultaneously 336 if (classFileName != null && jars != null) { 337 msg = "Cannot use option -J and -C at the same time"; 338 System.err.println(msg); 339 return false; 340 } 341 342 // Validate the situation where -C is used and not -J 343 if (classFileName != null && jars == null) { 344 if (!getFileType(classFileName).equals(FileType.CLASS)) { 345 System.err.println("Argument '" + classFileName + "' is not denoting a class file"); 346 return false; 347 } 348 if (!new File(classFileName).canRead()) { 349 System.err.println("Cannot read class file: '" + classFileName + "'"); 350 return false; 351 } 352 } 353 354 // Check jar file arguments 355 if (jars != null) { 356 if (className == null) { 357 msg = "Using option -J also requires" + "option -N (the name of the class)."; 358 System.err.println(msg); 359 return false; 360 } 361 362 String[] jarList = jars.split(JARFILELIST_SEPARATOR); 363 File[] jarFiles = new File[jarList.length]; 364 for (int i = 0; i < jarList.length; i++) { 365 String jar = jarList[i]; 366 367 // check extension 368 if (!getFileType(jar).equals(FileType.JAR)) { 369 System.err.println("Argument '" + jar + "' is not denoting a jar file"); 370 return false; 371 } 372 373 File jarFile = new File(jar); 374 jarFiles[i] = jarFile; 375 376 // Check if file is readable. 377 if (!jarFile.canRead()) { 378 System.err.println("Cannot read jar file: '" + jar + "'"); 379 return false; 380 } 381 } 382 383 // Try to load the jar batch job. 384 try { 385 new LoadableJarBatchJob(className, argumentList, jarFiles); 386 } catch (Throwable e) { 387 System.err.println("Cannot create batchjob '" + className + "' from the jarfiles '" + jars + "'"); 388 e.printStackTrace(); 389 return false; 390 } 391 } 392 393 // Check regular expression argument 394 String reg = parms.cmd.getOptionValue(REGEXP_OPTION_KEY); 395 if (reg != null) { 396 try { 397 Pattern.compile(reg); 398 } catch (PatternSyntaxException e) { 399 System.err.println("Illegal pattern syntax: '" + reg + "'"); 400 e.printStackTrace(); 401 return false; 402 } 403 } 404 // Check replica argument 405 if (!isReplicaArgumentsValid()) { 406 return false; 407 } 408 409 // Check output- and errorfile arguments 410 if (!isOutputAndErrorFileArgsValid()) { 411 return false; 412 } 413 414 // check arguments for the batchjob. 415 String arguments = parms.cmd.getOptionValue(ARGUMENTS_OPTION_KEY); 416 if (arguments != null) { 417 // go through all the arguments and put them into the list. 418 for (String arg : arguments.split(ARGUMENT_SEPARATOR)) { 419 argumentList.add(arg); 420 } 421 } 422 423 return true; 424 } 425 426 /** 427 * @return true, if replica arguments is valid (or not set at all) 428 */ 429 private boolean isReplicaArgumentsValid() { 430 String repName = parms.cmd.getOptionValue(REPLICA_OPTION_KEY); 431 if (repName != null) { 432 // Is the replica known 433 if (!Replica.isKnownReplicaName(repName)) { 434 System.err.println("Unknown replica name '" + repName + "', known replicas are " 435 + Replica.getKnownNamesAsSet()); 436 return false; 437 } 438 // Is it a bitarchive replica. 439 if (!Replica.getReplicaFromName(repName).getType().equals(ReplicaType.BITARCHIVE)) { 440 System.err.println("Can only send a batchjob to a " + "bitarchive replica, and '" 441 + Replica.getReplicaFromName(repName) + "' is of the type '" 442 + Replica.getReplicaFromName(repName).getType() + "'"); 443 return false; 444 } 445 } 446 return true; 447 } 448 449 /** 450 * @return true, if both arguments are valid, otherwise returns false 451 */ 452 private boolean isOutputAndErrorFileArgsValid() { 453 // Check output file argument 454 String oFile = parms.cmd.getOptionValue(OUTPUTFILE_OPTION_KEY); 455 if (oFile != null && !checkWriteFile(oFile, "Output file")) { 456 return false; 457 } 458 459 // Check error file argument 460 String eFile = parms.cmd.getOptionValue(ERRORFILE_OPTION_KEY); 461 if (eFile != null && !checkWriteFile(eFile, "Error file")) { 462 return false; 463 } 464 return true; 465 } 466 467 /** 468 * Create the ArcRepositoryClient instance here for reliable execution of close method in tearDown. 469 * 470 * @param args the arguments (not used) 471 */ 472 public void setUp(String... args) { 473 arcrep = ArcRepositoryClientFactory.getViewerInstance(); 474 } 475 476 /** 477 * Ensure reliable execution of the ArcRepositoryClient.close() method. Remember to check if arcrep was actually 478 * created. Also reliably cleans up the JMSConnection. 479 */ 480 public void tearDown() { 481 if (arcrep != null) { 482 arcrep.close(); 483 } 484 JMSConnectionFactory.getInstance().cleanup(); 485 } 486 487 /** 488 * Perform the actual work. Procure the necessary information from command line parameters and system settings 489 * required to run the ViewerArcRepositoryClient.batch(), and perform the operation. Creating and closing the 490 * ArcRepositoryClient (arcrep) is done in the setUp and tearDown methods. 491 * 492 * @param args the arguments 493 */ 494 public void run(String... args) { 495 // Arguments are allready checked by checkArgs 496 String jarArgs = parms.cmd.getOptionValue(JARFILE_OPTION_KEY); 497 String classFileName = parms.cmd.getOptionValue(CLASSFILE_OPTION_KEY); 498 String className = parms.cmd.getOptionValue(CLASSNAME_OPTION_KEY); 499 500 FileBatchJob job; 501 502 if (jarArgs == null) { 503 LoadableFileBatchJob classJob = new LoadableFileBatchJob(new File(classFileName), argumentList); 504 job = classJob; 505 } else { 506 // split jar argument into jar file names 507 String[] jarNames = jarArgs.split(","); 508 509 // get jar files and put them into an array 510 File[] jarFiles = new File[jarNames.length]; 511 for (int i = 0; i < jarNames.length; i++) { 512 jarFiles[i] = new File(jarNames[i]); 513 } 514 job = new LoadableJarBatchJob(className, argumentList, jarFiles); 515 } 516 517 String reg = parms.cmd.getOptionValue(REGEXP_OPTION_KEY); 518 if (reg != null) { 519 regexp = reg; 520 job.processOnlyFilesMatching(regexp); 521 } 522 523 String repName = parms.cmd.getOptionValue(REPLICA_OPTION_KEY); 524 if (repName != null) { 525 batchReplica = Replica.getReplicaFromName(repName); 526 } 527 528 // Note: if no filename is given, output will be written to stdout 529 String oFile = parms.cmd.getOptionValue(OUTPUTFILE_OPTION_KEY); 530 if (oFile != null) { 531 outputFile = new File(oFile); 532 } 533 534 // Note: if no filename is given, errors will be written to stderr 535 String eFile = parms.cmd.getOptionValue(ERRORFILE_OPTION_KEY); 536 if (eFile != null) { 537 errorFile = new File(eFile); 538 } 539 540 System.out.println("Running batch job '" + ((classFileName == null) ? "" : classFileName + "' ") 541 + ((jarArgs == null) ? "" : className + "' from jar-file '" + jarArgs + "' ") 542 + "on files matching '" + regexp + "' " + "on replica '" + batchReplica.getName() + "', " 543 + "output written to " + ((oFile == null) ? "stdout " : "file '" + oFile + "', ") 544 + "errors written to " + ((eFile == null) ? "stderr " : "file '" + eFile + "' ")); 545 546 BatchStatus status = arcrep.batch(job, batchReplica.getId()); 547 final Collection<File> failedFiles = status.getFilesFailed(); 548 Collection<ExceptionOccurrence> exceptions = status.getExceptions(); 549 550 System.out.println("Processed " + status.getNoOfFilesProcessed() + " files with " + failedFiles.size() 551 + " failures"); 552 553 // Write to output file or stdout 554 if (outputFile == null) { 555 status.appendResults(System.out); 556 } else { 557 status.copyResults(outputFile); 558 } 559 560 // Write to error file or stderr 561 PrintStream errorOutput = System.err; 562 if (errorFile != null) { 563 try { 564 System.err.println("Writing errors to file: " + errorFile.getAbsolutePath()); 565 errorOutput = new PrintStream(errorFile); 566 } catch (FileNotFoundException e) { 567 // Should not occur since argument is checked 568 System.err.println("Unable to to create errorfile for writing: " + e); 569 System.err.println("Writing errors to stdout instead!"); 570 } 571 } 572 573 if (!failedFiles.isEmpty()) { 574 errorOutput.println("Failed files:"); 575 for (File f : failedFiles) { 576 errorOutput.println(f.getName()); 577 } 578 } 579 580 if (!exceptions.isEmpty()) { 581 errorOutput.println("Failed files that produced exceptions(" + exceptions.size() + "):"); 582 for (ExceptionOccurrence occurrence : exceptions) { 583 errorOutput.println("File: " + occurrence.getFileName()); 584 errorOutput.println("Offset: " + occurrence.getFileOffset()); 585 errorOutput.println("OutputOffset: " + occurrence.getOutputOffset()); 586 errorOutput.println("Class name: " + occurrence.getClass().getName()); 587 errorOutput.println("Was exception during initialize: " + occurrence.isInitializeException()); 588 errorOutput.println("Was exception during finish: " + occurrence.isFinishException()); 589 errorOutput.println("Exception w/stacktrace: "); 590 occurrence.getException().printStackTrace(errorOutput); 591 } 592 } 593 errorOutput.close(); 594 } 595 596 /** 597 * Return the list of parameters accepted by the RunBatchTool class. 598 * 599 * @return the list of parameters accepted. 600 */ 601 public String listParameters() { 602 return parms.listArguments(); 603 } 604 } 605}