/*
 * #%L
 * Netarchivesuite - common
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.common.tools;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.batch.BatchLocalFiles;
import dk.netarkivet.common.utils.cdx.ExtractCDXJob;

/**
 * Command line tool that runs an {@link ExtractCDXJob} over the ARC files named on the command line and sends the
 * job output to stdout.
 * <p>
 * Usage: java dk.netarkivet.common.tools.ExtractCDX file1.arc [file2.arc ...] > myindex.cdx
 * <p>
 * Problems detected by this tool itself (no arguments, a rejected file name) are written to stderr and the JVM is
 * terminated with exit status 1.
 */
public class ExtractCDX {
    /**
     * Entry point. Each argument is checked with {@link #toArcFile(String)}; the resulting files are then handed to
     * a {@link BatchLocalFiles} runner which executes an {@link ExtractCDXJob} with {@code System.out} as output.
     * <p>
     * When no arguments are given, an error and the usage text are printed on stderr and the JVM exits.
     *
     * @param argv The names of the files to index, one per argument.
     */
    public static void main(String[] argv) {
        if (argv.length == 0) {
            System.err.println("Missing parameter: " + "Must supply an ARC file to be indexed");
            dieWithUsage();
        }

        // Validate every argument up front; toArcFile() terminates the JVM on the first bad one.
        List<File> inputs = new ArrayList<File>(argv.length);
        for (String name : argv) {
            inputs.add(toArcFile(name));
        }

        File[] inputArray = inputs.toArray(new File[inputs.size()]);
        BatchLocalFiles runner = new BatchLocalFiles(inputArray);
        runner.run(new ExtractCDXJob(), System.out);
    }

    /**
     * Converts a file name into an absolute {@link File} and checks it against {@code FileUtils.ARCS_FILTER}.
     * <p>
     * The name is resolved through {@code FileUtils.makeValidFileFromExisting}. If that call (or anything else in
     * the check) raises an {@link IOFailure}, or if the filter rejects the file, an error is printed on stderr and
     * the JVM exits with status 1.
     *
     * @param filename The file name to check.
     * @return The accepted file as an absolute {@link File}. The {@code null} return exists only to satisfy the
     * compiler and is reached only if {@code System.exit} returns.
     */
    private static File toArcFile(String filename) {
        try {
            File candidate = FileUtils.makeValidFileFromExisting(filename).getAbsoluteFile();
            boolean looksLikeArc = FileUtils.ARCS_FILTER.accept(candidate.getParentFile(), candidate.getName());
            if (!looksLikeArc) {
                dieWithError("Could not accept " + filename + ": was not an arc file");
            }
            return candidate;
        } catch (IOFailure failure) {
            dieWithError("Could not accept " + filename + ":" + failure);
        }
        // Only reachable on paper: the compiler does not know that System.exit() never returns.
        return null;
    }

    /**
     * Writes the given message, followed by a fixed "output is not OK" notice, to stderr and terminates the JVM
     * with exit status 1.
     *
     * @param msg The error message to print first.
     */
    private static void dieWithError(String msg) {
        System.err.println(msg);
        System.err.println("Exiting - output is not OK");
        System.exit(1);
    }

    /**
     * Writes a usage line for this tool (built from this class's fully qualified name) to stderr and terminates
     * the JVM with exit status 1.
     */
    private static void dieWithUsage() {
        String usage = "Usage: java " + ExtractCDX.class.getName() + " file1.arc [file2.arc ...]";
        System.err.println(usage);
        System.exit(1);
    }
}