001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.common.tools;
025
026import java.io.File;
027import java.util.ArrayList;
028import java.util.List;
029
030import dk.netarkivet.common.exceptions.IOFailure;
031import dk.netarkivet.common.utils.FileUtils;
032import dk.netarkivet.common.utils.batch.BatchLocalFiles;
033import dk.netarkivet.common.utils.cdx.ExtractCDXJob;
034
035/**
036 * Command line tool for extracting CDX information from given ARC files.
037 * <p>
038 * Usage: java dk.netarkivet.common.tools.ExtractCDX file1.arc [file2.arc ...] > myindex.cdx
039 * <p>
040 * Note: Does not depend on logging - communicates failures on stderr.
041 */
042public class ExtractCDX {
043    /**
044     * Main method. Extracts CDX from all given files and outputs the index on stdout.
045     *
046     * @param argv A list of (absolute paths to) files to index.
047     */
048    public static void main(String[] argv) {
049        if (argv.length == 0) {
050            System.err.println("Missing parameter: " + "Must supply an ARC file to be indexed");
051            dieWithUsage();
052        }
053        List<File> arcFiles = new ArrayList<File>();
054        for (String arg : argv) {
055            File f = toArcFile(arg);
056            arcFiles.add(f);
057        }
058        File[] arcFileArray = arcFiles.toArray(new File[] {});
059        BatchLocalFiles batchRunner = new BatchLocalFiles(arcFileArray);
060        batchRunner.run(new ExtractCDXJob(), System.out);
061    }
062
063    /**
064     * Verifies that the filename (absolute path) points to an existing file and that it is an arc file.
065     *
066     * @param filename The filename to verify.
067     * @return The arc file, as a File.
068     */
069    private static File toArcFile(String filename) {
070        File f;
071        try {
072            f = FileUtils.makeValidFileFromExisting(filename).getAbsoluteFile();
073            if (!FileUtils.ARCS_FILTER.accept(f.getParentFile(), f.getName())) {
074                dieWithError("Could not accept " + filename + ": was not an arc file");
075            }
076            return f;
077        } catch (IOFailure e) {
078            dieWithError("Could not accept " + filename + ":" + e);
079            return null; // Compiler does not recognize System.exit()
080        }
081    }
082
083    /**
084     * Prints out a message on stderr and exits with an error code.
085     *
086     * @param msg The message to print.
087     */
088    private static void dieWithError(String msg) {
089        System.err.println(msg);
090        System.err.println("Exiting - output is not OK");
091        System.exit(1);
092    }
093
094    /**
095     * Prints out proper usage of this tool on stderr and exits with an error code.
096     */
097    private static void dieWithUsage() {
098        System.err.println("Usage: java " + ExtractCDX.class.getName() + " file1.arc [file2.arc ...]");
099        System.exit(1);
100    }
101}