001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.utils.arc;
024
025import java.io.File;
026
/**
 * Represents a location key in the ARC format: an ARC file together with the offset at which an entry starts in that
 * file. Both values are fixed at construction time.
 */
public class ARCKey {

    /** Extension used by gzipped arc-files. */
    private static final String GZIPPED_ARC_FILE_EXTENSION = ".arc.gz";
    /** Extension used by Alexa dat files. */
    private static final String ALEXA_DAT_FILE_EXTENSION = ".dat";

    /** The ARC file that we will be reading from. */
    private final File arcFile;
    /** The offset that the entry starts at in the file. */
    private final long offset;

    /**
     * Constructor for ARCKey. Note that if the filename ends in .dat (it's an Alexa-style DAT file), we assume that the
     * file we actually want is a .arc.gz file as produced by Alexa tools. That is because the Alexa cdx generator does
     * not put the correct filename in there. The .dat suffix is matched case-insensitively.
     *
     * @param archiveFileName The name of the archive found in the cdx file; must not be null
     * @param offset The offset in the arc file of this entry.
     * @throws NullPointerException if archiveFileName is null
     */
    public ARCKey(String archiveFileName, long offset) {
        if (archiveFileName == null) {
            // Fail with an explicit message instead of an incidental NPE from a method call on null.
            throw new NullPointerException("archiveFileName must not be null");
        }
        this.arcFile = new File(toArcFileName(archiveFileName));
        this.offset = offset;
    }

    /**
     * Maps an Alexa-style .dat filename to the corresponding .arc.gz filename; any other name is returned unchanged.
     *
     * @param archiveFileName The non-null name of the archive as found in the cdx file
     * @return The name of the file the entry is expected to be found in
     */
    private static String toArcFileName(String archiveFileName) {
        int suffixLength = ALEXA_DAT_FILE_EXTENSION.length();
        int stemLength = archiveFileName.length() - suffixLength;
        // regionMatches compares char-by-char without consulting the default locale and without creating a
        // lowercased copy of the whole name; it simply returns false when the name is shorter than the suffix
        // (negative start offset).
        boolean isAlexaDat = archiveFileName.regionMatches(true, stemLength, ALEXA_DAT_FILE_EXTENSION, 0,
                suffixLength);
        if (!isAlexaDat) {
            return archiveFileName;
        }
        return archiveFileName.substring(0, stemLength) + GZIPPED_ARC_FILE_EXTENSION;
    }

    /**
     * Getter for offset.
     *
     * @return The offset into the ARC file used for this key
     */
    public long getOffset() {
        return offset;
    }

    /**
     * Getter for arcFile.
     *
     * @return The ARC file that this entry can be found in
     */
    public File getFile() {
        return arcFile;
    }

    /** @return a textual representation of filename and offset */
    @Override
    public String toString() {
        return getFile() + " offset: " + getOffset();
    }

}