/*
 * #%L
 * Netarchivesuite - common
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.common.utils.arc;

import java.io.File;

/**
 * Represents a location key in the ARC format: the ARC file an entry lives in, paired with the offset at which the
 * entry starts. Both values are fixed at construction time.
 */
public class ARCKey {

    /** Extension used by gzipped arc-files. */
    private static final String GZIPPED_ARC_FILE_EXTENSION = ".arc.gz";
    /** Extension used by Alexa DAT files. */
    private static final String ALEXA_DAT_FILE_EXTENSION = ".dat";

    /** The ARC file that we will be reading from. */
    private final File arcFile;
    /** The offset that the entry starts at in the file. */
    private final long offset;

    /**
     * Constructor for ARCKey. Note that if the filename ends in .dat (compared case-insensitively; it's an
     * Alexa-style DAT file), we assume that the file we actually want is a .arc.gz file as produced by Alexa tools.
     * That is because the Alexa cdx generator does not put the correct filename in there.
     *
     * @param archiveFileName The name of the archive found in the cdx file
     * @param offset The offset in the arc file of this entry.
     * @throws NullPointerException if archiveFileName is null
     */
    public ARCKey(String archiveFileName, long offset) {
        if (archiveFileName == null) {
            throw new NullPointerException("archiveFileName must not be null");
        }
        final int extensionLength = ALEXA_DAT_FILE_EXTENSION.length();
        // Index where a trailing ".dat" would begin; negative when the name is shorter than the extension,
        // in which case regionMatches simply reports "no match".
        final int stemLength = archiveFileName.length() - extensionLength;
        // Case-insensitive suffix test that does not depend on the default locale and allocates no
        // lower-cased copy of the name.
        final boolean isAlexaDat = archiveFileName.regionMatches(true, stemLength, ALEXA_DAT_FILE_EXTENSION, 0,
                extensionLength);

        final String arcgz;
        if (isAlexaDat) {
            // Swap the DAT extension for the gzipped ARC extension, keeping the stem untouched.
            arcgz = archiveFileName.substring(0, stemLength) + GZIPPED_ARC_FILE_EXTENSION;
        } else {
            arcgz = archiveFileName;
        }
        this.arcFile = new File(arcgz);
        this.offset = offset;
    }

    /**
     * Getter for offset.
     *
     * @return The offset into the ARC file used for this key
     */
    public long getOffset() {
        return offset;
    }

    /**
     * Getter for arcFile.
     *
     * @return The ARC file that this entry can be found in
     */
    public File getFile() {
        return arcFile;
    }

    /** @return a textual representation of filename and offset */
    @Override
    public String toString() {
        return getFile() + " offset: " + getOffset();
    }

}