001/* 002 * #%L 003 * Netarchivesuite - common 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.common.distribute.arcrepository; 024 025import java.io.ByteArrayInputStream; 026import java.io.File; 027import java.io.FileOutputStream; 028import java.io.FilterInputStream; 029import java.io.IOException; 030import java.io.InputStream; 031import java.io.OutputStream; 032import java.io.Serializable; 033 034import org.archive.io.ArchiveRecord; 035import org.archive.io.arc.ARCRecord; 036import org.archive.io.warc.WARCRecord; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040import dk.netarkivet.common.CommonSettings; 041import dk.netarkivet.common.distribute.RemoteFile; 042import dk.netarkivet.common.distribute.RemoteFileFactory; 043import dk.netarkivet.common.exceptions.ArgumentNotValid; 044import dk.netarkivet.common.exceptions.IOFailure; 045import dk.netarkivet.common.exceptions.IllegalState; 046import dk.netarkivet.common.utils.FileUtils; 047import dk.netarkivet.common.utils.Settings; 048import dk.netarkivet.common.utils.arc.ARCUtils; 049import dk.netarkivet.common.utils.warc.WARCUtils; 050 051/** 052 * Class to hold the result of a lookup operation in the bitarchive: The metadata information associated with the record 053 * The actual byte content The name of the file the data were retrieved from If length of record exceeds value of 054 * Settings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE The record is stored in a RemoteFile. 055 */ 056@SuppressWarnings({"serial"}) 057public class BitarchiveRecord implements Serializable { 058 059 /** the log. */ 060 private static final transient Logger log = LoggerFactory.getLogger(BitarchiveRecord.class); 061 062 /** The file the data were retrieved from. */ 063 private String fileName; 064 065 /** The actual data. */ 066 private byte[] objectBuffer; 067 068 /** The offset of the ArchiveRecord contained. */ 069 private long offset; 070 071 /** The length of the ArchiveRecord contained. */ 072 private long length; 073 074 /** The actual data as a remote file. */ 075 private RemoteFile objectAsRemoteFile; 076 077 /** Is the data stored in a RemoteFile. */ 078 private boolean isStoredAsRemoteFile = false; 079 080 /** Set after deleting RemoteFile. */ 081 private boolean hasRemoteFileBeenDeleted = false; 082 083 /** How large the ARCRecord can before saving as RemoteFile. */ 084 private final long LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER = Settings 085 .getLong(CommonSettings.BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE); 086 087 /** 088 * Creates a BitarchiveRecord from the a ArchiveRecord, which can be either a ARCRecord or WARCRecord. Note that 089 * record metadata is not included with the BitarchiveRecord, only the payload of the record. 090 * <p> 091 * If the length of the record is higher than Settings .BITARCHIVE_LIMIT_FOR_RECORD_DATATRANSFER_IN_FILE the data is 092 * stored in a RemoteFile, otherwise the data is stored in a byte array. 093 * 094 * @param record the ArchiveRecord that the data should come from. We do not close the ArchiveRecord. 095 * @param filename The filename of the ArchiveFile 096 */ 097 public BitarchiveRecord(ArchiveRecord record, String filename) { 098 ArgumentNotValid.checkNotNull(record, "ArchiveRecord record"); 099 ArgumentNotValid.checkNotNull(filename, "String filename"); 100 this.fileName = filename; 101 this.offset = record.getHeader().getOffset(); 102 if (record instanceof ARCRecord) { 103 length = record.getHeader().getLength(); 104 } else if (record instanceof WARCRecord) { 105 // The length of the payload of the warc-record is not getLength(), 106 // but getLength minus getContentBegin(), which is the number of 107 // bytes used for the record-header! 108 length = record.getHeader().getLength() - record.getHeader().getContentBegin(); 109 } else { 110 throw new ArgumentNotValid("Unknown type of ArchiveRecord"); 111 } 112 if (length > LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER) { 113 // copy arc-data to local file and create a RemoteFile based on this 114 log.info("Record exceeds limit of {} bytes. Length is {} bytes, Storing as instance of {}", 115 LIMIT_FOR_SAVING_DATA_IN_OBJECT_BUFFER, length, Settings.get(CommonSettings.REMOTE_FILE_CLASS)); 116 if (RemoteFileFactory.isExtendedRemoteFile()) { 117 objectAsRemoteFile = RemoteFileFactory.getExtendedInstance(record); 118 isStoredAsRemoteFile = true; 119 } else { 120 File localTmpFile = null; 121 try { 122 localTmpFile = File.createTempFile("BitarchiveRecord-" + fileName, ".tmp", FileUtils.getTempDir()); 123 record.dump(new FileOutputStream(localTmpFile)); 124 objectAsRemoteFile = RemoteFileFactory.getMovefileInstance(localTmpFile); 125 isStoredAsRemoteFile = true; 126 } catch (IOException e) { 127 throw new IOFailure("Unable to store record(" + fileName + "," + offset + ") as remotefile", e); 128 } 129 } 130 } else { // Store data in objectbuffer 131 try { 132 if (record instanceof ARCRecord) { 133 objectBuffer = ARCUtils.readARCRecord((ARCRecord) record); 134 } else if (record instanceof WARCRecord) { 135 objectBuffer = WARCUtils.readWARCRecord((WARCRecord) record); 136 } 137 log.debug("Bytes stored in objectBuffer: {}", objectBuffer.length); 138 } catch (IOException e) { 139 throw new ExceptionInInitializerError(e); 140 } 141 } 142 } 143 144 /** 145 * Returns the file that this information was loaded from. 146 * 147 * @return the file that this ARC record comes from. 148 */ 149 public String getFile() { 150 return fileName; 151 } 152 153 /** 154 * Returns the length of the ARCRecord contained. 155 * 156 * @return the length of the ARCRecord contained 157 */ 158 public long getLength() { 159 return length; 160 } 161 162 /** 163 * Retrieve the data in the record. If data is in RemoteFile, this operation deletes the RemoteFile. 164 * 165 * @return the data from the ARCRecord as an InputStream. 166 * @throws IllegalState if remotefile already deleted 167 */ 168 public InputStream getData() { 169 InputStream result = null; 170 if (isStoredAsRemoteFile) { 171 if (hasRemoteFileBeenDeleted) { 172 throw new IllegalState("RemoteFile has already been deleted"); 173 } 174 log.info("Reading {} bytes from RemoteFile", length); 175 InputStream rfInputStream = objectAsRemoteFile.getInputStream(); 176 result = new FilterInputStream(rfInputStream) { 177 public void close() throws IOException { 178 super.close(); 179 objectAsRemoteFile.cleanup(); 180 hasRemoteFileBeenDeleted = true; 181 } 182 }; 183 } else { 184 log.debug("Reading {} bytes from objectBuffer", length); 185 result = new ByteArrayInputStream(objectBuffer); 186 } 187 return result; 188 } 189 190 /** 191 * Deliver the data in the record to a given OutputStream. If data is in RemoteFile, this operation deletes the 192 * RemoteFile 193 * 194 * @param out deliver the data to this outputstream 195 * @throws IOFailure if any IOException occurs reading or writing the data 196 * @throws IllegalState if remotefile already deleted 197 */ 198 public void getData(OutputStream out) { 199 ArgumentNotValid.checkNotNull(out, "OutputStream out"); 200 if (isStoredAsRemoteFile) { 201 if (hasRemoteFileBeenDeleted) { 202 throw new IllegalState("RemoteFile has already been deleted"); 203 } 204 try { 205 log.debug("Reading {} bytes from RemoteFile", length); 206 objectAsRemoteFile.appendTo(out); 207 } finally { 208 log.trace("Deleting the RemoteFile '{}'.", objectAsRemoteFile.getName()); 209 objectAsRemoteFile.cleanup(); 210 hasRemoteFileBeenDeleted = true; 211 } 212 } else { 213 try { 214 log.debug("Reading {} bytes from objectBuffer", length); 215 out.write(objectBuffer, 0, objectBuffer.length); 216 } catch (IOException e) { 217 throw new IOFailure("Unable to write data from " + "objectBuffer to the outputstream", e); 218 } 219 } 220 } 221 222}