/*
 * #%L
 * Netarchivesuite - common
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.common.utils;

import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

/**
 * Subclass of GZIPInputStream, including a workaround to support &gt;2GB files.
 * <p>
 * Java has had a bug that does not allow unzipping Gzip files with contents larger than 2GB. The result is an
 * IOException with the message "Corrupt GZIP trailer". This class works around that bug by ignoring that message for
 * all streams whose uncompressed size is larger than {@link Integer#MAX_VALUE} bytes. This sacrifices the CRC and
 * size checks for those streams, though.
 * <p>
 * See sun bug: http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=5092263
 *
 * @see GZIPInputStream
 */
public class LargeFileGZIPInputStream extends GZIPInputStream {

    /** Message of the IOException that GZIPInputStream throws when the trailer check fails. */
    private static final String CORRUPT_TRAILER_MESSAGE = "Corrupt GZIP trailer";

    /**
     * Creates a new input stream with a default buffer size.
     *
     * @param in the input stream
     * @throws IOException if an I/O error has occurred. Note: We usually don't allow IOException in our code, but this
     * is done here to closely mimic GZIPInputStream
     */
    public LargeFileGZIPInputStream(InputStream in) throws IOException {
        super(in);
    }

    /**
     * Reads uncompressed data into an array of bytes. Blocks until enough input is available for decompression.
     * <p>
     * A "Corrupt GZIP trailer" failure is swallowed and reported as a normal end of stream when more than
     * {@link Integer#MAX_VALUE} bytes have been decompressed; every other failure is rethrown unchanged.
     *
     * @param buf the buffer into which the data is read
     * @param off the start offset of the data
     * @param len the maximum number of bytes read
     * @return the actual number of bytes read, or -1 if the end of the compressed input stream is reached
     * @throws IOException if an I/O error has occurred or the compressed input data is corrupt. Note that trailer
     * mismatches are ignored in this workaround class if the size is larger than Integer.MAX_VALUE. Note: We usually
     * don't allow IOException in our code, but this is done here to closely mimic GZIPInputStream
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        try {
            return super.read(buf, off, len);
        } catch (IOException e) {
            if (!exceptionCausedByJavaException(e)) {
                throw e;
            }
            // Mimic success: flag the stream as finished so that later reads also report end of stream.
            eos = true;
            return -1;
        }
    }

    /**
     * Given an IOException caused by read, return whether this is the exception we are working around. This is the
     * case if 1) the message is "Corrupt GZIP trailer" and 2) more than Integer.MAX_VALUE bytes have been written by
     * the inflater.
     * <p>
     * The size test is strict: a stream of exactly Integer.MAX_VALUE bytes still fits a 32-bit counter, so a trailer
     * failure at that size is treated as real corruption and is not suppressed.
     *
     * @param e An IOException thrown by GZIPInputStream.read
     * @return Whether it is one caused by the bug we are working around
     */
    private boolean exceptionCausedByJavaException(IOException e) {
        // Constant-first equals keeps the comparison safe when the exception carries no message.
        return CORRUPT_TRAILER_MESSAGE.equals(e.getMessage())
                && inf.getBytesWritten() > Integer.MAX_VALUE;
    }

}