/*
 * #%L
 * Netarchivesuite - common
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.common.utils.archive;

import java.io.File;
import java.io.OutputStream;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.NetarkivetException;
import dk.netarkivet.common.utils.batch.FileBatchJob;

/**
 * Abstract base class for batch jobs that process archive files record by record. It declares the
 * initialize/finish life-cycle hooks, provides default exception handling (log and record), and keeps a
 * counter of processed records that subclasses are expected to maintain.
 */
@SuppressWarnings({"serial"})
public abstract class ArchiveBatchJobBase extends FileBatchJob {

    private static final Logger log = LoggerFactory.getLogger(ArchiveBatchJobBase.class);

    /** The total number of records processed. This class never increments it; subclasses update it. */
    protected int noOfRecordsProcessed = 0;

    /**
     * Initialize the job before running. This is called before the processRecord() calls start coming.
     *
     * @param os The OutputStream to which output data is written
     */
    public abstract void initialize(OutputStream os);

    /**
     * Finish up the job. This is called after the last processRecord() call.
     *
     * @param os The OutputStream to which output data is written
     */
    public abstract void finish(OutputStream os);

    /**
     * Handles one of our own exceptions (a {@link NetarkivetException}). The default implementation simply
     * delegates to {@link #handleException(Exception, File, long)}.
     *
     * @param e the given exception
     * @param archiveFile The archive file where the exception occurred.
     * @param index The offset in the archive file where the exception occurred.
     */
    protected void handleOurException(NetarkivetException e, File archiveFile, long index) {
        handleException(e, archiveFile, index);
    }

    /**
     * When the org.archive.io.arc classes throw IOExceptions while reading, this is where they go. Subclasses are
     * welcome to override the default functionality which simply logs and records them in a list; an overriding
     * method should always call super.handleException() so that the exception is still recorded and remains
     * visible through {@link #getExceptionArray()}. TODO: Actually use the archive file/index entries in the
     * exception list
     *
     * @param e An Exception thrown by the org.archive.io.arc classes.
     * @param archiveFile The archive file that was processed while the Exception was thrown
     * @param index The index (in the archive file) at which the Exception was thrown
     * @throws ArgumentNotValid if e is null
     */
    public void handleException(Exception e, File archiveFile, long index) throws ArgumentNotValid {
        ArgumentNotValid.checkNotNull(e, "e");

        // Three placeholders take three arguments; the exception itself goes last so that SLF4J treats it as
        // the Throwable and appends its stack trace instead of silently dropping a surplus argument.
        log.debug("Caught exception while running batch job on file {}, position {}:\n{}", archiveFile, index,
                e.getMessage(), e);
        // NOTE(review): the third argument is presumably the offset in the job output, which is not known
        // here - confirm against FileBatchJob.addException.
        addException(archiveFile, index, ExceptionOccurrence.UNKNOWN_OFFSET, e);
    }

    /**
     * Returns the exceptions recorded for this archive batch job as an array, in the order in which
     * getExceptions() lists them.
     *
     * @return All Exceptions passed to handleException so far.
     */
    public Exception[] getExceptionArray() {
        List<ExceptionOccurrence> occurrences = getExceptions();
        Exception[] exceptionList = new Exception[occurrences.size()];
        int i = 0;
        for (ExceptionOccurrence occurrence : occurrences) {
            exceptionList[i++] = occurrence.getException();
        }
        return exceptionList;
    }

    /**
     * Returns the number of records processed.
     *
     * @return the number of records processed.
     */
    public int noOfRecordsProcessed() {
        return noOfRecordsProcessed;
    }

}