001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.utils.archive;
024
025import java.io.File;
026import java.io.OutputStream;
027import java.util.List;
028
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import dk.netarkivet.common.exceptions.ArgumentNotValid;
033import dk.netarkivet.common.exceptions.NetarkivetException;
034import dk.netarkivet.common.utils.batch.FileBatchJob;
035
036@SuppressWarnings({"serial"})
037public abstract class ArchiveBatchJobBase extends FileBatchJob {
038
039    private static final Logger log = LoggerFactory.getLogger(ArchiveBatchJobBase.class);
040
041    /** The total number of records processed. */
042    protected int noOfRecordsProcessed = 0;
043
044    /**
045     * Initialize the job before running. This is called before the processRecord() calls start coming.
046     *
047     * @param os The OutputStream to which output data is written
048     */
049    public abstract void initialize(OutputStream os);
050
051    /**
052     * Finish up the job. This is called after the last processRecord() call.
053     *
054     * @param os The OutputStream to which output data is written
055     */
056    public abstract void finish(OutputStream os);
057
058    /**
059     * Private method that handles our exception.
060     *
061     * @param e the given exception
062     * @param archiveFile The archive file where the exception occurred.
063     * @param index The offset in the archive file where the exception occurred.
064     */
065    protected void handleOurException(NetarkivetException e, File archiveFile, long index) {
066        handleException(e, archiveFile, index);
067    }
068
069    /**
070     * When the org.archive.io.arc classes throw IOExceptions while reading, this is where they go. Subclasses are
071     * welcome to override the default functionality which simply logs and records them in a list. TODO: Actually use
072     * the archive file/index entries in the exception list
073     *
074     * @param e An Exception thrown by the org.archive.io.arc classes.
075     * @param archiveFile The archive file that was processed while the Exception was thrown
076     * @param index The index (in the archive file) at which the Exception was thrown
077     * @throws ArgumentNotValid if e is null
078     */
079    public void handleException(Exception e, File archiveFile, long index) throws ArgumentNotValid {
080        ArgumentNotValid.checkNotNull(e, "e");
081
082        log.debug("Caught exception while running batch job on file {}, position {}:\n{}", archiveFile, index, e,
083                e.getMessage());
084        addException(archiveFile, index, ExceptionOccurrence.UNKNOWN_OFFSET, e);
085    }
086
087    /**
088     * Returns a representation of the list of Exceptions recorded for this archive batch job. If called by a subclass,
089     * a method overriding handleException() should always call super.handleException().
090     *
091     * @return All Exceptions passed to handleException so far.
092     */
093    public Exception[] getExceptionArray() {
094        List<ExceptionOccurrence> exceptions = getExceptions();
095        Exception[] exceptionList = new Exception[exceptions.size()];
096        int i = 0;
097        for (ExceptionOccurrence e : exceptions) {
098            exceptionList[i++] = e.getException();
099        }
100        return exceptionList;
101    }
102
103    /**
104     * Returns the number of records processed.
105     *
106     * @return the number of records processed.
107     */
108    public int noOfRecordsProcessed() {
109        return noOfRecordsProcessed;
110    }
111
112}