001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.harvesting.report;
024
025import java.io.Serializable;
026import java.util.Set;
027
028import dk.netarkivet.common.exceptions.ArgumentNotValid;
029import dk.netarkivet.harvester.datamodel.Job;
030import dk.netarkivet.harvester.datamodel.StopReason;
031
032/**
033 * Base interface for a post-crawl harvest report.
034 */
035public interface HarvestReport extends Serializable {
036
037    /**
038     * Returns the default stop reason initially assigned to every domain.
039     */
040    StopReason getDefaultStopReason();
041
042    /**
043     * Returns the set of domain names that are contained in hosts-report.txt (i.e. host names mapped to domains)
044     *
045     * @return a Set of Strings
046     */
047    Set<String> getDomainNames();
048
049    /**
050     * Get the number of objects found for the given domain.
051     *
052     * @param domainName A domain name (as given by getDomainNames())
053     * @return How many objects were collected for that domain
054     * @throws ArgumentNotValid if null or empty domainName
055     */
056    Long getObjectCount(String domainName) throws ArgumentNotValid;
057
058    /**
059     * Get the number of bytes downloaded for the given domain.
060     *
061     * @param domainName A domain name (as given by getDomainNames())
062     * @return How many bytes were collected for that domain
063     * @throws ArgumentNotValid if null or empty domainName
064     */
065    Long getByteCount(String domainName) throws ArgumentNotValid;
066
067    /**
068     * Get the StopReason for the given domain.
069     *
070     * @param domainName A domain name (as given by getDomainNames())
071     * @return the StopReason for the given domain.
072     * @throws ArgumentNotValid if null or empty domainName
073     */
074    StopReason getStopReason(String domainName) throws ArgumentNotValid;
075
076
077    /**
078     * Post-processing happens on the scheduler side when ARC files have been uploaded.
079     */
080    void postProcess(Job job);
081
082}