001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.harvester.harvesting.report; 024 025import java.io.Serializable; 026import java.util.Set; 027 028import dk.netarkivet.common.exceptions.ArgumentNotValid; 029import dk.netarkivet.harvester.datamodel.Job; 030import dk.netarkivet.harvester.datamodel.StopReason; 031 032/** 033 * Base interface for a post-crawl harvest report. 034 */ 035public interface HarvestReport extends Serializable { 036 037 /** 038 * Returns the default stop reason initially assigned to every domain. 039 */ 040 StopReason getDefaultStopReason(); 041 042 /** 043 * Returns the set of domain names that are contained in hosts-report.txt (i.e. host names mapped to domains) 044 * 045 * @return a Set of Strings 046 */ 047 Set<String> getDomainNames(); 048 049 /** 050 * Get the number of objects found for the given domain. 051 * 052 * @param domainName A domain name (as given by getDomainNames()) 053 * @return How many objects were collected for that domain 054 * @throws ArgumentNotValid if null or empty domainName 055 */ 056 Long getObjectCount(String domainName) throws ArgumentNotValid; 057 058 /** 059 * Get the number of bytes downloaded for the given domain. 060 * 061 * @param domainName A domain name (as given by getDomainNames()) 062 * @return How many bytes were collected for that domain 063 * @throws ArgumentNotValid if null or empty domainName 064 */ 065 Long getByteCount(String domainName) throws ArgumentNotValid; 066 067 /** 068 * Get the StopReason for the given domain. 069 * 070 * @param domainName A domain name (as given by getDomainNames()) 071 * @return the StopReason for the given domain. 072 * @throws ArgumentNotValid if null or empty domainName 073 */ 074 StopReason getStopReason(String domainName) throws ArgumentNotValid; 075 076 077 /** 078 * Post-processing happens on the scheduler side when ARC files have been uploaded. 079 */ 080 void postProcess(Job job); 081 082}