001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.harvester.harvesting.report; 024 025import java.util.Collections; 026import java.util.Map; 027import java.util.Set; 028 029import org.slf4j.Logger; 030import org.slf4j.LoggerFactory; 031 032import dk.netarkivet.common.exceptions.ArgumentNotValid; 033import dk.netarkivet.harvester.datamodel.StopReason; 034import dk.netarkivet.harvester.harvesting.distribute.DomainStats; 035 036/** 037 * Base implementation for a harvest report. 038 */ 039@SuppressWarnings({"serial"}) 040public abstract class AbstractHarvestReport implements HarvestReport{ 041 042 /** The logger for this class. */ 043 private static final Logger log = LoggerFactory.getLogger(AbstractHarvestReport.class); 044 045 /** 046 * The default reason why we stopped harvesting this domain. This value is set by looking for a CRAWL ENDED in the 047 * crawl.log. 048 */ 049 private StopReason defaultStopReason; 050 051 private DomainStatsReport domainstatsReport; 052 053 /** 054 * Default constructor that does nothing. The real construction is supposed to be done in the subclasses by filling 055 * out the domainStats map with crawl results. 056 */ 057 public AbstractHarvestReport() { 058 } 059 060 /** 061 * Constructor from DomainStatsReports. 062 * 063 * @param files the result of parsing the crawl.log for domain statistics 064 */ 065 public AbstractHarvestReport(DomainStatsReport dsr) { 066 ArgumentNotValid.checkNotNull(dsr, "DomainStatsReport dsr"); 067 this.domainstatsReport = dsr; 068 this.defaultStopReason = dsr.getDefaultStopReason(); 069 } 070 071 @Override 072 public StopReason getDefaultStopReason() { 073 return defaultStopReason; 074 } 075 076 /** 077 * Returns the set of domain names that are contained in hosts-report.txt (i.e. host names mapped to domains) 078 * 079 * @return a Set of Strings 080 */ 081 @Override 082 public final Set<String> getDomainNames() { 083 return Collections.unmodifiableSet(domainstatsReport.getDomainstats().keySet()); 084 } 085 086 /** 087 * Get the number of objects found for the given domain. 088 * 089 * @param domainName A domain name (as given by getDomainNames()) 090 * @return How many objects were collected for that domain or Null if none found 091 */ 092 @Override 093 public final Long getObjectCount(String domainName) { 094 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 095 final DomainStats domainStats = getDomainStats().get(domainName); 096 if (domainStats != null) { 097 return domainStats.getObjectCount(); 098 } 099 return null; 100 } 101 102 /** 103 * Get the number of bytes downloaded for the given domain. 104 * 105 * @param domainName A domain name (as given by getDomainNames()) 106 * @return How many bytes were collected for that domain or null if information available for this domain. 107 */ 108 @Override 109 public final Long getByteCount(String domainName) { 110 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 111 final DomainStats domainStats = getDomainStats().get(domainName); 112 if (domainStats != null) { 113 return domainStats.getByteCount(); 114 } 115 return null; 116 } 117 118 /** 119 * Get the StopReason for the given domain. 120 * 121 * @param domainName A domain name (as given by getDomainNames()) 122 * @return the StopReason for the given domain or null, if no stopreason found for this domain 123 * @throws ArgumentNotValid if null or empty domainName 124 */ 125 @Override 126 public final StopReason getStopReason(String domainName) { 127 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 128 final DomainStats domainStats = getDomainStats().get(domainName); 129 if (domainStats != null) { 130 return domainStats.getStopReason(); 131 } 132 return null; 133 } 134 135 /** 136 * Attempts to get an already existing {@link DomainStats} object for that domain, and if not found creates one with 137 * zero values. 138 * 139 * @param domainName the name of the domain to get DomainStats for. 140 * @return a DomainStats object for the given domain-name. 141 */ 142 protected DomainStats getOrCreateDomainStats(String domainName) { 143 DomainStats dhi = getDomainStats().get(domainName); 144 if (dhi == null) { 145 dhi = new DomainStats(0L, 0L, defaultStopReason); 146 getDomainStats().put(domainName, dhi); 147 } 148 149 return dhi; 150 } 151 152 private Map<String, DomainStats> getDomainStats() { 153 return this.domainstatsReport.getDomainstats(); 154 } 155 156}