001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.harvesting.report;
024
025import java.util.Collections;
026import java.util.Map;
027import java.util.Set;
028
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031
032import dk.netarkivet.common.exceptions.ArgumentNotValid;
033import dk.netarkivet.harvester.datamodel.StopReason;
034import dk.netarkivet.harvester.harvesting.distribute.DomainStats;
035
036/**
037 * Base implementation for a harvest report.
038 */
039@SuppressWarnings({"serial"})
040public abstract class AbstractHarvestReport implements HarvestReport{
041
042    /** The logger for this class. */
043    private static final Logger log = LoggerFactory.getLogger(AbstractHarvestReport.class);
044
045    /**
046     * The default reason why we stopped harvesting this domain. This value is set by looking for a CRAWL ENDED in the
047     * crawl.log.
048     */
049    private StopReason defaultStopReason;
050
051        private DomainStatsReport domainstatsReport;
052
053    /**
054     * Default constructor that does nothing. The real construction is supposed to be done in the subclasses by filling
055     * out the domainStats map with crawl results.
056     */
057    public AbstractHarvestReport() {
058    }
059
060    /**
061     * Constructor from DomainStatsReports.
062     *
063     * @param dsr the result of parsing the crawl.log for domain statistics
064     */
065    public AbstractHarvestReport(DomainStatsReport dsr) {
066        ArgumentNotValid.checkNotNull(dsr, "DomainStatsReport dsr");
067        this.domainstatsReport = dsr;
068        this.defaultStopReason = dsr.getDefaultStopReason();
069    }
070
071    @Override
072    public StopReason getDefaultStopReason() {
073        return defaultStopReason;
074    }
075
076    /**
077     * Returns the set of domain names that are contained in hosts-report.txt (i.e. host names mapped to domains)
078     *
079     * @return a Set of Strings
080     */
081    @Override
082    public final Set<String> getDomainNames() {
083        return Collections.unmodifiableSet(domainstatsReport.getDomainstats().keySet());
084    }
085
086    /**
087     * Get the number of objects found for the given domain.
088     *
089     * @param domainName A domain name (as given by getDomainNames())
090     * @return How many objects were collected for that domain or Null if none found
091     */
092    @Override
093    public final Long getObjectCount(String domainName) {
094        ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
095        final DomainStats domainStats = getDomainStats().get(domainName);
096        if (domainStats != null) {
097            return domainStats.getObjectCount();
098        }
099        return null;
100    }
101
102    /**
103     * Get the number of bytes downloaded for the given domain.
104     *
105     * @param domainName A domain name (as given by getDomainNames())
106     * @return How many bytes were collected for that domain or null if information available for this domain.
107     */
108    @Override
109    public final Long getByteCount(String domainName) {
110        ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
111        final DomainStats domainStats = getDomainStats().get(domainName);
112        if (domainStats != null) {
113            return domainStats.getByteCount();
114        }
115        return null;
116    }
117
118    /**
119     * Get the StopReason for the given domain.
120     *
121     * @param domainName A domain name (as given by getDomainNames())
122     * @return the StopReason for the given domain or null, if no stopreason found for this domain
123     * @throws ArgumentNotValid if null or empty domainName
124     */
125    @Override
126    public final StopReason getStopReason(String domainName) {
127        ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
128        final DomainStats domainStats = getDomainStats().get(domainName);
129        if (domainStats != null) {
130            return domainStats.getStopReason();
131        }
132        return null;
133    }
134
135    /**
136     * Attempts to get an already existing {@link DomainStats} object for that domain, and if not found creates one with
137     * zero values.
138     *
139     * @param domainName the name of the domain to get DomainStats for.
140     * @return a DomainStats object for the given domain-name.
141     */
142    protected DomainStats getOrCreateDomainStats(String domainName) {
143        DomainStats dhi = getDomainStats().get(domainName);
144        if (dhi == null) {
145            dhi = new DomainStats(0L, 0L, defaultStopReason);
146            getDomainStats().put(domainName, dhi);
147        }
148
149        return dhi;
150    }
151
152    private Map<String, DomainStats> getDomainStats() {
153        return this.domainstatsReport.getDomainstats();
154    }
155    
156}