001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.harvesting;
025
026import org.archive.crawler.datamodel.CrawlURI;
027import org.archive.crawler.framework.Processor;
028
029/**
030 * A post processor that adds an annotation content-size:<bytes> for each successfully harvested URI.
031 */
032@SuppressWarnings({"serial"})
033public class ContentSizeAnnotationPostProcessor extends Processor {
034
035    /** Prefix associated with annotations made by this processor. */
036    public static final String CONTENT_SIZE_ANNOTATION_PREFIX = "content-size:";
037
038    /**
039     * Constructor.
040     *
041     * @param name the name of the processor.
042     * @see Processor
043     */
044    public ContentSizeAnnotationPostProcessor(String name) {
045        super(name, "A post processor that adds an annotation content-size:<bytes> for each successfully harvested"
046                + " URI.");
047    }
048
049    /**
050     * For each URI with a successful status code (status code > 0), add annotation with content size.
051     *
052     * @param crawlURI URI to add annotation for if successful.
053     * @throws ArgumentNotValid if crawlURI is null.
054     * @throws InterruptedException never.
055     * @see Processor#innerProcess(org.archive.crawler.datamodel.CrawlURI)
056     */
057    protected void innerProcess(CrawlURI crawlURI) throws InterruptedException {
058        if (crawlURI == null) {
059            throw new IllegalArgumentException("The value of the variable 'CrawlURI crawlURI' must not be null.");
060        }
061        if (crawlURI.getFetchStatus() > 0) {
062            crawlURI.addAnnotation(CONTENT_SIZE_ANNOTATION_PREFIX + crawlURI.getContentSize());
063        }
064    }
065
066}