001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.harvester.harvesting.frontier; 024 025import dk.netarkivet.common.exceptions.ArgumentNotValid; 026import dk.netarkivet.harvester.harvesting.frontier.FullFrontierReport.ReportIterator; 027 028/** 029 * Filters the N active queues (i.e. not exhausted or retired) with the biggest totalEnqueues values. The size of the 030 * list is defined by the setting property 031 */ 032public class TopTotalEnqueuesFilter extends MaxSizeFrontierReportExtract { 033 034 @Override 035 public InMemoryFrontierReport process(FrontierReport initialFrontier) { 036 if (!(initialFrontier instanceof FullFrontierReport)) { 037 throw new ArgumentNotValid(getClass().getSimpleName() + " operates only on " 038 + FullFrontierReport.class.getSimpleName() + ", not: " 039 + initialFrontier.getClass().getSimpleName()); 040 } 041 042 FullFrontierReport full = (FullFrontierReport) initialFrontier; 043 044 InMemoryFrontierReport topRep = new InMemoryFrontierReport(initialFrontier.getJobName()); 045 046 ReportIterator iter = full.iterateOnTotalEnqueues(); 047 try { 048 int addedLines = 0; 049 int howMany = getMaxSize(); 050 while (addedLines < howMany) { 051 if (!iter.hasNext()) { 052 break; // No more values, break loop 053 } 054 055 FrontierReportLine fetch = iter.next(); 056 long totalBudget = fetch.getTotalBudget(); 057 058 // Add only lines that are neither retired or exhausted 059 if (fetch.getCurrentSize() > 0 060 && fetch.getSessionBalance() > 0 061 && (totalBudget == Constants.HERITRIX_MAXOBJECTS_INFINITY || fetch.getTotalSpend() < totalBudget)) { 062 topRep.addLine(new FrontierReportLine(fetch)); 063 addedLines++; 064 } 065 } 066 067 } finally { 068 if (iter != null) { 069 iter.close(); 070 } 071 } 072 073 return topRep; 074 } 075 076}