/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.harvester.webinterface;

import java.util.HashSet;
import java.util.Set;

import javax.inject.Provider;
import javax.servlet.ServletRequest;
import javax.servlet.jsp.PageContext;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.distribute.indexserver.IndexClientFactory;
import dk.netarkivet.common.distribute.indexserver.JobIndexCache;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.ForwardedToErrorPage;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.I18n;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.webinterface.HTMLUtils;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.DomainDAO;
import dk.netarkivet.harvester.datamodel.FullHarvest;
import dk.netarkivet.harvester.datamodel.HarvestDefinition;
import dk.netarkivet.harvester.datamodel.HarvestDefinitionDAO;
import dk.netarkivet.harvester.datamodel.JobDAO;
import dk.netarkivet.harvester.datamodel.JobStatus;
import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory;
import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDAO;

/**
 * Contains utility methods for supporting GUI for updating snapshot harvests.
 * <p>
 * All persistence access goes through the DAO providers handed to the constructor, so the DAOs can be replaced
 * (for instance in tests) without touching this class.
 */
public class SnapshotHarvestDefinition {
    protected static final Logger log = LoggerFactory.getLogger(SnapshotHarvestDefinition.class);

    private final Provider<HarvestDefinitionDAO> hdDaoProvider;
    private final Provider<JobDAO> jobDaoProvider;
    private final Provider<ExtendedFieldDAO> extendedFieldDAOProvider;
    private final Provider<DomainDAO> domainDAOProvider;

    /**
     * Creates an instance that uses the given DAO providers.
     *
     * @param hdDaoProvider Provider of the DAO used to look up, create and update harvest definitions.
     * @param jobDaoProvider Provider of the DAO used to query job status information.
     * @param extendedFieldDAOProvider Provider passed on to newly created {@link FullHarvest} objects.
     * @param domainDAOProvider Provider passed on to newly created {@link FullHarvest} objects.
     */
    public SnapshotHarvestDefinition(Provider<HarvestDefinitionDAO> hdDaoProvider, Provider<JobDAO> jobDaoProvider,
            Provider<ExtendedFieldDAO> extendedFieldDAOProvider, Provider<DomainDAO> domainDAOProvider) {
        this.hdDaoProvider = hdDaoProvider;
        this.jobDaoProvider = jobDaoProvider;
        this.extendedFieldDAOProvider = extendedFieldDAOProvider;
        this.domainDAOProvider = domainDAOProvider;
    }

    /**
     * Creates an instance wired with the providers obtained from {@link DAOProviderFactory}.
     *
     * @return A new SnapshotHarvestDefinition using the factory's DAO providers.
     */
    public static SnapshotHarvestDefinition createSnapshotHarvestDefinitionWithDefaultDAOs() {
        return new SnapshotHarvestDefinition(DAOProviderFactory.getHarvestDefinitionDAOProvider(),
                DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(),
                DAOProviderFactory.getDomainDAOProvider());
    }

    /**
     * Extracts all required parameters from the request, checks for any inconsistencies, and then either creates a
     * new snapshot harvest definition or updates an existing one. If the "update" parameter is not set, this method
     * does nothing.
     * <p>
     * The parameters in the request are defined in Definitions-edit-snapshot-harvest.jsp.
     * <p>
     * When the "create new" parameter is present, a new, inactive {@link FullHarvest} is created, provided no harvest
     * definition with the given name exists already. Otherwise the existing definition with the given name is
     * updated, provided the edition sent with the request matches the edition of the stored definition.
     *
     * @param context The context of the web request.
     * @param i18n Translation information
     * @throws ArgumentNotValid if context or i18n is null.
     * @throws ForwardedToErrorPage if an error happened that caused a forward to the standard error page, in which case
     * further JSP processing should be aborted.
     * @throws UnknownID if an update was requested for a harvest definition name that does not exist. The request has
     * been forwarded to the error page in this case as well.
     */
    public void processRequest(PageContext context, I18n i18n) {
        ArgumentNotValid.checkNotNull(context, "PageContext context");
        ArgumentNotValid.checkNotNull(i18n, "I18n i18n");

        ServletRequest request = context.getRequest();
        if (request.getParameter(Constants.UPDATE_PARAM) == null) {
            return;
        }

        HTMLUtils.forwardOnEmptyParameter(context, Constants.HARVEST_PARAM);

        String name = request.getParameter(Constants.HARVEST_PARAM);
        String comments = request.getParameter(Constants.COMMENTS_PARAM);

        // Each limit falls back to the datamodel default when the parameter is not given in the request.
        long objectLimit = HTMLUtils.parseOptionalLong(context, Constants.DOMAIN_OBJECTLIMIT_PARAM,
                dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_OBJECTS);
        long byteLimit = HTMLUtils.parseOptionalLong(context, Constants.DOMAIN_BYTELIMIT_PARAM,
                dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_BYTES);
        long runningtimeLimit = HTMLUtils.parseOptionalLong(context, Constants.JOB_TIMELIMIT_PARAM,
                dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_JOB_RUNNING_TIME);

        // null means that this harvest is not based on an earlier snapshot harvest.
        Long oldHarvestId = HTMLUtils.parseOptionalLong(context, Constants.OLDSNAPSHOT_PARAM, null);

        if (oldHarvestId != null && !hdDaoProvider.get().exists(oldHarvestId)) {
            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.does.not.exist",
                    oldHarvestId);
            throw new ForwardedToErrorPage("Old harvestdefinition " + oldHarvestId + " does not exist");
        }

        FullHarvest hd;
        if (request.getParameter(Constants.CREATENEW_PARAM) != null) {
            if (hdDaoProvider.get().getHarvestDefinition(name) != null) {
                HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.definition.0.already.exists", name);
                throw new ForwardedToErrorPage("Harvest definition '" + name + "' already exists");
            }
            // Note, object/bytelimit set to default values, if not set
            hd = new FullHarvest(name, comments, oldHarvestId, objectLimit, byteLimit, runningtimeLimit, false,
                    hdDaoProvider, jobDaoProvider, extendedFieldDAOProvider, domainDAOProvider);
            // A new definition always starts out inactive; it is activated through flipActive.
            hd.setActive(false);
            hdDaoProvider.get().create(hd);
        } else {
            // NOTE(review): the cast fails with a ClassCastException if the name refers to a harvest definition
            // that is not a FullHarvest - confirm whether callers can ever send such a name.
            hd = (FullHarvest) hdDaoProvider.get().getHarvestDefinition(name);
            if (hd == null) {
                HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.0.does.not.exist", name);
                // NOTE(review): every other forward in this method is followed by ForwardedToErrorPage; this one
                // throws UnknownID. Kept as is for compatibility - confirm that callers handle it.
                throw new UnknownID("Harvest definition '" + name + "' doesn't exist!");
            }
            long edition = HTMLUtils.parseOptionalLong(context, Constants.EDITION_PARAM, Constants.NO_EDITION);

            // A differing edition means the stored definition is not the one the form was based on,
            // so the user is sent back to the edit page to retry.
            if (hd.getEdition() != edition) {
                HTMLUtils.forwardWithRawErrorMessage(context, i18n, "errormsg;harvest.definition.changed.0.retry.1",
                        "<br/><a href=\"Definitions-edit-snapshot-harvest.jsp?" + Constants.HARVEST_PARAM + "="
                                + HTMLUtils.encodeAndEscapeHTML(name) + "\">", "</a>");

                throw new ForwardedToErrorPage("Harvest definition '" + name + "' has changed");
            }

            // MaxBytes is set to
            // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_BYTES
            // if parameter snapshot_byte_Limit is not defined
            hd.setMaxBytes(byteLimit);

            // MaxCountObjects is set to
            // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_OBJECTS
            // if parameter snapshot_object_limit is not defined
            hd.setMaxCountObjects(objectLimit);

            // MaxJobRunningTime is set to
            // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_JOB_RUNNING_TIME
            // if parameter snapshot_time_limit is not defined
            hd.setMaxJobRunningTime(runningtimeLimit);

            hd.setPreviousHarvestDefinition(oldHarvestId);
            hd.setComments(comments);
            hdDaoProvider.get().update(hd);
        }
    }

    /**
     * Flip the active status of a harvestdefinition named in the "flipactive" parameter.
     * <p>
     * Activating a definition is only supported for a {@link FullHarvest}. Before activation, the previous harvest
     * definition (if any) is validated. Then either a deduplication index is requested (when deduplication is
     * enabled in the settings), or the harvest is marked as index-ready right away. Deactivating a definition
     * requires no such checks.
     *
     * @param context The context of the web servlet
     * @param i18n Translation information
     * @return True if a harvest definition changed state. False if the "flipactive" parameter is not set, or if the
     * named definition is inactive and is not a FullHarvest.
     * @throws ArgumentNotValid if context or i18n is null.
     * @throws ForwardedToErrorPage if the named harvest definition does not exist, or if it is based on a previous
     * harvest definition that fails validation. The request has been forwarded to the error page in both cases.
     */
    public boolean flipActive(PageContext context, I18n i18n) {
        ArgumentNotValid.checkNotNull(context, "PageContext context");
        ArgumentNotValid.checkNotNull(i18n, "I18n i18n");

        ServletRequest request = context.getRequest();
        String flipactive = request.getParameter(Constants.FLIPACTIVE_PARAM);
        if (flipactive == null) {
            // No change of activation requested.
            return false;
        }

        HarvestDefinition hd = hdDaoProvider.get().getHarvestDefinition(flipactive);
        if (hd == null) {
            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.does.not.exist",
                    flipactive);
            throw new ForwardedToErrorPage("Harvest definition " + flipactive + " doesn't exist");
        }

        boolean isActive = hd.getActive();
        boolean useDeduplication = Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED);
        if (!isActive) {
            if (!(hd instanceof FullHarvest)) {
                log.warn("Harvestdefinition #{} is not a FullHarvest  but a {}", hd.getOid(),
                        hd.getClass().getName());
                return false;
            }
            FullHarvest fhd = (FullHarvest) hd;
            validatePreviousHd(fhd, context, i18n);
            if (useDeduplication) {
                // The client for requesting job index.
                JobIndexCache jobIndexCache = IndexClientFactory.getDedupCrawllogInstance();
                Long harvestId = fhd.getOid();
                Set<Long> jobSet = hdDaoProvider.get().getJobIdsForSnapshotDeduplicationIndex(harvestId);
                jobIndexCache.requestIndex(jobSet, harvestId);
            } else {
                // If deduplication disabled set indexReady to true
                // right now, so the job generation can proceed.
                fhd.setIndexReady(true);
            }
        }

        hd.setActive(!hd.getActive());
        hdDaoProvider.get().update(hd);
        return true;
    }

    /**
     * Validate the previous harvestDefinition of this FullHarvest. The validation checks that the previous harvest
     * definition of the given hd argument represents a completed FullHarvest: Check 1: It has one or more jobs.
     * Check 2: None of the jobs have status NEW, SUBMITTED, or STARTED.
     * <p>
     * If the given FullHarvest has no previous harvest definition, no validation is done.
     *
     * @param hd A given FullHarvest
     * @param context The context of the web request.
     * @param i18n Translation information
     * @throws ForwardedToErrorPage if one of the checks fails, after forwarding the request to the error page.
     */
    private void validatePreviousHd(FullHarvest hd, PageContext context, I18n i18n) {
        HarvestDefinition preHd = hd.getPreviousHarvestDefinition();
        if (preHd == null) {
            return; // no validation needed
        }

        // This query represents check one
        HarvestStatusQuery hsq1 = new HarvestStatusQuery(preHd.getOid(), 0);
        // This query represents check two
        HarvestStatusQuery hsq2 = new HarvestStatusQuery(preHd.getOid(), 0);
        // States needed to update the query for check two.
        Set<JobStatus> chosenStates = new HashSet<JobStatus>();
        chosenStates.add(JobStatus.NEW);
        chosenStates.add(JobStatus.SUBMITTED);
        chosenStates.add(JobStatus.STARTED);
        hsq2.setJobStatus(chosenStates);

        HarvestStatus hs1 = jobDaoProvider.get().getStatusInfo(hsq1);
        HarvestStatus hs2 = jobDaoProvider.get().getStatusInfo(hsq2);

        // Fails if the previous harvest has no jobs at all (check one),
        // or still has jobs in one of the chosen states (check two).
        if (hs1.getJobStatusInfo().isEmpty() || !hs2.getJobStatusInfo().isEmpty()) {
            HTMLUtils.forwardWithErrorMessage(context, i18n,
                    "errormsg;harvestdefinition.0.is.based.on.unfinished.definition.1", hd.getName(),
                    preHd.getName());
            throw new ForwardedToErrorPage("Harvest definition " + hd.getName() + " is based on unfinished definition "
                    + preHd.getName());
        }
    }
}