001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.webinterface; 025 026import java.sql.SQLException; 027import java.util.HashSet; 028import java.util.List; 029import java.util.Set; 030 031import javax.inject.Provider; 032import javax.servlet.ServletRequest; 033import javax.servlet.jsp.PageContext; 034 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037 038import com.antiaction.raptor.dao.AttributeBase; 039import com.antiaction.raptor.dao.AttributeTypeBase; 040 041import dk.netarkivet.common.distribute.indexserver.IndexClientFactory; 042import dk.netarkivet.common.distribute.indexserver.JobIndexCache; 043import dk.netarkivet.common.exceptions.ArgumentNotValid; 044import dk.netarkivet.common.exceptions.ForwardedToErrorPage; 045import dk.netarkivet.common.exceptions.UnknownID; 046import dk.netarkivet.common.utils.I18n; 047import dk.netarkivet.common.utils.Settings; 048import dk.netarkivet.common.webinterface.HTMLUtils; 049import dk.netarkivet.harvester.HarvesterSettings; 050import dk.netarkivet.harvester.datamodel.DomainDAO; 051import dk.netarkivet.harvester.datamodel.FullHarvest; 052import dk.netarkivet.harvester.datamodel.HarvestDefinition; 053import dk.netarkivet.harvester.datamodel.HarvestDefinitionDAO; 054import dk.netarkivet.harvester.datamodel.JobDAO; 055import dk.netarkivet.harvester.datamodel.JobStatus; 056import dk.netarkivet.harvester.datamodel.JobStatusInfo; 057import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory; 058import dk.netarkivet.harvester.datamodel.eav.EAV; 059import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType; 060import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDAO; 061 062/** 063 * Contains utility methods for supporting GUI for updating snapshot harvests. 064 */ 065public class SnapshotHarvestDefinition { 066 protected static final Logger log = LoggerFactory.getLogger(SnapshotHarvestDefinition.class); 067 private final Provider<HarvestDefinitionDAO> hdDaoProvider; 068 private final Provider<JobDAO> jobDaoProvider; 069 private final Provider<ExtendedFieldDAO> extendedFieldDAOProvider; 070 private final Provider<DomainDAO> domainDAOProvider; 071 private final Provider<EAV> eavDAOProvider; 072 073 /** 074 * Constructor. 075 * @param hdDaoProvider Provider for HarvestDefinitions 076 * @param jobDaoProvider Provider for Jobs 077 * @param extendedFieldDAOProvider Provider ExtendedFields 078 * @param domainDAOProvider Provider for Domains 079 */ 080 public SnapshotHarvestDefinition(Provider<HarvestDefinitionDAO> hdDaoProvider, Provider<JobDAO> jobDaoProvider, 081 Provider<ExtendedFieldDAO> extendedFieldDAOProvider, Provider<DomainDAO> domainDAOProvider, Provider<EAV> eavDAOProvider) { 082 this.hdDaoProvider = hdDaoProvider; 083 this.jobDaoProvider = jobDaoProvider; 084 this.extendedFieldDAOProvider = extendedFieldDAOProvider; 085 this.domainDAOProvider = domainDAOProvider; 086 this.eavDAOProvider = eavDAOProvider; 087 } 088 089 /** 090 * 091 * @return a default SnapshotHarvestDefinition 092 */ 093 public static SnapshotHarvestDefinition createSnapshotHarvestDefinitionWithDefaultDAOs() { 094 return new SnapshotHarvestDefinition(DAOProviderFactory.getHarvestDefinitionDAOProvider(), 095 DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(), 096 DAOProviderFactory.getDomainDAOProvider(), DAOProviderFactory.getEAVDAOProvider()); 097 } 098 099 /** 100 * Extracts all required parameters from the request, checks for any inconsistencies, and passes the requisite data 101 * to the updateHarvestDefinition method for processing. If the "update" parameter is not set, this method does 102 * nothing. 103 * <p> 104 * The parameters in the request are defined in Definitions-edit-snapshot-harvest.jsp. 105 * 106 * @param context The context of the web request. 107 * @param i18n Translation information 108 * @throws ForwardedToErrorPage if an error happened that caused a forward to the standard error page, in which case 109 * further JSP processing should be aborted. 110 */ 111 public void processRequest(PageContext context, I18n i18n) { 112 ArgumentNotValid.checkNotNull(context, "PageContext context"); 113 ArgumentNotValid.checkNotNull(i18n, "I18n i18n"); 114 115 ServletRequest request = context.getRequest(); 116 if (request.getParameter(Constants.UPDATE_PARAM) == null) { 117 return; 118 } 119 120 HTMLUtils.forwardOnEmptyParameter(context, Constants.HARVEST_PARAM); 121 122 String oldname = request.getParameter(Constants.HARVEST_OLD_PARAM); 123 if (oldname == null) { 124 oldname = ""; 125 } 126 String name = request.getParameter(Constants.HARVEST_PARAM); 127 String comments = request.getParameter(Constants.COMMENTS_PARAM); 128 129 long objectLimit = HTMLUtils.parseOptionalLong(context, Constants.DOMAIN_OBJECTLIMIT_PARAM, 130 dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_OBJECTS); 131 long byteLimit = HTMLUtils.parseOptionalLong(context, Constants.DOMAIN_BYTELIMIT_PARAM, 132 dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_BYTES); 133 long runningtimeLimit = HTMLUtils.parseOptionalLong(context, Constants.JOB_TIMELIMIT_PARAM, 134 dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_JOB_RUNNING_TIME); 135 136 Long oldHarvestId = HTMLUtils.parseOptionalLong(context, Constants.OLDSNAPSHOT_PARAM, null); 137 138 if (oldHarvestId != null && !hdDaoProvider.get().exists(oldHarvestId)) { 139 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.does.not.exist", 140 oldHarvestId); 141 throw new ForwardedToErrorPage("Old harvestdefinition " + oldHarvestId + " does not exist"); 142 } 143 144 FullHarvest hd; 145 if ((request.getParameter(Constants.CREATENEW_PARAM) != null)) { 146 if (hdDaoProvider.get().getHarvestDefinition(name) != null) { 147 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.definition.0.already.exists", name); 148 throw new ForwardedToErrorPage("Harvest definition '" + name + "' already exists"); 149 } 150 // Note, object/bytelimit set to default values, if not set 151 hd = new FullHarvest(name, comments, oldHarvestId, objectLimit, byteLimit, runningtimeLimit, false, 152 hdDaoProvider, jobDaoProvider, extendedFieldDAOProvider, domainDAOProvider); 153 hd.setActive(false); 154 hdDaoProvider.get().create(hd); 155 } else { 156 if (oldname.equals(name)) { // name is unchanged 157 hd = (FullHarvest) hdDaoProvider.get().getHarvestDefinition(name); 158 } else { 159 // test that the name does not exist already 160 if (hdDaoProvider.get().exists(name)) { 161 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.definition.0.already.exists", name); 162 throw new ForwardedToErrorPage("Harvest definition '" + name + "' already exists"); 163 } else { 164 hd = (FullHarvest) hdDaoProvider.get().getHarvestDefinition(oldname); 165 hd.setName(name); 166 } 167 } 168 if (hd == null) { 169 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.0.does.not.exist", name); 170 throw new UnknownID("Harvest definition '" + name + "' doesn't exist!"); 171 } 172 long edition = HTMLUtils.parseOptionalLong(context, Constants.EDITION_PARAM, Constants.NO_EDITION); 173 174 if (hd.getEdition() != edition) { 175 HTMLUtils.forwardWithRawErrorMessage(context, i18n, "errormsg;harvest.definition.changed.0.retry.1", 176 "<br/><a href=\"Definitions-edit-snapshot-harvest.jsp?" + Constants.HARVEST_PARAM + "=" 177 + HTMLUtils.encodeAndEscapeHTML(name) + "\">", "</a>"); 178 179 throw new ForwardedToErrorPage("Harvest definition '" + name + "' has changed"); 180 } 181 182 // MaxBytes is set to 183 // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_BYTES 184 // if parameter snapshot_byte_Limit is not defined 185 hd.setMaxBytes(byteLimit); 186 187 // MaxCountObjects is set to 188 // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_OBJECTS 189 // if parameter snapshot_object_limit is not defined 190 hd.setMaxCountObjects(objectLimit); 191 192 // MaxJobRunningTime is set to 193 // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_JOB_RUNNING_TIME 194 // if parameter snapshot_time_limit is not defined 195 hd.setMaxJobRunningTime(runningtimeLimit); 196 197 hd.setPreviousHarvestDefinition(oldHarvestId); 198 hd.setComments(comments); 199 hdDaoProvider.get().update(hd); 200 } 201 202 // EAV 203 try { 204 Long entity_id = hd.getOid(); 205 if (entity_id == null) { 206 entity_id = 0L; 207 } 208 EAV eav = eavDAOProvider.get(); 209 List<AttributeAndType> attributesAndTypes = eav.getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int)((long)entity_id)); 210 AttributeAndType attributeAndType; 211 AttributeTypeBase attributeType; 212 AttributeBase attribute; 213 for (int i=0; i<attributesAndTypes.size(); ++i) { 214 attributeAndType = attributesAndTypes.get(i); 215 attributeType = attributeAndType.attributeType; 216 attribute = attributeAndType.attribute; 217 if (attribute == null) { 218 attribute = attributeType.instanceOf(); 219 attribute.entity_id = (int)((long)entity_id); 220 } 221 switch (attributeType.viewtype) { 222 case 1: 223 long l = HTMLUtils.parseOptionalLong(context, attributeType.name, (long)attributeType.def_int); 224 attribute.setInteger((int)l); 225 break; 226 case 5: 227 case 6: 228 String paramValue = context.getRequest().getParameter(attributeType.name); 229 int intVal = 0; 230 if (paramValue != null && !"0".equals(paramValue)) { 231 intVal = 1; 232 } 233 attribute.setInteger(intVal); 234 break; 235 } 236 eav.saveAttribute(attribute); 237 } 238 } catch (SQLException e) { 239 throw new RuntimeException("Unable to store EAV data!", e); 240 } 241 } 242 243 244 /** 245 * Flip the active status of a harvestdefinition named in the "flipactive" parameter. 246 * 247 * @param context The context of the web servlet 248 * @param i18n Translation information 249 * @return True if a harvest definition changed state. 250 */ 251 public boolean flipActive(PageContext context, I18n i18n) { 252 ArgumentNotValid.checkNotNull(context, "PageContext context"); 253 ArgumentNotValid.checkNotNull(i18n, "I18n i18n"); 254 255 ServletRequest request = context.getRequest(); 256 String flipactive = request.getParameter(Constants.FLIPACTIVE_PARAM); 257 // Change activation if requested 258 if (flipactive != null) { 259 HarvestDefinition hd = hdDaoProvider.get().getHarvestDefinition(flipactive); 260 if (hd != null) { 261 boolean isActive = hd.getActive(); 262 boolean useDeduplication = Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED); 263 if (!isActive) { 264 if (hd instanceof FullHarvest) { 265 FullHarvest fhd = (FullHarvest) hd; 266 validatePreviousHd(fhd, context, i18n); 267 if (useDeduplication) { 268 // The client for requesting job index. 269 JobIndexCache jobIndexCache = IndexClientFactory.getDedupCrawllogInstance(); 270 Long harvestId = fhd.getOid(); 271 Set<Long> jobSet = hdDaoProvider.get().getJobIdsForSnapshotDeduplicationIndex(harvestId); 272 jobIndexCache.requestIndex(jobSet, harvestId); 273 } else { 274 // If deduplication disabled set indexReady to true 275 // right now, so the job generation can proceed. 276 fhd.setIndexReady(true); 277 } 278 } else { // hd is not Fullharvest 279 log.warn("Harvestdefinition #" + hd.getOid() + " is not a FullHarvest " + " but a " 280 + hd.getClass().getName()); 281 return false; 282 } 283 } 284 hd.setActive(!hd.getActive()); 285 hdDaoProvider.get().update(hd); 286 return true; 287 } else { 288 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.does.not.exist", 289 flipactive); 290 throw new ForwardedToErrorPage("Harvest definition " + flipactive + " doesn't exist"); 291 } 292 } 293 return false; 294 } 295 296 /** 297 * Validate the previous harvestDefinition of this FullHarvest. The validation checks, that the given hs arguments 298 * represents a completed Fullharvest: Check 1: It has one or more jobs. Check 2: None of the jobs have status 299 * NEW,SUBMITTED, or STARTED. 300 * 301 * @param hd A given FullHarvest 302 * @param context The context of the web request. 303 * @param i18n Translation information 304 */ 305 private void validatePreviousHd(FullHarvest hd, PageContext context, I18n i18n) { 306 HarvestDefinition preHd = hd.getPreviousHarvestDefinition(); 307 if (preHd == null) { 308 return; // no validation needed 309 } 310 311 // This query represents check one 312 HarvestStatusQuery hsq1 = new HarvestStatusQuery(preHd.getOid(), 1); 313 // This query represents check two 314 HarvestStatusQuery hsq2 = new HarvestStatusQuery(preHd.getOid(), 1); 315 // States needed to update the query for check two. 316 Set<JobStatus> chosenStates = new HashSet<JobStatus>(); 317 chosenStates.add(JobStatus.NEW); 318 chosenStates.add(JobStatus.SUBMITTED); 319 chosenStates.add(JobStatus.STARTED); 320 hsq2.setJobStatus(chosenStates); 321 HarvestStatus hs1 = jobDaoProvider.get().getStatusInfo(hsq1); 322 HarvestStatus hs2 = jobDaoProvider.get().getStatusInfo(hsq2); 323 if (hs1.getJobStatusInfo().isEmpty() || !hs2.getJobStatusInfo().isEmpty()) { 324 if (hs1.getJobStatusInfo().isEmpty()) { 325 log.debug("Cannot base snapshot job on old job, because no jobs generated for " + preHd.getName()); 326 } 327 if (!hs2.getJobStatusInfo().isEmpty()) { 328 for (JobStatusInfo jobStatusInfo: hs2.getJobStatusInfo()) { 329 log.debug("Cannot activate new jobs for {} because found job {} in state {}.", hd.getName(), jobStatusInfo.getJobID(),jobStatusInfo.getStatus().name()); 330 } 331 } 332 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.is.based.on." 333 + "unfinished.definition.1", hd.getName(), preHd.getName()); 334 throw new ForwardedToErrorPage("Harvest definition " + hd.getName() + " is based on unfinished definition " 335 + preHd.getName()); 336 } 337 } 338}