/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 * 
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.harvester.webinterface;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;

import javax.servlet.ServletRequest;
import javax.servlet.jsp.PageContext;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.ForwardedToErrorPage;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.DomainUtils;
import dk.netarkivet.common.utils.I18n;
import dk.netarkivet.common.webinterface.HTMLUtils;
import dk.netarkivet.harvester.datamodel.Domain;
import dk.netarkivet.harvester.datamodel.DomainConfiguration;
import dk.netarkivet.harvester.datamodel.DomainDAO;
import dk.netarkivet.harvester.datamodel.NamedUtils;
import dk.netarkivet.harvester.datamodel.SeedList;
import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes;

/**
 * Utility class for handling update of domain from the domain jsp page.
 * <p>
 * All members are static; the class cannot be instantiated.
 */
public class DomainDefinition {

    protected static final Logger log = LoggerFactory.getLogger(DomainDefinition.class);

    /** Path of the domain edit page, ending in '?' so that query parameters can be appended directly. */
    protected static final String EDIT_DOMAIN_JSP = "/HarvestDefinition/Definitions-edit-domain.jsp?";

    /** Private constructor to prevent public construction of this class. */
    private DomainDefinition() {
    }

    /**
     * Extracts the parameters of a domain update from the request, checks them for inconsistencies, and passes the
     * requisite data to the updateDomain method for processing.
     * <p>
     * For reference, the parameters for this page look something like
     * http://localhost:8076/HarvestDefinition/Definitions-edit-domain.jsp?
     * update=1&name=netarkivet.dk&default=defaultconfig&configName=&order_xml=&
     * load=&maxObjects=&urlListName=&seedList=+&passwordName=&passwordDomain=& passwordRealm=&userName=&password=&
     * crawlertraps=%2Fcgi-bin%2F*%0D%0A%2Ftrap%2F*%0D%0A
     * <p>
     * The request parameters read by this method are:
     * <ul>
     * <li>{@link Constants#DOMAIN_PARAM}: required; after trimming it must be the name of a known domain.</li>
     * <li>{@link Constants#DEFAULT_PARAM}: required; must name a configuration of that domain. It becomes the
     * default configuration.</li>
     * <li>{@link Constants#EDITION_PARAM}: optional long (-1 if absent); must equal the edition of the stored
     * domain, otherwise the update is rejected as out-of-date.</li>
     * <li>{@link Constants#CRAWLERTRAPS_PARAM}: a newline-separated list of crawlertraps. May be empty or absent.</li>
     * <li>{@link Constants#COMMENTS_PARAM}: optional user-entered comments about the domain.</li>
     * <li>{@link Constants#ALIAS_PARAM}: if set to a non-blank value, this domain is an alias of the given
     * domain.</li>
     * <li>{@link Constants#RENEW_ALIAS_PARAM}: the alias is renewed even when unchanged if, and only if, the value
     * is exactly "yes".</li>
     * </ul>
     * Extended field values are processed through {@link ExtendedFieldValueDefinition} before the domain is stored.
     *
     * @param context The context of this request
     * @param i18n I18n information
     * @throws IOFailure on updateerrors in the DAO
     * @throws ForwardedToErrorPage if domain is not found, if the edition is out-of-date, or if parameters are missing
     * or invalid
     */
    public static void processRequest(PageContext context, I18n i18n) {
        ArgumentNotValid.checkNotNull(context, "PageContext context");
        ArgumentNotValid.checkNotNull(i18n, "I18n i18n");

        // NOTE(review): presumably this forwards to the error page and aborts when one of the named
        // parameters is missing or empty - confirm against HTMLUtils.
        HTMLUtils.forwardOnEmptyParameter(context, Constants.DOMAIN_PARAM, Constants.DEFAULT_PARAM);
        ServletRequest request = context.getRequest();
        String name = request.getParameter(Constants.DOMAIN_PARAM).trim();

        if (!DomainDAO.getInstance().exists(name)) {
            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;unknown.domain.0", name);
            throw new ForwardedToErrorPage("Unknown domain '" + name + "'");
        }
        Domain domain = DomainDAO.getInstance().read(name);

        // Check the edition number before updating: a mismatch means that the domain was changed by
        // somebody else after this form was rendered.
        long edition = HTMLUtils.parseOptionalLong(context, Constants.EDITION_PARAM, -1L);
        if (domain.getEdition() != edition) {
            // The two extra arguments are the opening and closing tag of a "retry" link back to the edit page.
            HTMLUtils.forwardWithRawErrorMessage(
                    context,
                    i18n,
                    "errormsg;domain.definition.changed.0.retry.1",
                    "<br/><a href=\"Definitions-edit-domain.jsp?" + Constants.DOMAIN_PARAM + "="
                            + HTMLUtils.escapeHtmlValues(HTMLUtils.encode(name)) + "\">", "</a>");
            throw new ForwardedToErrorPage("Domain '" + name + "' has changed");
        }

        // The default configuration must be one that the domain actually has.
        String defaultConf = request.getParameter(Constants.DEFAULT_PARAM);
        if (!domain.hasConfiguration(defaultConf)) {
            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;unknown.default.configuration.0.for.1",
                    defaultConf, name);
            throw new ForwardedToErrorPage("Unknown default configuration '" + defaultConf + "'");
        }

        // Optional parameters: an absent parameter is treated as the empty string.
        String crawlertraps = getParameterOrEmpty(request, Constants.CRAWLERTRAPS_PARAM);
        String comments = getParameterOrEmpty(request, Constants.COMMENTS_PARAM);
        String alias = getParameterOrEmpty(request, Constants.ALIAS_PARAM);

        // Constant-first comparison: an absent parameter simply means "do not renew".
        boolean renewAlias = "yes".equals(request.getParameter(Constants.RENEW_ALIAS_PARAM));

        ExtendedFieldValueDefinition.processRequest(context, i18n, domain, ExtendedFieldTypes.DOMAIN);

        updateDomain(domain, defaultConf, crawlertraps, comments, alias, renewAlias);
    }

    /**
     * Reads a request parameter, substituting the empty string when the parameter is absent.
     *
     * @param request the request to read from
     * @param parameterName the name of the parameter
     * @return the parameter value, or "" if the request has no such parameter
     */
    private static String getParameterOrEmpty(ServletRequest request, String parameterName) {
        String value = request.getParameter(parameterName);
        return value == null ? "" : value;
    }

    /**
     * This updates the given domain in the database.
     *
     * @param domain the given domain
     * @param defaultConfig the name of the default configuration
     * @param crawlertraps the current crawlertraps stated for the domain, separated by line breaks; must not be
     * null
     * @param comments User-defined comments for the domain
     * @param alias the domain that this domain is an alias of; a blank string means that this domain is not an
     * alias. Must not be null.
     * @param renewAlias true, if alias is to be updated even if it is not changed
     */
    private static void updateDomain(Domain domain, String defaultConfig, String crawlertraps, String comments,
            String alias, boolean renewAlias) {
        // Set default configuration
        domain.setDefaultConfiguration(defaultConfig);
        domain.setComments(comments);

        // Update crawlertraps: one trap per line, lines consisting of whitespace only are skipped.
        // Accepted lines are stored as entered (they are not trimmed).
        List<String> trapList = new ArrayList<String>();
        if (crawlertraps.length() > 0) {
            String[] traps = crawlertraps.split("[\\r\\n]+");
            for (String trap : traps) {
                if (trap.trim().length() > 0) {
                    trapList.add(trap);
                }
            }
            log.debug("Now {} crawlertraps for this domain.", trapList.size());
        } else {
            log.debug("No crawlertraps for this domain.");
        }
        // Note that exception is thrown if any of the crawlertraps is not a valid regexp
        domain.setCrawlerTraps(trapList, true);

        // Update alias information.
        // A blank alias means that this domain is not (or should no longer be) an alias, which is
        // represented by null. The alias is only touched when needToUpdateAlias() says so.
        String oldAlias = null;
        if (domain.getAliasInfo() != null) {
            oldAlias = domain.getAliasInfo().getAliasOf();
        }

        String trimmedAlias = alias.trim();
        String newAlias = trimmedAlias.isEmpty() ? null : trimmedAlias;

        if (needToUpdateAlias(oldAlias, newAlias, renewAlias)) {
            domain.updateAlias(newAlias);
        }

        DomainDAO.getInstance().update(domain);
    }

    /**
     * Define the cases where we want to update the alias information:
     * <ol>
     * <li>The new alias is null, and the old alias is different from null.</li>
     * <li>The new alias is different from null, and the old alias is null.</li>
     * <li>Both are different from null, and either they are not equal or renewAlias is true.</li>
     * </ol>
     *
     * @param oldAlias the old alias (could be null)
     * @param newAlias the new alias (could be null)
     * @param renewAlias should we renew alias, if the alias is unchanged?
     * @return true, if we want to update the alias information, false otherwise
     */
    private static boolean needToUpdateAlias(String oldAlias, String newAlias, boolean renewAlias) {
        if (newAlias == null) {
            // Removing an alias is only a change if there was one before.
            return oldAlias != null;
        }
        // A new or changed alias is always stored; an unchanged one only on explicit renewal.
        return oldAlias == null || !oldAlias.equals(newAlias) || renewAlias;
    }

    /**
     * Creates domains with default attributes.
     * <p>
     * A name is used for creation if it is a valid domain name for which no domain exists yet. All accepted domains
     * are handed to the DAO in one call, after all names have been examined.
     *
     * @param domains a list of domain names
     * @return List of the non-empty domain names that were not legal domain names or already exist.
     */
    public static List<String> createDomains(String... domains) {
        DomainDAO ddao = DomainDAO.getInstance();
        List<String> illegalOrExisting = new ArrayList<String>();
        List<Domain> domainsToCreate = new ArrayList<Domain>();
        for (String domain : domains) {
            if (DomainUtils.isValidDomainName(domain) && !ddao.exists(domain)) {
                domainsToCreate.add(Domain.getDefaultDomain(domain));
            } else if (domain.trim().length() > 0) {
                // Blank entries are dropped silently; everything else is reported back to the caller.
                illegalOrExisting.add(domain);
            }
        }

        log.info("Creating {} new domains", domainsToCreate.size());
        ddao.create(domainsToCreate);

        return illegalOrExisting;
    }

    /**
     * Creates a link to the domain edit page.
     *
     * @param domain The domain to show with a link
     * @return HTML code with the link and the domain name shown
     * @throws ArgumentNotValid if domain is null or empty
     */
    public static String makeDomainLink(String domain) {
        ArgumentNotValid.checkNotNullOrEmpty(domain, "domain");
        String url = EDIT_DOMAIN_JSP + Constants.DOMAIN_PARAM + "=" + HTMLUtils.encode(domain);
        return "<a href=\"" + url + "\">" + HTMLUtils.escapeHtmlValues(domain) + "</a>";
    }

    /**
     * Creates a url based on the supplied request where all the parameters are the same, except the
     * <code>ShowUnusedConfigurations</code> boolean, which is flipped.
     *
     * @param request The original 'create domain' request to based the new url on.
     * @return The new url with the <code>ShowUnusedConfigurations</code> boolean switched.
     */
    public static String createDomainUrlWithFlippedShowConfigurations(ServletRequest request) {
        boolean showUnusedConfigurations = Boolean.parseBoolean(request
                .getParameter(Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM));
        boolean showUnusedSeeds = Boolean.parseBoolean(request.getParameter(Constants.SHOW_UNUSED_SEEDS_PARAM));
        return buildEditDomainUrl(request, !showUnusedConfigurations, showUnusedSeeds);
    }

    /**
     * Creates a url based on the supplied request where all the parameters are the same, except the
     * <code>ShowUnusedSeedLists</code> boolean, which is flipped.
     *
     * @param request The original 'create domain' request to based the new url on.
     * @return The new url with the <code>ShowUnusedSeedLists</code> boolean switched.
     */
    public static String createDomainUrlWithFlippedShowSeeds(ServletRequest request) {
        boolean showUnusedConfigurations = Boolean.parseBoolean(request
                .getParameter(Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM));
        boolean showUnusedSeeds = Boolean.parseBoolean(request.getParameter(Constants.SHOW_UNUSED_SEEDS_PARAM));
        return buildEditDomainUrl(request, showUnusedConfigurations, !showUnusedSeeds);
    }

    /**
     * Builds a url to the domain edit page for the domain named in the request, with the two 'show unused' flags
     * set to the given values.
     *
     * @param request The request to take the domain name from.
     * @param showUnusedConfigurations The value of the 'show unused configurations' flag in the url.
     * @param showUnusedSeeds The value of the 'show unused seeds' flag in the url.
     * @return The url of the edit page with the domain and both flags as query parameters.
     */
    private static String buildEditDomainUrl(ServletRequest request, boolean showUnusedConfigurations,
            boolean showUnusedSeeds) {
        StringBuilder urlBuilder = new StringBuilder(EDIT_DOMAIN_JSP);
        urlBuilder.append(Constants.DOMAIN_PARAM).append("=")
                .append(HTMLUtils.encode(request.getParameter(Constants.DOMAIN_PARAM)));
        urlBuilder.append("&").append(Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM).append("=")
                .append(showUnusedConfigurations);
        urlBuilder.append("&").append(Constants.SHOW_UNUSED_SEEDS_PARAM).append("=").append(showUnusedSeeds);
        return urlBuilder.toString();
    }

    /**
     * Search for domains matching the following criteria.
     * TODO Should we allow more than one criteria?
     * TODO use Enum instead for searchType
     *
     * @param context the context of the JSP page calling
     * @param i18n The translation properties file used
     * @param searchQuery The given searchQuery for searching for among the domains.
     * @param searchType The given searchCriteria
     * @return the set of domain-names matching the given criteria.
     * @throws ArgumentNotValid if searchQuery or searchType is null or empty
     * @throws ForwardedToErrorPage if searchType is not accepted by {@link DomainSearchType#parse}
     */
    public static List<String> getDomains(PageContext context, I18n i18n, String searchQuery, String searchType) {
        ArgumentNotValid.checkNotNullOrEmpty(searchQuery, "String searchQuery");
        ArgumentNotValid.checkNotNullOrEmpty(searchType, "String searchType");

        // Only used as validation: the parsed value is discarded, the DAO receives the raw string.
        try {
            DomainSearchType.parse(searchType);
        } catch (ArgumentNotValid e) {
            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;invalid.domain.search.criteria.0", searchType);
            throw new ForwardedToErrorPage("Unknown domain search criteria '" + searchType + "'");
        }

        log.debug("SearchQuery '{}', searchType: {}", searchQuery, searchType);
        return DomainDAO.getInstance().getDomains(searchQuery, searchType);
    }

    /**
     * Returns the list of domain configurations which are either used in a concrete harvest or is a 'default
     * configuration'.
     * <p>
     * The list is sorted alphabetically by name according to the supplied locale.
     *
     * @param domain The domain to find the used configurations for.
     * @param locale The locale to base the sorting on
     * @return A sorted list of used configurations for the supplied domain.
     */
    public static List<DomainConfiguration> getUsedConfiguration(Domain domain, Locale locale) {
        List<Long> usedConfigurationIDs = DomainDAO.getInstance().findUsedConfigurations(domain.getID());
        List<DomainConfiguration> usedConfigurations = new LinkedList<DomainConfiguration>();

        for (DomainConfiguration configuration : domain.getAllConfigurationsAsSortedList(locale)) {
            // Long.valueOf instead of the deprecated Long constructor: List.contains needs a boxed value.
            if (usedConfigurationIDs.contains(Long.valueOf(configuration.getID()))
                    || configuration.getID() == domain.getDefaultConfiguration().getID()) {
                usedConfigurations.add(configuration);
            }
        }

        NamedUtils.sortNamedObjectList(locale, usedConfigurations);
        return usedConfigurations;
    }

    /**
     * Returns the seed lists associated with the supplied configurations.
     * <p>
     * A seed list shared by several configurations is only included once; the result keeps the order in which the
     * seed lists are first encountered.
     *
     * @param configurations The configurations to find seed lists for
     * @return The seed lists used in the supplied configurations.
     */
    public static List<SeedList> getSeedLists(List<DomainConfiguration> configurations) {
        List<SeedList> seedsLists = new LinkedList<SeedList>();
        for (DomainConfiguration configuration : configurations) {
            Iterator<SeedList> seedListIterator = configuration.getSeedLists();
            while (seedListIterator.hasNext()) {
                SeedList seedList = seedListIterator.next();
                if (!seedsLists.contains(seedList)) {
                    seedsLists.add(seedList);
                }
            }
        }

        return seedsLists;
    }
}