001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.webinterface; 025 026import java.io.IOException; 027import java.util.ArrayList; 028import java.util.Arrays; 029import java.util.Set; 030import java.util.HashSet; 031 032import java.util.Iterator; 033import java.util.LinkedList; 034import java.util.List; 035import java.util.Locale; 036 037import javax.servlet.ServletException; 038import javax.servlet.ServletRequest; 039import javax.servlet.http.HttpServletRequest; 040import javax.servlet.http.HttpServletResponse; 041import javax.servlet.jsp.PageContext; 042import javax.servlet.RequestDispatcher; 043 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047import dk.netarkivet.common.exceptions.ArgumentNotValid; 048import dk.netarkivet.common.exceptions.ForwardedToErrorPage; 049import dk.netarkivet.common.exceptions.IOFailure; 050import dk.netarkivet.common.utils.DomainUtils; 051import dk.netarkivet.common.utils.I18n; 052import dk.netarkivet.common.webinterface.HTMLUtils; 053import dk.netarkivet.harvester.datamodel.Domain; 054import dk.netarkivet.harvester.datamodel.DomainConfiguration; 055import dk.netarkivet.harvester.datamodel.DomainDAO; 056import dk.netarkivet.harvester.datamodel.NamedUtils; 057import dk.netarkivet.harvester.datamodel.SeedList; 058import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes; 059 060/** 061 * Utility class for handling update of domain from the domain jsp page. 062 */ 063public class DomainDefinition { 064 065 protected static final Logger log = LoggerFactory.getLogger(DomainDefinition.class); 066 067 protected static final String EDIT_DOMAIN_JSP = "/HarvestDefinition/Definitions-edit-domain.jsp?"; 068 069 /** Private constructor to prevent public construction of this class. */ 070 private DomainDefinition() { 071 } 072 073 /** 074 * Extracts all required parameters from the request, checks for any inconsistencies, and passes the requisite data 075 * to the updateDomain method for processing. 076 * <p> 077 * For reference, the parameters for this page look something like 078 * http://localhost:8076/HarvestDefinition/Definitions-edit-domain.jsp? 079 * update=1&name=netarkivet.dk&default=defaultconfig&configName=&order_xml=& 080 * load=&maxObjects=&urlListName=&seedList=+&passwordName=&passwordDomain=& passwordRealm=&userName=&password=& 081 * crawlertraps=%2Fcgi-bin%2F*%0D%0A%2Ftrap%2F*%0D%0A 082 * <p> 083 * update: This method throws an exception if update is not set 084 * <p> 085 * name: must be the name of a known domain 086 * <p> 087 * comments: optional user-entered comments about the domain 088 * <p> 089 * default: the defaultconfig is set to this value. Must be non-null and a known configuration of this domain. 090 * <p> 091 * crawlertraps: a newline-separated list of urls to be ignored. May be empty or null 092 * <p> 093 * alias: If set, this domain is an alias of the set domain renewAlias: If set, the alias date should be renewed 094 * 095 * @param context The context of this request 096 * @param i18n I18n information 097 * @throws IOFailure on updateerrors in the DAO 098 * @throws ForwardedToErrorPage if domain is not found, if the edition is out-of-date, or if parameters are missing 099 * or invalid 100 */ 101 public static void processRequest(PageContext context, I18n i18n) { 102 ArgumentNotValid.checkNotNull(context, "PageContext context"); 103 ArgumentNotValid.checkNotNull(i18n, "I18n i18n"); 104 105 HTMLUtils.forwardOnEmptyParameter(context, Constants.DOMAIN_PARAM, Constants.DEFAULT_PARAM); 106 ServletRequest request = context.getRequest(); 107 String name = request.getParameter(Constants.DOMAIN_PARAM).trim(); 108 109 if (!DomainDAO.getInstance().exists(name)) { 110 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;unknown.domain.0", name); 111 throw new ForwardedToErrorPage("Unknown domain '" + name + "'"); 112 } 113 Domain domain = DomainDAO.getInstance().read(name); 114 115 // check the edition number before updating 116 long edition = HTMLUtils.parseOptionalLong(context, Constants.EDITION_PARAM, -1L); 117 118 if (domain.getEdition() != edition) { 119 HTMLUtils.forwardWithRawErrorMessage( 120 context, 121 i18n, 122 "errormsg;domain.definition.changed.0.retry.1", 123 "<br/><a href=\"Definitions-edit-domain.jsp?" + Constants.DOMAIN_PARAM + "=" 124 + HTMLUtils.escapeHtmlValues(HTMLUtils.encode(name)) + "\">", "</a>"); 125 throw new ForwardedToErrorPage("Domain '" + name + "' has changed"); 126 } 127 128 // default configuration 129 String defaultConf = request.getParameter(Constants.DEFAULT_PARAM); 130 if (!domain.hasConfiguration(defaultConf)) { 131 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;unknown.default.configuration.0.for.1", 132 defaultConf, name); 133 throw new ForwardedToErrorPage("Unknown default configuration '" + defaultConf + "'"); 134 } 135 136 String crawlertraps = request.getParameter(Constants.CRAWLERTRAPS_PARAM); 137 if (crawlertraps == null) { 138 crawlertraps = ""; 139 } 140 String comments = request.getParameter(Constants.COMMENTS_PARAM); 141 if (comments == null) { 142 comments = ""; 143 } 144 String alias = request.getParameter(Constants.ALIAS_PARAM); 145 if (alias == null) { 146 alias = ""; 147 } 148 149 String aliasRenew = request.getParameter(Constants.RENEW_ALIAS_PARAM); 150 if (aliasRenew == null) { 151 aliasRenew = "no"; 152 } 153 154 boolean renewAlias = aliasRenew.equals("yes"); 155 156 ExtendedFieldValueDefinition.processRequest(context, i18n, domain, ExtendedFieldTypes.DOMAIN); 157 158 updateDomain(domain, defaultConf, crawlertraps, comments, alias, renewAlias); 159 } 160 161 /** 162 * This updates the given domain in the database. 163 * 164 * @param domain the given domain 165 * @param defaultConfig the name of the default configuration 166 * @param crawlertraps the current crawlertraps stated for the domain 167 * @param comments User-defined comments for the domain 168 * @param alias if this is non-null, this domain is an alias of 'alias'. 169 * @param renewAlias true, if alias is to be updated even if it is not changed 170 */ 171 private static void updateDomain(Domain domain, String defaultConfig, String crawlertraps, String comments, 172 String alias, boolean renewAlias) { 173 // Set default configuration 174 domain.setDefaultConfiguration(defaultConfig); 175 domain.setComments(comments); 176 177 // Update crawlertraps 178 List<String> trapList = new ArrayList<String>(); 179 if (crawlertraps.length() > 0) { 180 String[] traps = crawlertraps.split("[\\r\\n]+"); 181 for (String trap : traps) { 182 if (trap.trim().length() > 0) { 183 trapList.add(trap); 184 } 185 } 186 log.debug("Now {} crawlertraps for this domain.", trapList.size()); 187 } else { 188 log.debug("No crawlertraps for this domain."); 189 } 190 domain.setCrawlerTraps(trapList, true); // Note that exception is thrówn if any of the crawlertraps is not a valid regexp 191 192 // Update alias information 193 194 // If alias is empty string, do not regard this domain as an alias any 195 // more. 196 // If alias is not-empty, update only if alias is different from 197 // oldAlias or 198 // This only updates alias if it is required: See javadoc below for 199 // needToUpdateAlias() 200 String oldAlias = null; 201 if (domain.getAliasInfo() != null) { 202 oldAlias = domain.getAliasInfo().getAliasOf(); 203 } 204 String newAlias; 205 // If alias is empty string, this domain is or should not be an alias. 206 207 if (alias.trim().isEmpty()) { 208 newAlias = null; 209 } else { 210 newAlias = alias.trim(); 211 } 212 213 if (needToUpdateAlias(oldAlias, newAlias, renewAlias)) { 214 domain.updateAlias(newAlias); 215 } 216 217 DomainDAO.getInstance().update(domain); 218 } 219 220 /** 221 * Define the cases where we want to update the alias information. 1. The alias information is updated, if the new 222 * alias is null, and the old alias is different from null 2. The alias information is updated, if the new alias is 223 * different from null, and old alias is null 3. The alias information is updated, if the new alias is different 224 * from null, and the old alias is different from null, and they are not either not equal, or renewAlias is true 225 * 226 * @param oldAlias the old alias (could be null) 227 * @param newAlias the new alias (could be null) 228 * @param renewAlias should we renew alias, if the alias is unchanged? 229 * @return true, if we want to update the alias information, false otherwise 230 */ 231 private static boolean needToUpdateAlias(String oldAlias, String newAlias, boolean renewAlias) { 232 boolean needToUpdate = false; 233 // If new alias is null: update if old alias is different from null 234 if (newAlias == null) { 235 if (oldAlias != null) { 236 needToUpdate = true; 237 } 238 } else { // newAlias is not null 239 if (oldAlias == null) { 240 needToUpdate = true; 241 } else { 242 if (oldAlias.equals(newAlias)) { 243 if (renewAlias) { 244 needToUpdate = true; 245 } 246 } else { 247 needToUpdate = true; 248 } 249 } 250 } 251 return needToUpdate; 252 } 253 254 /** 255 * Creates domains with default attributes. 256 * 257 * @param domains a list of domain names 258 * @return List of the non-empty domain names that were not legal domain names or already exist. 259 */ 260 public static List<String> createDomains(String... domains) { 261 DomainDAO ddao = DomainDAO.getInstance(); 262 List<String> illegalOrExisting = new ArrayList<String>(); 263 List<Domain> domainsToCreate = new ArrayList<Domain>(); 264 for (String domain : domains) { 265 if (DomainUtils.isValidDomainName(domain) && !ddao.exists(domain)) { 266 domainsToCreate.add(Domain.getDefaultDomain(domain)); 267 } else { 268 if (domain.trim().length() > 0) { 269 illegalOrExisting.add(domain); 270 } 271 } 272 } 273 274 log.info("Creating {} new domains", domainsToCreate.size()); 275 ddao.create(domainsToCreate); 276 277 return illegalOrExisting; 278 } 279 280 /** 281 * Creates a link to the domain edit page. 282 * 283 * @param domain The domain to show with a link 284 * @return HTML code with the link and the domain name shown 285 */ 286 public static String makeDomainLink(String domain) { 287 ArgumentNotValid.checkNotNullOrEmpty(domain, "domain"); 288 String url = EDIT_DOMAIN_JSP + Constants.DOMAIN_PARAM + "=" 289 + HTMLUtils.encode(domain); 290 return "<a href=\"" + url + "\">" + HTMLUtils.escapeHtmlValues(domain) + "</a>"; 291 } 292 293 /** 294 * Creates a url based on the supplied request where all the parameters are the same, except the 295 * <code>ShowUnusedConfigurations</code> boolean, which is flipped. 296 * 297 * @param request The original 'create domain' request to based the new url on. 298 * @return The new url with the <code>ShowUnusedConfigurations</code> boolean switched. 299 */ 300 public static String createDomainUrlWithFlippedShowConfigurations(ServletRequest request) { 301 boolean showUnusedConfigurationsParam = Boolean.parseBoolean(request 302 .getParameter(Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM)); 303 boolean showUnusedSeedsParam = Boolean.parseBoolean(request.getParameter(Constants.SHOW_UNUSED_SEEDS_PARAM)); 304 StringBuilder urlBuilder = new StringBuilder(EDIT_DOMAIN_JSP); 305 urlBuilder 306 .append(Constants.DOMAIN_PARAM + "=" + HTMLUtils.encode(request.getParameter(Constants.DOMAIN_PARAM))); 307 urlBuilder.append("&" + Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM + "=" 308 + Boolean.toString(!showUnusedConfigurationsParam)); 309 urlBuilder.append("&" + Constants.SHOW_UNUSED_SEEDS_PARAM + "=" + Boolean.toString(showUnusedSeedsParam)); 310 return urlBuilder.toString(); 311 } 312 313 /** 314 * Creates a url based on the supplied request where all the parameters are the same, except the 315 * <code>ShowUnusedSeedLists</code> boolean, which is flipped. 316 * 317 * @param request The original 'create domain' request to based the new url on. 318 * @return The new url with the <code>ShowUnusedSeedLists</code> boolean switched. 319 */ 320 public static String createDomainUrlWithFlippedShowSeeds(ServletRequest request) { 321 boolean showUnusedConfigurationsParam = Boolean.parseBoolean(request 322 .getParameter(Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM)); 323 boolean showUnusedSeedsParam = Boolean.parseBoolean(request.getParameter(Constants.SHOW_UNUSED_SEEDS_PARAM)); 324 StringBuilder urlBuilder = new StringBuilder(EDIT_DOMAIN_JSP); 325 urlBuilder 326 .append(Constants.DOMAIN_PARAM + "=" + HTMLUtils.encode(request.getParameter(Constants.DOMAIN_PARAM))); 327 urlBuilder.append("&" + Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM + "=" 328 + Boolean.toString(showUnusedConfigurationsParam)); 329 urlBuilder.append("&" + Constants.SHOW_UNUSED_SEEDS_PARAM + "=" + Boolean.toString(!showUnusedSeedsParam)); 330 return urlBuilder.toString(); 331 } 332 333 /** 334 * Search for domains matching the following criteria. 335 * TODO Should we allow more than one criteria? 336 * TODO use Enum instead for searchType 337 * 338 * @param context the context of the JSP page calling 339 * @param i18n The translation properties file used 340 * @param searchQuery The given searchQuery for searching for among the domains. 341 * @param searchType The given searchCriteria 342 * @return the set of domain-names matching the given criteria. 343 */ 344 public static List<String> getDomains(PageContext context, I18n i18n, String searchQuery, String searchType) { 345 List<String> resultSet = new ArrayList<String>(); 346 ArgumentNotValid.checkNotNullOrEmpty(searchQuery, "String searchQuery"); 347 ArgumentNotValid.checkNotNullOrEmpty(searchType, "String searchType"); 348 349 try { 350 DomainSearchType.parse(searchType); 351 } catch (ArgumentNotValid e) { 352 HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;invalid.domain.search.criteria.0", searchType); 353 throw new ForwardedToErrorPage("Unknown domain search criteria '" + searchType + "'"); 354 } 355 356 log.debug("SearchQuery '" + searchQuery + "', searchType: " + searchType); 357 resultSet = DomainDAO.getInstance().getDomains(searchQuery, searchType); 358 return resultSet; 359 } 360 361 /** 362 * Returns the list of domain configurations which are either used in a concrete harvest or is a 'default 363 * configuration'. 364 * <p> 365 * The list is sorted alphabetically by name according to the supplied locale. 366 * 367 * @param domain The domain to find the used configurations for. 368 * @param locale The locale to base the sorting on 369 * @return A sorted list of used configurations for the supplied domain. 370 */ 371 public static List<DomainConfiguration> getUsedConfiguration(Domain domain, Locale locale) { 372 List<Long> usedConfigurationIDs = DomainDAO.getInstance().findUsedConfigurations(domain.getID()); 373 List<DomainConfiguration> usedConfigurations = new LinkedList<DomainConfiguration>(); 374 375 for (DomainConfiguration configuration : domain.getAllConfigurationsAsSortedList(locale)) { 376 if (usedConfigurationIDs.contains(new Long(configuration.getID())) 377 || configuration.getID() == domain.getDefaultConfiguration().getID()) { 378 usedConfigurations.add(configuration); 379 } 380 } 381 382 NamedUtils.sortNamedObjectList(locale, usedConfigurations); 383 return usedConfigurations; 384 } 385 386 /** 387 * Returns the seed lists associated with the supplied configurations. 388 * 389 * @param configurations The configurations to find seed lists for 390 * @return The seed lists used in the supplied configurations. 391 */ 392 public static List<SeedList> getSeedLists(List<DomainConfiguration> configurations) { 393 List<SeedList> seedsLists = new LinkedList<SeedList>(); 394 for (DomainConfiguration configuration : configurations) { 395 Iterator<SeedList> seedListIterator = configuration.getSeedLists(); 396 while (seedListIterator.hasNext()) { 397 SeedList seedList = seedListIterator.next(); 398 if (!seedsLists.contains(seedList)) { 399 seedsLists.add(seedList); 400 } 401 } 402 } 403 404 return seedsLists; 405 } 406 407 /** 408 * Create the domain definition list for the jsp page. Code has been moved from the jsp to here to avoid compile errors at 409 * runtime in correlation with the upgrade to java 1.8 and introduction of embedded tomcat to handle jsp pages. This was previously done via jetty 6. 410 * 411 * @param pageContext the current JSP context 412 * @param request the JSP request 413 * @param response the JSP response 414 * @param I18N internationalization object. 415 * 416 * @return void 417 * @throws ForwardedToErrorPage if an unknown bitarchive or update type is posted, or one of the two required 418 * parameters are missing. 419 * @throws ArgumentNotValid If the context is null. 420 */ 421 public static void CreateDomainDefinitionListInRequest(HttpServletRequest request, HttpServletResponse response, PageContext pageContext, I18n I18N) 422 throws ArgumentNotValid, ForwardedToErrorPage, ServletException, IOException { 423 ArgumentNotValid.checkNotNull(pageContext, "PageContext context"); 424 ArgumentNotValid.checkNotNull(request, "Page request"); 425 ArgumentNotValid.checkNotNull(response, "Page response"); 426 String domains = request.getParameter(Constants.DOMAINLIST_PARAM); 427 if (domains != null) { 428 String[] domainsList = domains.split("\\s+"); 429 Set<String> invalidDomainNames = new HashSet<String>( 430 DomainDefinition.createDomains(domainsList)); 431 432 if (domainsList.length == 1 433 && DomainDAO.getInstance().exists(domainsList[0])) { 434 RequestDispatcher rd = 435 pageContext.getServletContext(). 436 getRequestDispatcher( 437 "/Definitions-edit-domain.jsp?" 438 + Constants.DOMAIN_PARAM 439 + "=" + HTMLUtils.encode( 440 domainsList[0])); 441 rd.forward(request, response); 442 443 return; 444 } else { 445 StringBuilder message = new StringBuilder(); 446 Set<String> validDomains = new HashSet<String>(Arrays.asList(domainsList)); 447 validDomains.removeAll(invalidDomainNames); 448 if (!validDomains.isEmpty()) { 449 message.append("<h4>"); 450 message.append(I18N.getString(response.getLocale(), 451 "harvestdefinition.domains.created")); 452 message.append("</h4><br/>"); 453 454 for (String domain : validDomains) { 455 if (DomainDAO.getInstance().exists(domain)) { 456 message.append(DomainDefinition.makeDomainLink(domain)); 457 message.append("<br/>"); 458 } 459 } 460 } 461 if (invalidDomainNames.size() > 0) { 462 message.append("<br/>"); 463 message.append(I18N.getString(response.getLocale(), 464 "harvestdefinition.domains.notcreated")); 465 message.append("<br/>"); 466 DomainDAO dao = DomainDAO.getInstance(); 467 for (String invalid : invalidDomainNames) { 468 if (dao.exists(invalid)) { 469 message.append( 470 DomainDefinition.makeDomainLink(invalid)); 471 } else { 472 message.append(invalid); 473 } 474 message.append("<br/>"); 475 } 476 } 477 request.setAttribute("message", message.toString()); 478 RequestDispatcher rd = pageContext.getServletContext(). 479 getRequestDispatcher("/message.jsp"); 480 rd.forward(request, response); 481 return; 482 } 483 } 484 } 485}