001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.webinterface;
025
026import java.io.IOException;
027import java.util.ArrayList;
028import java.util.Arrays;
029import java.util.Set;
030import java.util.HashSet;
031
032import java.util.Iterator;
033import java.util.LinkedList;
034import java.util.List;
035import java.util.Locale;
036
037import javax.servlet.ServletException;
038import javax.servlet.ServletRequest;
039import javax.servlet.http.HttpServletRequest;
040import javax.servlet.http.HttpServletResponse;
041import javax.servlet.jsp.PageContext;
042import javax.servlet.RequestDispatcher;
043
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047import dk.netarkivet.common.exceptions.ArgumentNotValid;
048import dk.netarkivet.common.exceptions.ForwardedToErrorPage;
049import dk.netarkivet.common.exceptions.IOFailure;
050import dk.netarkivet.common.utils.DomainUtils;
051import dk.netarkivet.common.utils.I18n;
052import dk.netarkivet.common.webinterface.HTMLUtils;
053import dk.netarkivet.harvester.datamodel.Domain;
054import dk.netarkivet.harvester.datamodel.DomainConfiguration;
055import dk.netarkivet.harvester.datamodel.DomainDAO;
056import dk.netarkivet.harvester.datamodel.NamedUtils;
057import dk.netarkivet.harvester.datamodel.SeedList;
058import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes;
059
060/**
061 * Utility class for handling update of domain from the domain jsp page.
062 */
063public class DomainDefinition {
064
065    protected static final Logger log = LoggerFactory.getLogger(DomainDefinition.class);
066    
067    protected static final String EDIT_DOMAIN_JSP = "/HarvestDefinition/Definitions-edit-domain.jsp?";
068    
069    /** Private constructor to prevent public construction of this class. */
070    private DomainDefinition() {
071    }
072
073    /**
074     * Extracts all required parameters from the request, checks for any inconsistencies, and passes the requisite data
075     * to the updateDomain method for processing.
076     * <p>
077     * For reference, the parameters for this page look something like
078     * http://localhost:8076/HarvestDefinition/Definitions-edit-domain.jsp?
079     * update=1&name=netarkivet.dk&default=defaultconfig&configName=&order_xml=&
080     * load=&maxObjects=&urlListName=&seedList=+&passwordName=&passwordDomain=& passwordRealm=&userName=&password=&
081     * crawlertraps=%2Fcgi-bin%2F*%0D%0A%2Ftrap%2F*%0D%0A
082     * <p>
083     * update: This method throws an exception if update is not set
084     * <p>
085     * name: must be the name of a known domain
086     * <p>
087     * comments: optional user-entered comments about the domain
088     * <p>
089     * default: the defaultconfig is set to this value. Must be non-null and a known configuration of this domain.
090     * <p>
091     * crawlertraps: a newline-separated list of urls to be ignored. May be empty or null
092     * <p>
093     * alias: If set, this domain is an alias of the set domain renewAlias: If set, the alias date should be renewed
094     *
095     * @param context The context of this request
096     * @param i18n I18n information
097     * @throws IOFailure on updateerrors in the DAO
098     * @throws ForwardedToErrorPage if domain is not found, if the edition is out-of-date, or if parameters are missing
099     * or invalid
100     */
101    public static void processRequest(PageContext context, I18n i18n) {
102        ArgumentNotValid.checkNotNull(context, "PageContext context");
103        ArgumentNotValid.checkNotNull(i18n, "I18n i18n");
104
105        HTMLUtils.forwardOnEmptyParameter(context, Constants.DOMAIN_PARAM, Constants.DEFAULT_PARAM);
106        ServletRequest request = context.getRequest();
107        String name = request.getParameter(Constants.DOMAIN_PARAM).trim();
108
109        if (!DomainDAO.getInstance().exists(name)) {
110            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;unknown.domain.0", name);
111            throw new ForwardedToErrorPage("Unknown domain '" + name + "'");
112        }
113        Domain domain = DomainDAO.getInstance().read(name);
114
115        // check the edition number before updating
116        long edition = HTMLUtils.parseOptionalLong(context, Constants.EDITION_PARAM, -1L);
117
118        if (domain.getEdition() != edition) {
119            HTMLUtils.forwardWithRawErrorMessage(
120                    context,
121                    i18n,
122                    "errormsg;domain.definition.changed.0.retry.1",
123                    "<br/><a href=\"Definitions-edit-domain.jsp?" + Constants.DOMAIN_PARAM + "="
124                            + HTMLUtils.escapeHtmlValues(HTMLUtils.encode(name)) + "\">", "</a>");
125            throw new ForwardedToErrorPage("Domain '" + name + "' has changed");
126        }
127
128        // default configuration
129        String defaultConf = request.getParameter(Constants.DEFAULT_PARAM);
130        if (!domain.hasConfiguration(defaultConf)) {
131            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;unknown.default.configuration.0.for.1",
132                    defaultConf, name);
133            throw new ForwardedToErrorPage("Unknown default configuration '" + defaultConf + "'");
134        }
135
136        String crawlertraps = request.getParameter(Constants.CRAWLERTRAPS_PARAM);
137        if (crawlertraps == null) {
138            crawlertraps = "";
139        }
140        String comments = request.getParameter(Constants.COMMENTS_PARAM);
141        if (comments == null) {
142            comments = "";
143        }
144        String alias = request.getParameter(Constants.ALIAS_PARAM);
145        if (alias == null) {
146            alias = "";
147        }
148
149        String aliasRenew = request.getParameter(Constants.RENEW_ALIAS_PARAM);
150        if (aliasRenew == null) {
151            aliasRenew = "no";
152        }
153
154        boolean renewAlias = aliasRenew.equals("yes");
155
156        ExtendedFieldValueDefinition.processRequest(context, i18n, domain, ExtendedFieldTypes.DOMAIN);
157
158        updateDomain(domain, defaultConf, crawlertraps, comments, alias, renewAlias);
159    }
160
161    /**
162     * This updates the given domain in the database.
163     *
164     * @param domain the given domain
165     * @param defaultConfig the name of the default configuration
166     * @param crawlertraps the current crawlertraps stated for the domain
167     * @param comments User-defined comments for the domain
168     * @param alias if this is non-null, this domain is an alias of 'alias'.
169     * @param renewAlias true, if alias is to be updated even if it is not changed
170     */
171    private static void updateDomain(Domain domain, String defaultConfig, String crawlertraps, String comments,
172            String alias, boolean renewAlias) {
173        // Set default configuration
174        domain.setDefaultConfiguration(defaultConfig);
175        domain.setComments(comments);
176
177        // Update crawlertraps
178        List<String> trapList = new ArrayList<String>();
179        if (crawlertraps.length() > 0) {
180            String[] traps = crawlertraps.split("[\\r\\n]+");
181            for (String trap : traps) {
182                if (trap.trim().length() > 0) {
183                    trapList.add(trap);
184                }
185            }
186            log.debug("Now {} crawlertraps for this domain.", trapList.size());
187        } else {
188                log.debug("No crawlertraps for this domain.");
189        }
190        domain.setCrawlerTraps(trapList, true); // Note that exception is thrówn if any of the crawlertraps is not a valid regexp
191
192        // Update alias information
193
194        // If alias is empty string, do not regard this domain as an alias any
195        // more.
196        // If alias is not-empty, update only if alias is different from
197        // oldAlias or
198        // This only updates alias if it is required: See javadoc below for
199        // needToUpdateAlias() 
200        String oldAlias = null;
201        if (domain.getAliasInfo() != null) {
202            oldAlias = domain.getAliasInfo().getAliasOf();
203        }
204        String newAlias;
205        // If alias is empty string, this domain is or should not be an alias.
206
207        if (alias.trim().isEmpty()) {
208            newAlias = null;
209        } else {
210            newAlias = alias.trim();
211        }
212
213        if (needToUpdateAlias(oldAlias, newAlias, renewAlias)) {
214            domain.updateAlias(newAlias);
215        }
216
217        DomainDAO.getInstance().update(domain);
218    }
219
220    /**
221     * Define the cases where we want to update the alias information. 1. The alias information is updated, if the new
222     * alias is null, and the old alias is different from null 2. The alias information is updated, if the new alias is
223     * different from null, and old alias is null 3. The alias information is updated, if the new alias is different
224     * from null, and the old alias is different from null, and they are not either not equal, or renewAlias is true
225     *
226     * @param oldAlias the old alias (could be null)
227     * @param newAlias the new alias (could be null)
228     * @param renewAlias should we renew alias, if the alias is unchanged?
229     * @return true, if we want to update the alias information, false otherwise
230     */
231    private static boolean needToUpdateAlias(String oldAlias, String newAlias, boolean renewAlias) {
232        boolean needToUpdate = false;
233        // If new alias is null: update if old alias is different from null
234        if (newAlias == null) {
235            if (oldAlias != null) {
236                needToUpdate = true;
237            }
238        } else { // newAlias is not null
239            if (oldAlias == null) {
240                needToUpdate = true;
241            } else {
242                if (oldAlias.equals(newAlias)) {
243                    if (renewAlias) {
244                        needToUpdate = true;
245                    }
246                } else {
247                    needToUpdate = true;
248                }
249            }
250        }
251        return needToUpdate;
252    }
253
254    /**
255     * Creates domains with default attributes.
256     *
257     * @param domains a list of domain names
258     * @return List of the non-empty domain names that were not legal domain names or already exist.
259     */
260    public static List<String> createDomains(String... domains) {
261        DomainDAO ddao = DomainDAO.getInstance();
262        List<String> illegalOrExisting = new ArrayList<String>();
263        List<Domain> domainsToCreate = new ArrayList<Domain>();
264        for (String domain : domains) {
265            if (DomainUtils.isValidDomainName(domain) && !ddao.exists(domain)) {
266                domainsToCreate.add(Domain.getDefaultDomain(domain));
267            } else {
268                if (domain.trim().length() > 0) {
269                    illegalOrExisting.add(domain);
270                }
271            }
272        }
273
274        log.info("Creating {} new domains", domainsToCreate.size());
275        ddao.create(domainsToCreate);
276
277        return illegalOrExisting;
278    }
279
280    /**
281     * Creates a link to the domain edit page.
282     *
283     * @param domain The domain to show with a link
284     * @return HTML code with the link and the domain name shown
285     */
286    public static String makeDomainLink(String domain) {
287        ArgumentNotValid.checkNotNullOrEmpty(domain, "domain");
288        String url = EDIT_DOMAIN_JSP + Constants.DOMAIN_PARAM + "="
289                + HTMLUtils.encode(domain);
290        return "<a href=\"" + url + "\">" + HTMLUtils.escapeHtmlValues(domain) + "</a>";
291    }
292
293    /**
294     * Creates a url based on the supplied request where all the parameters are the same, except the
295     * <code>ShowUnusedConfigurations</code> boolean, which is flipped.
296     *
297     * @param request The original 'create domain' request to based the new url on.
298     * @return The new url with the <code>ShowUnusedConfigurations</code> boolean switched.
299     */
300    public static String createDomainUrlWithFlippedShowConfigurations(ServletRequest request) {
301        boolean showUnusedConfigurationsParam = Boolean.parseBoolean(request
302                .getParameter(Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM));
303        boolean showUnusedSeedsParam = Boolean.parseBoolean(request.getParameter(Constants.SHOW_UNUSED_SEEDS_PARAM));
304        StringBuilder urlBuilder = new StringBuilder(EDIT_DOMAIN_JSP);
305        urlBuilder
306                .append(Constants.DOMAIN_PARAM + "=" + HTMLUtils.encode(request.getParameter(Constants.DOMAIN_PARAM)));
307        urlBuilder.append("&" + Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM + "="
308                + Boolean.toString(!showUnusedConfigurationsParam));
309        urlBuilder.append("&" + Constants.SHOW_UNUSED_SEEDS_PARAM + "=" + Boolean.toString(showUnusedSeedsParam));
310        return urlBuilder.toString();
311    }
312
313    /**
314     * Creates a url based on the supplied request where all the parameters are the same, except the
315     * <code>ShowUnusedSeedLists</code> boolean, which is flipped.
316     *
317     * @param request The original 'create domain' request to based the new url on.
318     * @return The new url with the <code>ShowUnusedSeedLists</code> boolean switched.
319     */
320    public static String createDomainUrlWithFlippedShowSeeds(ServletRequest request) {
321        boolean showUnusedConfigurationsParam = Boolean.parseBoolean(request
322                .getParameter(Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM));
323        boolean showUnusedSeedsParam = Boolean.parseBoolean(request.getParameter(Constants.SHOW_UNUSED_SEEDS_PARAM));
324        StringBuilder urlBuilder = new StringBuilder(EDIT_DOMAIN_JSP);
325        urlBuilder
326                .append(Constants.DOMAIN_PARAM + "=" + HTMLUtils.encode(request.getParameter(Constants.DOMAIN_PARAM)));
327        urlBuilder.append("&" + Constants.SHOW_UNUSED_CONFIGURATIONS_PARAM + "="
328                + Boolean.toString(showUnusedConfigurationsParam));
329        urlBuilder.append("&" + Constants.SHOW_UNUSED_SEEDS_PARAM + "=" + Boolean.toString(!showUnusedSeedsParam));
330        return urlBuilder.toString();
331    }
332
333    /**
334     * Search for domains matching the following criteria. 
335     * TODO Should we allow more than one criteria?
336     * TODO use Enum instead for searchType
337     *
338     * @param context the context of the JSP page calling
339     * @param i18n The translation properties file used
340     * @param searchQuery The given searchQuery for searching for among the domains.
341     * @param searchType The given searchCriteria 
342     * @return the set of domain-names matching the given criteria.
343     */
344    public static List<String> getDomains(PageContext context, I18n i18n, String searchQuery, String searchType) {
345        List<String> resultSet = new ArrayList<String>();
346        ArgumentNotValid.checkNotNullOrEmpty(searchQuery, "String searchQuery");
347        ArgumentNotValid.checkNotNullOrEmpty(searchType, "String searchType");
348
349        try {
350            DomainSearchType.parse(searchType);
351        } catch (ArgumentNotValid e) {
352            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;invalid.domain.search.criteria.0", searchType);
353            throw new ForwardedToErrorPage("Unknown domain search criteria '" + searchType + "'");
354        }
355
356        log.debug("SearchQuery '" + searchQuery + "', searchType: " + searchType);
357        resultSet = DomainDAO.getInstance().getDomains(searchQuery, searchType);
358        return resultSet;
359    }
360
361    /**
362     * Returns the list of domain configurations which are either used in a concrete harvest or is a 'default
363     * configuration'.
364     * <p>
365     * The list is sorted alphabetically by name according to the supplied locale.
366     *
367     * @param domain The domain to find the used configurations for.
368     * @param locale The locale to base the sorting on
369     * @return A sorted list of used configurations for the supplied domain.
370     */
371    public static List<DomainConfiguration> getUsedConfiguration(Domain domain, Locale locale) {
372        List<Long> usedConfigurationIDs = DomainDAO.getInstance().findUsedConfigurations(domain.getID());
373        List<DomainConfiguration> usedConfigurations = new LinkedList<DomainConfiguration>();
374
375        for (DomainConfiguration configuration : domain.getAllConfigurationsAsSortedList(locale)) {
376            if (usedConfigurationIDs.contains(new Long(configuration.getID()))
377                    || configuration.getID() == domain.getDefaultConfiguration().getID()) {
378                usedConfigurations.add(configuration);
379            }
380        }
381
382        NamedUtils.sortNamedObjectList(locale, usedConfigurations);
383        return usedConfigurations;
384    }
385
386    /**
387     * Returns the seed lists associated with the supplied configurations.
388     *
389     * @param configurations The configurations to find seed lists for
390     * @return The seed lists used in the supplied configurations.
391     */
392    public static List<SeedList> getSeedLists(List<DomainConfiguration> configurations) {
393        List<SeedList> seedsLists = new LinkedList<SeedList>();
394        for (DomainConfiguration configuration : configurations) {
395            Iterator<SeedList> seedListIterator = configuration.getSeedLists();
396            while (seedListIterator.hasNext()) {
397                SeedList seedList = seedListIterator.next();
398                if (!seedsLists.contains(seedList)) {
399                    seedsLists.add(seedList);
400                }
401            }
402        }
403
404        return seedsLists;
405    }
406
407    /**
408     * Create the domain definition list for the jsp page. Code has been moved from the jsp to here to avoid compile errors at
409     * runtime in correlation with the upgrade to java 1.8 and introduction of embedded tomcat to handle jsp pages. This was previously done via jetty 6.
410     *
411     * @param pageContext the current JSP context
412     * @param request the JSP request
413     * @param response the JSP response
414     * @param I18N internationalization object.
415     *
416     * @return void
417     * @throws ForwardedToErrorPage if an unknown bitarchive or update type is posted, or one of the two required
418     * parameters are missing.
419     * @throws ArgumentNotValid If the context is null.
420     */
421    public static void CreateDomainDefinitionListInRequest(HttpServletRequest request, HttpServletResponse response, PageContext pageContext, I18n I18N)
422            throws ArgumentNotValid, ForwardedToErrorPage, ServletException, IOException {
423        ArgumentNotValid.checkNotNull(pageContext, "PageContext context");
424        ArgumentNotValid.checkNotNull(request, "Page request");
425        ArgumentNotValid.checkNotNull(response, "Page response");
426        String domains = request.getParameter(Constants.DOMAINLIST_PARAM);
427        if (domains != null) {
428            String[] domainsList = domains.split("\\s+");
429            Set<String> invalidDomainNames = new HashSet<String>(
430                    DomainDefinition.createDomains(domainsList));
431
432            if (domainsList.length == 1
433                    && DomainDAO.getInstance().exists(domainsList[0])) {
434                RequestDispatcher rd =
435                        pageContext.getServletContext().
436                                getRequestDispatcher(
437                                        "/Definitions-edit-domain.jsp?"
438                                                + Constants.DOMAIN_PARAM
439                                                + "=" + HTMLUtils.encode(
440                                                domainsList[0]));
441                rd.forward(request, response);
442
443                return;
444            } else {
445                StringBuilder message = new StringBuilder();
446                Set<String> validDomains = new HashSet<String>(Arrays.asList(domainsList));
447                validDomains.removeAll(invalidDomainNames);
448                if (!validDomains.isEmpty()) {
449                    message.append("<h4>");
450                    message.append(I18N.getString(response.getLocale(),
451                            "harvestdefinition.domains.created"));
452                    message.append("</h4><br/>");
453
454                    for (String domain : validDomains) {
455                        if (DomainDAO.getInstance().exists(domain)) {
456                            message.append(DomainDefinition.makeDomainLink(domain));
457                            message.append("<br/>");
458                        }
459                    }
460                }
461                if (invalidDomainNames.size() > 0) {
462                    message.append("<br/>");
463                    message.append(I18N.getString(response.getLocale(),
464                            "harvestdefinition.domains.notcreated"));
465                    message.append("<br/>");
466                    DomainDAO dao = DomainDAO.getInstance();
467                    for (String invalid : invalidDomainNames) {
468                        if (dao.exists(invalid)) {
469                            message.append(
470                                    DomainDefinition.makeDomainLink(invalid));
471                        } else {
472                            message.append(invalid);
473                        }
474                        message.append("<br/>");
475                    }
476                }
477                request.setAttribute("message", message.toString());
478                RequestDispatcher rd = pageContext.getServletContext().
479                        getRequestDispatcher("/message.jsp");
480                rd.forward(request, response);
481                return;
482            }
483        }
484    }
485}