001/*
002 * #%L
003 * Netarchivesuite - wayback
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.wayback.batch;
024
025import org.archive.wayback.UrlCanonicalizer;
026import org.archive.wayback.util.url.AggressiveUrlCanonicalizer;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029
030import dk.netarkivet.common.utils.SettingsFactory;
031import dk.netarkivet.wayback.WaybackSettings;
032
033/**
034 * A factory for returning a UrlCanonicalizer.
035 */
036public class UrlCanonicalizerFactory extends SettingsFactory<UrlCanonicalizer> {
037
038    /** Logger for this class. */
039    private static final Logger logger = LoggerFactory.getLogger(UrlCanonicalizerFactory.class);
040
041    /**
042     * This method returns an instance of the UrlCanonicalizer class specified in the settings.xml for the
043     * dk.netarkivet.wayback module. In the event that reading this file generates a SecurityException, as may occur in
044     * batch operation if security does not allow System properties to be read, the method will fall back on returning
045     * an instance of the class org.archive.wayback.util.url.AggressiveUrlCanonicalizer.
046     *
047     * @return a canonicalizer for urls
048     */
049    public static UrlCanonicalizer getDefaultUrlCanonicalizer() {
050        try {
051            return SettingsFactory.getInstance(WaybackSettings.URL_CANONICALIZER_CLASSNAME);
052        } catch (SecurityException e) {
053            logger.debug("The requested canoncializer could not be loaded. Falling back to {}",
054                    AggressiveUrlCanonicalizer.class.toString(), e);
055            return new AggressiveUrlCanonicalizer();
056        }
057    }
058
059}