001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.utils;
024
025import java.util.regex.Matcher;
026
027import dk.netarkivet.common.Constants;
028import dk.netarkivet.common.exceptions.ArgumentNotValid;
029
030/**
031 * Utilities for working with domain names.
032 */
033public final class DomainUtils {
034
035    /** Valid characters in a domain name, according to RFC3490. */
036    public static final String DOMAINNAME_CHAR_REGEX_STRING = "[^\\0000-,.-/:-@\\[-`{-\\0177]+";
037
038    /** Utility class, do not initialise. */
039    private DomainUtils() {
040    }
041    
042    /**
043     * Check if a given domainName is valid domain. A valid domain is an IP address or a domain name part followed by a
044     * TLD as defined in settings.
045     *
046     * @param domainName A name of a domain (netarkivet.dk)
047     * @return true if domain is valid; otherwise it returns false.
048     */
049    public static boolean isValidDomainName(String domainName) {
050        ArgumentNotValid.checkNotNull(domainName, "String domainName");
051        return TLD.getInstance().getValidDomainMatcher().matcher(domainName).matches();      
052    }
053
054    /**
055     * Return a domain name. A domain name is defined as either an IP address if the given host is an IP address, or a
056     * postfix of the given host name containing one hostnamepart and a TLD as defined in settings.
057     * <p>
058     * E.g. if '.dk' and 'co.uk' are valid TLDs, www.netarchive.dk will be become netarchive.dk and news.bbc.co.uk will
059     * be come bbc.co.uk
060     *
061     * @param hostname A hostname or IP address. Null hostname is not allowed
062     * @return A domain name (foo.bar) or IP address, or null if no valid domain could be obtained from the given
063     * hostname. If non-null, the return value is guaranteed to be a valid domain as determined by isValidDomainName().
064     */
065    public static String domainNameFromHostname(String hostname) {
066        ArgumentNotValid.checkNotNull(hostname, "String hostname");
067        String result = hostname;
068        // IP addresses are kept as-is, others are trimmed down.
069        if (!Constants.IP_KEY_REGEXP.matcher(hostname).matches()) {
070            Matcher matcher = TLD.getInstance().getHostnamePattern().matcher(hostname);
071            if (matcher.matches()) {
072                result = matcher.group(2);
073            }
074        }
075        if (isValidDomainName(result)) {
076            return result;
077        }
078        return null;
079    }
080
081    /**
082     * Reduce a hostname to a more readable form.
083     *
084     * @param hostname A host name, should not be null.
085     * @return The same host name with all domain parts stripped off.
086     * @throws ArgumentNotValid if argument isn't valid.
087     */
088    public static String reduceHostname(String hostname) throws ArgumentNotValid {
089        ArgumentNotValid.checkNotNull(hostname, "String hostName");
090        String[] split = hostname.split("\\.", 2);
091        return split[0];
092    }
093}