001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import dk.netarkivet.common.exceptions.ArgumentNotValid;
027
028/**
029 * A container for miscellaneous information about a TLD.
030 * <p>
031 * Currently contains the TLD name and a count of subdomains.
032 */
033public class TLDInfo implements Comparable<TLDInfo> {
034
035    /** The name of this TLD. */
036    private final String tldName;
037    /** Number of subdomains we have registered under this TLD. All IP addresses are lumped together as one TLD. */
038    private int count = 0;
039    /** The special name for IP adresses, since they have no TLD. */
040    static final String IP_ADDRESS_NAME = "IP Address";
041
042    /**
043     * Create TLD info holder.
044     *
045     * @param name The TLD domain name.
046     */
047    public TLDInfo(String name) {
048        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
049        tldName = name;
050    }
051
052    /**
053     * The name of this TLD (e.g. dk, com or museum). IP addresses are registered under a special "IP address" name.
054     *
055     * @return TLD name without .
056     */
057    public String getName() {
058        return tldName;
059    }
060
061    /**
062     * Number of subdomains we have registered under this TLD. All IP addresses are lumped together as one TLD.
063     *
064     * @return Number of 2nd-level domains we have registered under this TLD.
065     */
066    public int getCount() {
067        return count;
068    }
069
070    /**
071     * Add a 2nd-level domain to the information for this domain.
072     * <p>
073     * This tests that the given domain does in fact belong to this TLD, but not whether it has been added before.
074     *
075     * @param name A name of a domain
076     */
077    void addSubdomain(String name) {
078        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
079        if (tldName.equals(IP_ADDRESS_NAME)) {
080            ArgumentNotValid.checkTrue(dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(name).matches(),
081                    "name must be an IP address");
082        } else {
083            ArgumentNotValid.checkTrue(name.endsWith("." + tldName), "name must end with '." + tldName + "'");
084        }
085        count++;
086    }
087
088    /**
089     * @see Object#equals(Object)
090     */
091    public boolean equals(Object o) {
092        if (this == o) {
093            return true;
094        }
095        if (o == null || getClass() != o.getClass()) {
096            return false;
097        }
098
099        TLDInfo tldInfo = (TLDInfo) o;
100
101        if (!tldName.equals(tldInfo.tldName)) {
102            return false;
103        }
104
105        return true;
106    }
107
108    /**
109     * @return the hashcode for this object which is equal to the hashCode for the name of the tld.
110     * @see Object#hashCode()
111     */
112    public int hashCode() {
113        return tldName.hashCode();
114    }
115
116    /**
117     * Compares this object with the specified object for order. Returns a negative integer, zero, or a positive integer
118     * as this object is less than, equal to, or greater than the specified object.
119     * <p>
120     *
121     * @param o the Object to be compared.
122     * @return a negative integer, zero, or a positive integer as this object is less than, equal to, or greater than
123     * the specified object.
124     * @see Comparable#compareTo(Object o)
125     */
126    public int compareTo(TLDInfo o) {
127        return tldName.compareTo(o.tldName);
128    }
129
130    /**
131     * Get the TLD for a given domain.
132     *
133     * @param domain A domain, as specified by the global domain regexp.
134     * @return The TLD of the domain, or a special placeholder for IP addresses.
135     */
136    static String getTLD(String domain) {
137        ArgumentNotValid.checkNotNullOrEmpty(domain, "String domain");
138        String tld;
139        if (dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(domain).matches()) {
140            tld = IP_ADDRESS_NAME;
141        } else {
142            // We know the format of domains, so we can assume a dot
143            tld = domain.substring(domain.lastIndexOf('.') + 1);
144        }
145        return tld;
146    }
147
148    /**
149     * Get the TLD for a given domain including multilevel TLD. for example .gouv.fr is level 2 TLD
150     *
151     * @param domain A domain, as specified by the global domain regexp.
152     * @param maxLevel maximum level for TLD (can't be 0).
153     * @return The TLD of the domain, or a special placeholder for IP addresses.
154     */
155    static String getMultiLevelTLD(String domain, int maxLevel) {
156        ArgumentNotValid.checkNotNullOrEmpty(domain, "String domain");
157        ArgumentNotValid.checkPositive(maxLevel, "max level must be equal to 1" + " or more");
158        String tld;
159        if (dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(domain).matches()) {
160            tld = IP_ADDRESS_NAME;
161        } else {
162            // We know the format of domains, so we can assume one or more dot
163            int numberOfLevel = getTLDLevel(domain);
164            tld = domain;
165            while (numberOfLevel >= maxLevel) {
166                tld = tld.substring(tld.indexOf('.') + 1);
167                --numberOfLevel;
168            }
169        }
170        return tld;
171    }
172
173    /**
174     * Return TLD level of the domain.
175     *
176     * @param domain A domain
177     * @return TLD level of the domain 1 for IP addresses
178     */
179    static int getTLDLevel(String domain) {
180        if (dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(domain).matches()) {
181            return 1;
182        } else {
183            int nbLevel = 0;
184            for (int i = 0; i < domain.length(); i++) {
185                char c = domain.charAt(i);
186                if (c == '.') {
187                    ++nbLevel;
188                }
189            }
190            return nbLevel;
191        }
192    }
193
194    /**
195     * True if this TLDinfo accumulates IP address information.
196     *
197     * @return True if the domains counted in the TLDinfo are IP domains.
198     */
199    public boolean isIP() {
200        return tldName.equals(IP_ADDRESS_NAME);
201    }
202
203}