001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import dk.netarkivet.common.exceptions.ArgumentNotValid; 027 028/** 029 * A container for miscellaneous information about a TLD. 030 * <p> 031 * Currently contains the TLD name and a count of subdomains. 032 */ 033public class TLDInfo implements Comparable<TLDInfo> { 034 035 /** The name of this TLD. */ 036 private final String tldName; 037 /** Number of subdomains we have registered under this TLD. All IP addresses are lumped together as one TLD. */ 038 private int count = 0; 039 /** The special name for IP adresses, since they have no TLD. */ 040 static final String IP_ADDRESS_NAME = "IP Address"; 041 042 /** 043 * Create TLD info holder. 044 * 045 * @param name The TLD domain name. 046 */ 047 public TLDInfo(String name) { 048 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 049 tldName = name; 050 } 051 052 /** 053 * The name of this TLD (e.g. dk, com or museum). IP addresses are registered under a special "IP address" name. 054 * 055 * @return TLD name without . 056 */ 057 public String getName() { 058 return tldName; 059 } 060 061 /** 062 * Number of subdomains we have registered under this TLD. All IP addresses are lumped together as one TLD. 063 * 064 * @return Number of 2nd-level domains we have registered under this TLD. 065 */ 066 public int getCount() { 067 return count; 068 } 069 070 /** 071 * Add a 2nd-level domain to the information for this domain. 072 * <p> 073 * This tests that the given domain does in fact belong to this TLD, but not whether it has been added before. 074 * 075 * @param name A name of a domain 076 */ 077 void addSubdomain(String name) { 078 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 079 if (tldName.equals(IP_ADDRESS_NAME)) { 080 ArgumentNotValid.checkTrue(dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(name).matches(), 081 "name must be an IP address"); 082 } else { 083 ArgumentNotValid.checkTrue(name.endsWith("." + tldName), "name must end with '." + tldName + "'"); 084 } 085 count++; 086 } 087 088 /** 089 * @see Object#equals(Object) 090 */ 091 public boolean equals(Object o) { 092 if (this == o) { 093 return true; 094 } 095 if (o == null || getClass() != o.getClass()) { 096 return false; 097 } 098 099 TLDInfo tldInfo = (TLDInfo) o; 100 101 if (!tldName.equals(tldInfo.tldName)) { 102 return false; 103 } 104 105 return true; 106 } 107 108 /** 109 * @return the hashcode for this object which is equal to the hashCode for the name of the tld. 110 * @see Object#hashCode() 111 */ 112 public int hashCode() { 113 return tldName.hashCode(); 114 } 115 116 /** 117 * Compares this object with the specified object for order. Returns a negative integer, zero, or a positive integer 118 * as this object is less than, equal to, or greater than the specified object. 119 * <p> 120 * 121 * @param o the Object to be compared. 122 * @return a negative integer, zero, or a positive integer as this object is less than, equal to, or greater than 123 * the specified object. 124 * @see Comparable#compareTo(Object o) 125 */ 126 public int compareTo(TLDInfo o) { 127 return tldName.compareTo(o.tldName); 128 } 129 130 /** 131 * Get the TLD for a given domain. 132 * 133 * @param domain A domain, as specified by the global domain regexp. 134 * @return The TLD of the domain, or a special placeholder for IP addresses. 135 */ 136 static String getTLD(String domain) { 137 ArgumentNotValid.checkNotNullOrEmpty(domain, "String domain"); 138 String tld; 139 if (dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(domain).matches()) { 140 tld = IP_ADDRESS_NAME; 141 } else { 142 // We know the format of domains, so we can assume a dot 143 tld = domain.substring(domain.lastIndexOf('.') + 1); 144 } 145 return tld; 146 } 147 148 /** 149 * Get the TLD for a given domain including multilevel TLD. for example .gouv.fr is level 2 TLD 150 * 151 * @param domain A domain, as specified by the global domain regexp. 152 * @param maxLevel maximum level for TLD (can't be 0). 153 * @return The TLD of the domain, or a special placeholder for IP addresses. 154 */ 155 static String getMultiLevelTLD(String domain, int maxLevel) { 156 ArgumentNotValid.checkNotNullOrEmpty(domain, "String domain"); 157 ArgumentNotValid.checkPositive(maxLevel, "max level must be equal to 1" + " or more"); 158 String tld; 159 if (dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(domain).matches()) { 160 tld = IP_ADDRESS_NAME; 161 } else { 162 // We know the format of domains, so we can assume one or more dot 163 int numberOfLevel = getTLDLevel(domain); 164 tld = domain; 165 while (numberOfLevel >= maxLevel) { 166 tld = tld.substring(tld.indexOf('.') + 1); 167 --numberOfLevel; 168 } 169 } 170 return tld; 171 } 172 173 /** 174 * Return TLD level of the domain. 175 * 176 * @param domain A domain 177 * @return TLD level of the domain 1 for IP addresses 178 */ 179 static int getTLDLevel(String domain) { 180 if (dk.netarkivet.common.Constants.IP_KEY_REGEXP.matcher(domain).matches()) { 181 return 1; 182 } else { 183 int nbLevel = 0; 184 for (int i = 0; i < domain.length(); i++) { 185 char c = domain.charAt(i); 186 if (c == '.') { 187 ++nbLevel; 188 } 189 } 190 return nbLevel; 191 } 192 } 193 194 /** 195 * True if this TLDinfo accumulates IP address information. 196 * 197 * @return True if the domains counted in the TLDinfo are IP domains. 198 */ 199 public boolean isIP() { 200 return tldName.equals(IP_ADDRESS_NAME); 201 } 202 203}