001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.harvester.datamodel; 024 025import java.sql.Connection; 026import java.util.Iterator; 027import java.util.List; 028 029import dk.netarkivet.common.exceptions.ArgumentNotValid; 030import dk.netarkivet.common.exceptions.UnknownID; 031 032/** 033 * Persistent storage for Domain objects. Configuration information and seeds are stored as well. 034 */ 035public abstract class DomainDAO implements DAO, Iterable<Domain> { 036 037 /** The Singleton DomainDAO. */ 038 private static DomainDAO instance; 039 040 /** 041 * protected constructor for singleton class. 042 */ 043 protected DomainDAO() { 044 } 045 046 /** 047 * Get the singleton DomainDAO instance. 048 * 049 * @return the singleton DomainDAO 050 */ 051 public static synchronized DomainDAO getInstance() { 052 if (instance == null) { 053 instance = new DomainDBDAO(); 054 } 055 056 return instance; 057 } 058 059 /** 060 * Create a domain in persistent storage. 061 * 062 * @param domain a given {@link Domain} object. 063 */ 064 public synchronized void create(Domain domain) { 065 Connection c = HarvestDBConnection.get(); 066 try { 067 create(c, domain); 068 } finally { 069 HarvestDBConnection.release(c); 070 } 071 } 072 073 /** 074 * Create a list of domains in persistent storage. 075 * 076 * @param domains a list of {@link Domain} objects. 077 */ 078 public synchronized void create(List<Domain> domains) { 079 Connection c = HarvestDBConnection.get(); 080 try { 081 for (Domain d : domains) { 082 create(c, d); 083 } 084 } finally { 085 HarvestDBConnection.release(c); 086 } 087 } 088 089 /** 090 * Create a domain in persistent storage. 091 * 092 * @param connection a connection to the harvest definition database. 093 * @param domain a given {@link Domain} object. 094 */ 095 protected abstract void create(Connection connection, Domain domain); 096 097 /** 098 * Read a domain from the persistent storage. 099 * 100 * @param domainName the name of the domain to retrieve 101 * @return the retrieved Domain 102 */ 103 public synchronized Domain read(String domainName) { 104 Connection c = HarvestDBConnection.get(); 105 try { 106 return read(c, domainName); 107 } finally { 108 HarvestDBConnection.release(c); 109 } 110 } 111 112 /** 113 * Read a domain from the persistent storage known to exist. 114 * 115 * @param domainName the name of the domain to retrieve 116 * @return the retrieved Domain 117 */ 118 public synchronized Domain readKnown(String domainName) { 119 Connection c = HarvestDBConnection.get(); 120 try { 121 return readKnown(c, domainName); 122 } finally { 123 HarvestDBConnection.release(c); 124 } 125 } 126 127 /** 128 * Read a domain from the persistent storage. 129 * 130 * @param connection a connection to the harvest definition database. 131 * @param domainName the name of the domain to retrieve 132 * @return the retrieved Domain 133 */ 134 protected abstract Domain read(Connection connection, String domainName); 135 136 /** 137 * Read a domain from the persistent storage known to exist. 138 * 139 * @param connection a connection to the harvest definition database. 140 * @param domainName the name of the domain to retrieve 141 * @return the retrieved Domain 142 */ 143 protected abstract Domain readKnown(Connection connection, String domainName); 144 145 /** 146 * Check existence of a domain with the given domainName. 147 * 148 * @param domainName A given domain name. 149 * @return true if the domain exists, false otherwise. 150 * @throws ArgumentNotValid if domainName is null or empty. 151 */ 152 public abstract boolean exists(String domainName); 153 154 /** 155 * Update information about existing domain information. 156 * 157 * @param domain the domain to update 158 * @throws ArgumentNotValid if domain is null 159 * @throws UnknownID if the Domain domain has not been added previously to persistent storage. 160 */ 161 public abstract void update(Domain domain); 162 163 /** 164 * Get the total number of domains available. 165 * 166 * @return the total number of registered domains. 167 */ 168 public abstract int getCountDomains(); 169 170 /** 171 * Gets list of all domains. 172 * 173 * @return List of all added domains 174 */ 175 public abstract Iterator<Domain> getAllDomains(); 176 177 /** 178 * Gets an iterator of all domains. Implements the Iterable interface. 179 * 180 * @return Iterator of all presently known domains. 181 */ 182 public Iterator<Domain> iterator() { 183 return getAllDomains(); 184 } 185 186 /** 187 * Gets list of all domains in the order expected by snapshot harvest job generation, that is order by template 188 * name, then byte limit (descending), then domain name. 189 * 190 * @return List of all added domains 191 */ 192 public abstract Iterator<Domain> getAllDomainsInSnapshotHarvestOrder(); 193 194 /** 195 * Reset the singleton. Only for use in tests! TODO remove this, no test methods in business classes! 196 */ 197 static void resetSingleton() { 198 instance = null; 199 } 200 201 /** 202 * Find all info about results of a harvest definition. 203 * 204 * @param previousHarvestDefinition A harvest definition that has already been run. 205 * @return An array of information for all domainconfigurations which were harvested by the given harvest 206 * definition. 207 */ 208 public abstract Iterator<HarvestInfo> getHarvestInfoBasedOnPreviousHarvestDefinition( 209 final HarvestDefinition previousHarvestDefinition); 210 211 /** 212 * Use a glob-like matcher to find a subset of domains. 213 * <p> 214 * In this simple matcher, * stands for any number of arbitrary characters, and ? stands for one arbitrary 215 * character. Including these, the given string must match the entire domain name. 216 * 217 * @param glob A domain name with * and ? wildcards 218 * @return List of domain names matching the glob, sorted by name. 219 */ 220 public abstract List<String> getDomains(String glob); 221 222 /** 223 * Return whether the given configuration can be deleted. This should be a fairly lightweight method, but is not 224 * likely to be instantaneous. Note that to increase speed, this method may rely on underlying systems to enforce 225 * transitive invariants. This means that if this method says a configuration can be deleted, the dao may still 226 * reject a delete request. If this method returns false, deletion will however definitely not be allowed. 227 * 228 * @param config the given configuration 229 * @return true if the he given configuration can be deleted, false otherwise 230 */ 231 public abstract boolean mayDelete(DomainConfiguration config); 232 233 /** 234 * Read a Domain from Database, and return the domain information as a SparseDomain object. We only read information 235 * relevant for the GUI listing. 236 * 237 * @param domainName a given domain 238 * @return a SparseDomain. 239 * @throws ArgumentNotValid if domainName is null or empty. 240 * @throws UnknownID if domain does not exist 241 */ 242 public abstract SparseDomain readSparse(String domainName); 243 244 /** 245 * Return a list of AliasInfo objects. If the given domain is not-null, it should return AliasInfo objects where 246 * AliasInfo.aliasOf == domain 247 * 248 * @param domain a given domain 249 * @return a list of AliasInfo objects. 250 * @throws UnknownID If the given domain does not exist. (!DomainDAO.exists(domain)) 251 * @throws ArgumentNotValid if domainName is null 252 */ 253 public abstract List<AliasInfo> getAliases(String domain); 254 255 /** 256 * Get a list of all current alias-relations. The list should be sorted by increasing last-update. This means any 257 * expired aliases will be at the start of the list, while un-expired aliases will be at the end. 258 * 259 * @return a list of all current alias-relations. 260 */ 261 public abstract List<AliasInfo> getAllAliases(); 262 263 /** 264 * Get a list of all TLDs present in the domains table. IP-numbers registered are counted together. 265 * 266 * @param level maximum level of TLD 267 * @return a list of all TLDs present in the domains table, sorted alphabetically. 268 */ 269 public abstract List<TLDInfo> getTLDs(int level); 270 271 /** 272 * Get the HarvestInfo object for a certain job and DomainConfiguration defined by domainName and configName. 273 * 274 * @param domainName the name of a given domain 275 * @param configName the name of a given configuration 276 * @param job the job 277 * @return The HarvestInfo object for a certain job and DomainConfiguration or null, if job has not yet been 278 * started. 279 */ 280 public abstract HarvestInfo getDomainJobInfo(Job job, String domainName, String configName); 281 282 /** 283 * Get a list of info about harvests performed on a given domain. 284 * <p> 285 * Note that harvest info from before the DB DAOs are unreliable, as harvests cannot be told apart and no dates are 286 * available. 287 * 288 * @param domainName Domain to get info for. 289 * @param orderBy The column attribute to order by. 290 * @param asc true if the results should be ordered according to the natural order, false if they are to be sorted 291 * in reverse. 292 * @return List of DomainHarvestInfo objects with information on that domain. 293 */ 294 public abstract List<DomainHarvestInfo> listDomainHarvestInfo(String domainName, String orderBy, boolean asc); 295 296 /** 297 * Get the DomainConfiguration given a specific domainName and a configurationName. 298 * 299 * @param domainName The name of a domain 300 * @param configName The name of a configuration for this domain 301 * @return the DomainConfiguration, if the specified configuration exists; otherwise throws UnknownID 302 */ 303 public abstract DomainConfiguration getDomainConfiguration(String domainName, String configName); 304 305 /** 306 * Get the domainHistory for a specific domain. 307 * 308 * @param domainName A name of a specific domain. 309 * @return the domainHistory for a specific domain. 310 */ 311 public abstract DomainHistory getDomainHistory(String domainName); 312 313 /** 314 * Use a glob-like matcher to find a subset of domains. 315 * <p> 316 * In this simple matcher, * stands for any number of arbitrary characters, and ? stands for one arbitrary 317 * character. Including these, the given string must match the entire domain name. 318 * 319 * @param glob A domain name with * and ? wildcards 320 * @param searchField The field in the Domain table to search 321 * @return List of domain names matching the glob, sorted by name. 322 */ 323 public abstract List<String> getDomains(String glob, String searchField); 324 325 /** 326 * Read the used configurations name + seedslists for the domain. Note that even though a list of 327 * <code>DomainConfiguration</code> object are returned, only the name + seeds lists are set. 328 * <p> 329 * A used configuration is the default configuration + configurations used in a active harvest definition. 330 * 331 * @param domainID The domain to find the configurations for. 332 * @return The list of ID for the used configurations. 333 */ 334 public abstract List<Long> findUsedConfigurations(Long domainID); 335 336 /** 337 * Rename and update a DomainConfiguration for a specific domain. 338 * @param domain The given domain 339 * @param domainConf The given domainConfig 340 * @param configOldName The old name of the domainConfig 341 */ 342 public abstract void renameAndUpdateConfig(Domain domain, DomainConfiguration domainConf, String configOldName); 343 344 /** 345 * Get the name of the default configuration for the given domain. 346 * 347 * @param domainName a name of a domain 348 * @return the name of the default configuration for the given domain. 349 */ 350 public abstract String getDefaultDomainConfigurationName(String domainName); 351 352 353 public abstract List<String> getAllDomainNames(); 354 355 356}