001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.datamodel;
024
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collections;
028import java.util.Date;
029import java.util.HashMap;
030import java.util.Iterator;
031import java.util.List;
032import java.util.Locale;
033import java.util.Map;
034import java.util.regex.Pattern;
035import java.util.regex.PatternSyntaxException;
036
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040import dk.netarkivet.common.Constants;
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IllegalState;
043import dk.netarkivet.common.exceptions.PermissionDenied;
044import dk.netarkivet.common.exceptions.UnknownID;
045import dk.netarkivet.common.utils.DomainUtils;
046import dk.netarkivet.common.utils.Named;
047import dk.netarkivet.common.utils.Settings;
048import dk.netarkivet.common.utils.StringUtils;
049import dk.netarkivet.harvester.HarvesterSettings;
050import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory;
051import dk.netarkivet.harvester.datamodel.extendedfield.ExtendableEntity;
052import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes;
053import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValue;
054
055/**
 * Represents known information about a domain. A domain is identified by a domain name (e.g. kb.dk).
057 * <p>
058 * The following information is used to control how a domain is harvested: Seedlists, configurations and passwords. Each
059 * seedlist defines one or more URL's that the harvester should use as starting points. A configuration defines a
060 * specific combination of settings (seedlist, harvester settings, passwords) that should be used during harvest.
061 * Passwords define user names and passwords that might be used for the domain.
062 * <p>
063 * Information about previous harvests of this domain is available via the domainHistory.
064 * <p>
065 * Information from the domain registrant (DK-HOSTMASTER) about the domain registration is available in the
066 * registration. This includes the dates where the domain was known to exist (included in a domain list), together with
067 * domain owner information.
068 * <p>
069 * Notice that each configuration references one of the seedlists by name, and possibly one of the Passwords.
070 */
071@SuppressWarnings({"rawtypes"})
072public class Domain extends ExtendableEntity implements Named {
073
    /** The logger for this class. */
075    protected static final Logger log = LoggerFactory.getLogger(Domain.class);
076
077    /** The identification used to lookup the domain. */
078
079    private String domainName;
080
081    /**
082     * Map<String, DomainConfiguration> the various harvest configurations that can be used to harvest this domain.
083     */
084    private Map<String, DomainConfiguration> domainConfigurations;
085
086    /** Use this configuration unless otherwise specified. */
087    private String defaultConfigName;
088
089    /**
090     * Map<String, SeedList> The different seedlists used as starting points by the harvesters.
091     */
092    private Map<String, SeedList> seedlists;
093
094    /** Map<String, Password> with an entry for each known password. */
095    private Map<String, Password> passwords;
096
097    /**
098     * List of crawler traps, that is regexps that should be ignored for this domain.
099     */
100    private List<String> crawlerTraps;
101
102    /** Records all historical information about the domain. */
103    private DomainHistory history;
104
105    /**
106     * List<DomainOwnerInfo> contains information about the known owners of this domain.
107     */
108    private List<DomainOwnerInfo> domainOwnerInfos;
109
110    /** Comments that the user has entered. */
111    private String comments;
112
113    /** Edition is used by the DAO to keep track of changes. */
114    long edition = -1;
115
116    /**
117     * If non-null, this domain is considered an alias of the domain named. The field must be either null or aliasInfo
118     * that defines an alias from this domain to another, and the time the alias field was last updated. This is used to
119     * allow operators to check the domains that have been aliases for a long time.
120     * <p>
121     * Note that we do not allow transitive aliases, so the domain named in this field is not allowed to become an alias
122     * itself.
123     */
124    private AliasInfo aliasInfo;
125
126    /** ID autogenerated by DB DAO. */
127    private Long id;
128
129    /**
130     * Create new instance of a domain. It is generally recommended that getDefaultDomain is used instead of this
131     * constructor.
132     *
133     * @param theDomainName Name used to reference the domain
134     * @throws ArgumentNotValid if either of the arguments are null or empty, or if the domain does not match the regex
135     * for valid domains
136     */
137    protected Domain(String theDomainName) {
138        super(DAOProviderFactory.getExtendedFieldDAOProvider());
139        ArgumentNotValid.checkNotNullOrEmpty(theDomainName, "theDomainName");
140        if (!DomainUtils.isValidDomainName(theDomainName)) {
141            throw new ArgumentNotValid("Domain '" + theDomainName + "' does not match the regexp "
142                    + "defining valid domains: " + DomainUtils.VALID_DOMAIN_MATCHER.pattern());
143        }
144        domainName = theDomainName;
145        comments = "";
146        domainConfigurations = new HashMap<String, DomainConfiguration>();
147        seedlists = new HashMap<String, SeedList>();
148        passwords = new HashMap<String, Password>();
149        crawlerTraps = Collections.emptyList();
150        history = new DomainHistory();
151        domainOwnerInfos = new ArrayList<DomainOwnerInfo>();
152    }
153
154    /**
155     * Get a new domain, initialised with default values.
156     *
157     * @param domainName The name of the domain
158     * @return a domain with the given name
159     * @throws ArgumentNotValid if name is null or empty
160     */
161    public static Domain getDefaultDomain(String domainName) {
162        Domain myDomain;
163        myDomain = new Domain(domainName);
164
165        // Create default seed list containing one seed: http://www.domain
166        // or http://1.2.3.4 for IP-named domains.
167        String defaultSeedListName = Settings.get(HarvesterSettings.DEFAULT_SEEDLIST);
168
169        SeedList seedlist;
170        if (Constants.IP_KEY_REGEXP.matcher(domainName).matches()) {
171            // IP domains should not get www
172            seedlist = new SeedList(defaultSeedListName, "http://" + domainName);
173        } else {
174            seedlist = new SeedList(defaultSeedListName, "http://www." + domainName);
175        }
176        myDomain.addSeedList(seedlist);
177
178        List<SeedList> seedlists = Arrays.asList(seedlist);
179
180        // Create default configuration using the default seedlist
181        String domainDefaultConfig = Settings.get(HarvesterSettings.DOMAIN_DEFAULT_CONFIG);
182
183        DomainConfiguration cfg = new DomainConfiguration(domainDefaultConfig, myDomain, seedlists,
184                new ArrayList<Password>());
185        cfg.setOrderXmlName(Settings.get(HarvesterSettings.DOMAIN_DEFAULT_ORDERXML));
186        cfg.setMaxRequestRate(Integer.parseInt(Settings.get(HarvesterSettings.DOMAIN_CONFIG_MAXRATE)));
187        myDomain.addConfiguration(cfg);
188
189        return myDomain;
190    }
191
192    /**
193     * Adds a new configuration to the domain. If this is the first configuration added, it becomes the default
194     * configuration. The seedlist referenced by the configuration must already be registered in this domain otherwise
195     * an UnknownID exception is thrown.
196     *
197     * @param cfg the configuration that is added
198     * @throws UnknownID if the name of the seedlist referenced by cfg is unknown
199     * @throws PermissionDenied if a configuration with the same name already exists
200     * @throws ArgumentNotValid if null supplied
201     */
202    public void addConfiguration(DomainConfiguration cfg) {
203        ArgumentNotValid.checkNotNull(cfg, "cfg");
204
205        if (domainConfigurations.containsKey(cfg.getName())) {
206            throw new PermissionDenied("A configuration already exists with the name:" + cfg.getName()
207                    + "; in the domain:" + getName() + ";");
208        }
209
210        putConfiguration(cfg);
211
212        if (domainConfigurations.size() == 1) {
213            defaultConfigName = cfg.getName();
214        }
215    }
216
217    /**
218     * Set a configuration in the domain. This checks that the seedlists and passwords are legal.
219     *
220     * @param cfg The configuration to add.
221     */
222    private void putConfiguration(DomainConfiguration cfg) {
223        checkListContainsNamed(cfg, cfg.getSeedLists(), "seedlist", seedlists);
224        checkListContainsNamed(cfg, cfg.getPasswords(), "passwords", passwords);
225
226        domainConfigurations.put(cfg.getName(), cfg);
227    }
228
229    /**
230     * Helper method used to verify that a configuration does not reference seedlists or passwords that do not exist in
231     * this domain.
232     *
233     * @param cfg the configuration being checked
234     * @param items an iterator to the references that are checked (seedlists or passwords)
235     * @param typename the name of the references being checked
236     * @param m the corresponding domain map that must contain entries matching the names in the items
237     * @param <T> The type contained in items iterator. The type extends Named
238     */
239    private <T extends Named> void checkListContainsNamed(DomainConfiguration cfg, final Iterator<T> items,
240            final String typename, final Map m) {
241        while (items.hasNext()) {
242            Named named = items.next();
243
244            if (!m.containsKey(named.getName())) {
245                throw new UnknownID("Configuration:" + cfg.getName() + "; uses unknown " + typename + ":"
246                        + named.getName() + "; in the domain:" + getName() + ";");
247            }
248        }
249    }
250
251    /**
252     * Helper method that adds or updates an entry in a map. Used to add/update entries in seedlists and passwords maps
253     *
254     * @param m the map to modify
255     * @param name the name of the element to add or update
256     * @param addAction when true an add action is performed and en entry with the name is not allowed to exist in the
257     * map before the operation, when false an update operation is performed and an entry must already exists with the
258     * name in the map.
259     * @param value the object to add to m
260     * @param <T> The type contained as values in the map m.
261     */
262    private <T extends Named> void put(Map<String, T> m, String name, boolean addAction, T value) {
263        boolean alreadyExist = m.containsKey(name);
264
265        if (addAction && alreadyExist) {
266            throw new PermissionDenied("An entry already exists with the name:" + name + "; in the domain:" + getName()
267                    + ";");
268        }
269
270        if ((!addAction) && (!alreadyExist)) {
271            throw new UnknownID("No entry exists with the name '" + name + "' in the domain '" + getName() + "'");
272        }
273
274        m.put(name, value);
275    }
276
277    /**
278     * Adds a seed list to the domain.
279     *
280     * @param seedlist the actual seedslist.
281     * @throws ArgumentNotValid if an argument is null
282     * @throws PermissionDenied if the seedName already exists
283     */
284    public void addSeedList(SeedList seedlist) {
285        ArgumentNotValid.checkNotNull(seedlist, "seedlist");
286        put(seedlists, seedlist.getName(), true, seedlist);
287    }
288
289    /**
290     * Update a seed list to the domain. Replaces an existing seedlist with the same name.
291     *
292     * @param seedlist the actual seedslist.
293     * @throws ArgumentNotValid if an argument is null
294     * @throws UnknownID if the seedlist.getName() does not exists
295     */
296    public void updateSeedList(SeedList seedlist) {
297        ArgumentNotValid.checkNotNull(seedlist, "seedlist");
298        put(seedlists, seedlist.getName(), false, seedlist);
299    }
300
301    /**
302     * Adds a password to the domain.
303     *
304     * @param password A password object to add.
305     * @throws ArgumentNotValid if the argument is null
306     * @throws PermissionDenied if a password already exists with this name
307     */
308    public void addPassword(Password password) {
309        ArgumentNotValid.checkNotNull(password, "password");
310        put(passwords, password.getName(), true, password);
311    }
312
313    /**
314     * Updates a password on the domain.
315     *
316     * @param password A password object to update.
317     * @throws ArgumentNotValid if the argument is null
318     * @throws PermissionDenied if no password exists with this name
319     */
320    public void updatePassword(Password password) {
321        ArgumentNotValid.checkNotNull(password, "password");
322        put(passwords, password.getName(), false, password);
323    }
324
325    /**
326     * Mark a configuration as the default configuration to use. The configuration name must match an already added
327     * configuration, otherwise an UnknownID exception is thrown.
328     *
329     * @param cfgName a name of a configuration
330     * @throws UnknownID when the cfgName does not match an added configuration
331     * @throws ArgumentNotValid if cfgName is null or empty
332     */
333    public void setDefaultConfiguration(String cfgName) {
334        ArgumentNotValid.checkNotNullOrEmpty(cfgName, "cfgName");
335
336        if (!domainConfigurations.containsKey(cfgName)) {
337            throw new UnknownID("Default configuration not registered:" + cfgName + "; in the domain:" + getName()
338                    + ";");
339        }
340
341        defaultConfigName = cfgName;
342    }
343
344    /**
345     * Returns an already registered configuration.
346     *
347     * @param cfgName the name of an registered configuration
348     * @return the configuration
349     * @throws UnknownID if the name is not a registered configuration
350     * @throws ArgumentNotValid if cfgName is null or empty
351     */
352    public DomainConfiguration getConfiguration(String cfgName) {
353        ArgumentNotValid.checkNotNullOrEmpty(cfgName, "cfgName");
354
355        if (!domainConfigurations.containsKey(cfgName)) {
356            throw new UnknownID("Configuration '" + cfgName + "' not registered in the domain '" + getName() + "'");
357        }
358        DomainConfiguration cfg = domainConfigurations.get(cfgName);
359        cfg.setDomainhistory(this.getHistory());
360        return cfg;
361    }
362
363    /**
364     * Gets the default configuration. If no configuration has been explicitly set the first configuration added to this
365     * domain is returned. If no configurations have been added at all a UnknownID exception is thrown.
366     *
367     * @return the default configuration (never null)
368     * @throws UnknownID if no configurations exists
369     */
370    public DomainConfiguration getDefaultConfiguration() {
371        if (domainConfigurations.size() == 0) {
372            throw new UnknownID("No configurations have been registered in the domain:" + getName() + ";");
373        }
374
375        return getConfiguration(defaultConfigName);
376    }
377
378    /**
379     * Gets the name of this domain.
380     *
381     * @return the name of this domain
382     */
383    public String getName() {
384        return domainName;
385    }
386
387    /**
388     * @return the domain comments.
389     */
390    public String getComments() {
391        return comments;
392    }
393
394    /**
395     * Get the domain history.
396     *
397     * @return the domain history
398     */
399    public DomainHistory getHistory() {
400        return history;
401    }
402
403    /**
404     * Get a specific seedlist previously added to this domain.
405     *
406     * @param name the name of the seedlist to return
407     * @return the specified seedlist
408     * @throws ArgumentNotValid if name is null or empty
409     * @throws UnknownID if no seedlist has been added with the supplied name
410     */
411    public SeedList getSeedList(String name) {
412        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
413
414        if (!hasSeedList(name)) {
415            throw new UnknownID("Seedlist '" + name + " has not been registered in the domain '" + getName() + "'");
416        }
417
418        return seedlists.get(name);
419    }
420
421    /**
422     * Return true if the named seedlist exists in this domain.
423     *
424     * @param name String representing a possible seedlist for the domain.
425     * @return true, if the named seedlist exists in this domain
426     */
427    public boolean hasSeedList(String name) {
428        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
429
430        return seedlists.containsKey(name);
431    }
432
433    /**
434     * Removes a seedlist from this Domain. The seedlist must not be in use by any of the configurations, otherwise a
435     * PermissionDenied exception is thrown.
436     *
437     * @param name the name of the seedlist to remove
438     * @throws PermissionDenied if the seedlist is in use by a configuration or this is the last seedlist in this Domain
439     * @throws UnknownID if the no seedlist exists with the name
440     * @throws ArgumentNotValid if a null argument is supplied
441     */
442    public void removeSeedList(String name) {
443        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
444
445        if (!seedlists.containsKey(name)) {
446            throw new UnknownID("Seedlist has not been registered:" + name + "; in the domain:" + getName() + ";");
447        }
448
449        if (seedlists.size() <= 1) {
450            throw new PermissionDenied("Can not remove the last seedlist:" + name + ";");
451        }
452
453        for (String cfgname : domainConfigurations.keySet()) {
454            DomainConfiguration cfg = domainConfigurations.get(cfgname);
455
456            for (Iterator<SeedList> i = cfg.getSeedLists(); i.hasNext();) {
457                SeedList seedlist = i.next();
458
459                if (seedlist.getName().equals(name)) {
460                    throw new PermissionDenied("The seedlist:" + name + "; is used by the configuration:" + cfgname
461                            + ";");
462                }
463            }
464        }
465
466        // if we get here without an exception - the seedlist is not in use
467        seedlists.remove(name);
468    }
469
470    /**
471     * Removes a password from this Domain. The password must not be in use by any of the configurations, otherwise a
472     * PermissionDenied exception is thrown.
473     *
474     * @param name the name of the password to remove
475     * @throws PermissionDenied if the password is in use by a configuration or this is the last password in this Domain
476     * @throws UnknownID if the no password exists with the name
477     * @throws ArgumentNotValid if a null argument is supplied
478     */
479    public void removePassword(String name) {
480        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
481
482        if (!passwords.containsKey(name)) {
483            throw new UnknownID("Password has not been registered:" + name + "; in the domain:" + getName() + ";");
484        }
485
486        for (String cfgname : domainConfigurations.keySet()) {
487            DomainConfiguration cfg = domainConfigurations.get(cfgname);
488
489            if (cfg.usesPassword(name)) {
490                throw new PermissionDenied("The password:" + name + "; is used by the configuration:" + cfgname + ";");
491            }
492        }
493
494        // if we get here without an exception - the password is not in use
495        passwords.remove(name);
496    }
497
498    /**
499     * Removes a configuration from this domain. The default configuration can not be removed, instead PermissionDenied
500     * is thrown. It is not possible to remove a configuration that is referenced by one or more HarvestDefinitions
501     *
502     * @param configName The name of a configuration to remove.
503     * @throws ArgumentNotValid if name is null or empty
504     * @throws PermissionDenied if the default configuration is attempted removed or if one or more HarvestDefinitions
505     * reference the configuration
506     */
507    public void removeConfiguration(String configName) {
508        ArgumentNotValid.checkNotNullOrEmpty(configName, "configName");
509
510        if (defaultConfigName.equals(configName)) {
511            throw new PermissionDenied("The default configuration can not be removed:" + configName + ";");
512        }
513
514        if (!domainConfigurations.containsKey(configName)) {
515            throw new UnknownID("Configuration not registered:" + configName + ";");
516        }
517
518        // Test that no harvest definition uses this configuration
519        final DomainDAO dao = DomainDAO.getInstance();
520        if (!dao.mayDelete(getConfiguration(configName))) {
521            // Since this is an error case, spend a little time getting better
522            // info. This could be done a lot faster by adding a function to
523            // the DomainDAO.
524            HarvestDefinitionDAO hddao = HarvestDefinitionDAO.getInstance();
525            Iterator<HarvestDefinition> hds = hddao.getAllHarvestDefinitions();
526            List<String> usages = new ArrayList<String>();
527            while (hds.hasNext()) {
528                HarvestDefinition hd = hds.next();
529                Iterator<DomainConfiguration> configs = hd.getDomainConfigurations();
530                while (configs.hasNext()) {
531                    DomainConfiguration dc = configs.next();
532                    if (dc.getName().equals(configName) && dc.getDomainName().equals(getName())) {
533                        usages.add(hd.getName());
534                    }
535                }
536            }
537            throw new PermissionDenied("Cannot delete domain configuration '" + configName + "', because it is used "
538                    + "by the following " + "harvest definitions: " + usages);
539        }
540
541        domainConfigurations.remove(configName);
542    }
543
544    /**
545     * Gets all configurations belonging to this domain.
546     *
547     * @return all configurations belonging to this domain.
548     */
549    public Iterator<DomainConfiguration> getAllConfigurations() {
550        return domainConfigurations.values().iterator();
551    }
552
553    /**
554     * Get all seedlists belonging to this domain.
555     *
556     * @return all seedlists belonging to this domain
557     */
558    public Iterator<SeedList> getAllSeedLists() {
559        return seedlists.values().iterator();
560    }
561
562    /**
563     * Return the passwords defined for this domain.
564     *
565     * @return Iterator<Password> of known passwords.
566     */
567    public Iterator<Password> getAllPasswords() {
568        return passwords.values().iterator();
569    }
570
571    /**
572     * Gets all configurations belonging to this domain. The returned list is sorted by name according to language given
573     * in the parameter.
574     *
575     * @param loc contains the language sorting must adhere to
576     * @return all configurations belonging to this domain sorted according to language
577     */
578    public List<DomainConfiguration> getAllConfigurationsAsSortedList(Locale loc) {
579        ArgumentNotValid.checkNotNull(loc, "loc");
580        List<DomainConfiguration> resultSet = new ArrayList<DomainConfiguration>(domainConfigurations.values());
581        NamedUtils.sortNamedObjectList(loc, resultSet);
582        return resultSet;
583    }
584
585    /**
586     * Gets all seedlists belonging to this domain. The returned list is sorted by name according to language given in
587     * the parameter.
588     *
589     * @param loc contains the language sorting must adhere to
590     * @return all seedlists belonging to this domain sorted according to language
591     */
592    public List<SeedList> getAllSeedListsAsSortedList(Locale loc) {
593        ArgumentNotValid.checkNotNull(loc, "loc");
594        List<SeedList> resultSet = new ArrayList<SeedList>(seedlists.values());
595        NamedUtils.sortNamedObjectList(loc, resultSet);
596        return resultSet;
597    }
598
599    /**
600     * Returns the passwords defined for this domain. The returned list is sorted by name according to language given in
601     * the parameter.
602     *
603     * @param loc contains the language sorting must adhere to
604     * @return a sorted list of known passwords according to language
605     */
606    public List<Password> getAllPasswordsAsSortedList(Locale loc) {
607        ArgumentNotValid.checkNotNull(loc, "loc");
608        List<Password> resultSet = new ArrayList<Password>(passwords.values());
609        NamedUtils.sortNamedObjectList(loc, resultSet);
610        return resultSet;
611    }
612
613    /**
614     * Add owner information.
615     *
616     * @param owner owner
617     */
618    public void addOwnerInfo(DomainOwnerInfo owner) {
619        ArgumentNotValid.checkNotNull(owner, "owner");
620        domainOwnerInfos.add(owner);
621    }
622
623    /**
624     * Get array of domain owner information.
625     *
626     * @return array containing information about the domain owner(s)
627     */
628    public DomainOwnerInfo[] getAllDomainOwnerInfo() {
629        return domainOwnerInfos.toArray(new DomainOwnerInfo[0]);
630    }
631
632    /**
633     * Get password information.
634     *
635     * @param name the id of the password settings to retrieve
636     * @return the password information
637     * @throws UnknownID if no password info exists with the id "name"
638     */
639    public Password getPassword(String name) {
640        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
641
642        if (!passwords.containsKey(name)) {
643            throw new UnknownID("Password has not been registered:" + name + "; in the domain:" + getName() + ";");
644        }
645
646        return passwords.get(name);
647    }
648
649    /**
650     * Set the comments for this domain.
651     *
652     * @param comments The new comments (can be null)
653     */
654    public void setComments(String comments) {
655        this.comments = comments;
656    }
657
658    /**
659     * Replaces existing configuration with cfg, using cfg.getName() as the id for the configuration.
660     *
661     * @param cfg the configuration to update
662     * @throws UnknownID if no configuration exists with the id cfg.getName(). ArgumentNotValid if cfg is null.
663     */
664    public void updateConfiguration(DomainConfiguration cfg) {
665        ArgumentNotValid.checkNotNull(cfg, "cfg");
666
667        if (!domainConfigurations.containsKey(cfg.getName())) {
668            throw new UnknownID("No configuration exists with the name:" + cfg.getName() + "; in the domain:"
669                    + getName() + ";");
670        }
671
672        putConfiguration(cfg);
673    }
674
675    /**
676     * Returns true if this domain has the named password.
677     *
678     * @param passwordName the identifier of the password info
679     * @return true if this domain has password info with id passwordname
680     */
681    public boolean hasPassword(String passwordName) {
682        return passwords.containsKey(passwordName);
683    }
684
685    /**
686     * Returns true if this domain has the named configuration.
687     *
688     * @param configName the identifier of the configuration
689     * @return true if this domain has a configuration with id configNmae
690     */
691    public boolean hasConfiguration(String configName) {
692        return domainConfigurations.containsKey(configName);
693    }
694
695    /**
696     * Get the edition number.
697     *
698     * @return the edition number
699     */
700    public long getEdition() {
701        return edition;
702    }
703
704    /**
705     * Set the edition number.
706     *
707     * @param theNewEdition the new edition
708     */
709    public void setEdition(long theNewEdition) {
710        edition = theNewEdition;
711    }
712
713    /**
714     * Get the ID of this domain. Only for use by DBDAO
715     *
716     * @return Get the ID of this domain
717     */
718    public long getID() {
719        return id;
720    }
721
722    /**
723     * Set the ID of this domain. Only for use by DBDAO.
724     *
725     * @param newId The new ID for this domain.
726     */
727    void setID(long newId) {
728        this.id = newId;
729    }
730
731    /**
732     * Check if this harvestinfo has an ID set yet (doesn't happen until the DBDAO persists it).
733     *
734     * @return true, if this domain has an ID different from null
735     */
736    boolean hasID() {
737        return id != null;
738    }
739
740    /**
741     * Return a human-readable representation of this object.
742     *
743     * @return Some string identifying the object. Do not use this for machine processing.
744     */
745    public String toString() {
746        StringBuilder sb = new StringBuilder();
747        sb.append("Domain:").append(getName()).append(";\n");
748        sb.append("Comment:").append(getComments()).append(";\n");
749
750        sb.append("Configurations:\n");
751
752        for (String cfgName : domainConfigurations.keySet()) {
753            sb.append("\t").append(cfgName).append(";\n");
754        }
755
756        sb.append("Seedlists:\n");
757
758        for (String seedName : seedlists.keySet()) {
759            sb.append("\t").append(seedName).append(";\n");
760        }
761
762        sb.append("Passwords:\n");
763
764        for (String pwName : passwords.keySet()) {
765            sb.append("\t").append(pwName).append(";\n");
766        }
767
768        sb.append("Extended Fields:\n");
769
770        for (int i = 0; i < extendedFieldValues.size(); i++) {
771            ExtendedFieldValue efv = extendedFieldValues.get(i);
772            sb.append("\t").append(efv.getExtendedFieldID() + ": " + efv.getContent()).append(";\n");
773        }
774
775        sb.append("---------------\n");
776
777        return sb.toString();
778    }
779
780    /**
781     * Sets a list of regular expressions defining urls that should never be harvested from this domain. The list (after
782     * trimming the strings, and any empty strings have been removed) is copied to a list that is stored immutably.
783     *
784     * @param regExps The list defining urls never to be harvested.
785     * @param strictMode If true, we throw ArgumentNotValid exception if invalid regexps are found
786     * @throws ArgumentNotValid if regExps is null or regExps contains invalid regular expressions (unless strictMode is
787     * false).
788     */
789    public void setCrawlerTraps(List<String> regExps, boolean strictMode) {
790        ArgumentNotValid.checkNotNull(regExps, "List<String> regExps");
791        List<String> cleanedListOfCrawlerTraps = new ArrayList<String>();
792        for (String crawlerTrap : regExps) {
793            log.trace("original trap: '" + crawlerTrap + "'");
794            String trimmedString = crawlerTrap.trim();
795            log.trace("trimmed  trap: '" + trimmedString + "'");
796            if (!(trimmedString.length() == 0)) {
797                cleanedListOfCrawlerTraps.add(crawlerTrap);
798            } else {
799                log.trace("Removed empty string from list of crawlertraps");
800            }
801        }
802        // Validate regexps
803        for (String regexp : cleanedListOfCrawlerTraps) {
804            try {
805                Pattern.compile(regexp);
806            } catch (PatternSyntaxException e) {
807                final String errMsg = "The regular expression '" + regexp + "' is invalid. "
808                        + "Please correct the expression.";
809                if (strictMode) {
810                    throw new ArgumentNotValid(errMsg, e);
811                } else {
812                    log.warn(errMsg, e);
813                }
814            }
815        }
816        crawlerTraps = Collections.unmodifiableList(cleanedListOfCrawlerTraps);
817    }
818
819    /**
820     * Returns the list of regexps never to be harvested from this domain, or the empty list if none. The returned list
821     * should never be null.
822     *
823     * @return The list of regexps of url's never to be harvested when harvesting this domain. This list is immutable.
824     */
825    public List<String> getCrawlerTraps() {
826        return crawlerTraps;
827    }
828
829    /**
830     * Returns the alias info for this domain, or null if this domain is not an alias.
831     *
832     * @return A domain name.
833     */
834    public AliasInfo getAliasInfo() {
835        return aliasInfo;
836    }
837
838    /**
839     * Update which domain this domain is considered an alias of. Calling this function will a) cause some slightly
840     * expensive checks to be performed, and b) set the time of last update. For object construction and copying, use
841     * setAlias.
842     *
843     * @param alias The name (e.g. "netarkivet.dk") of the domain that this domain is an alias of.
844     * @throws UnknownID If the given domain does not exist
845     * @throws IllegalState If updating the alias info would violate constraints of alias: No transitivity, no
846     * reflection.
847     */
848    public void updateAlias(String alias) {
849        if (getName().equals(alias)) {
850            String message = "Cannot make domain '" + this.getName() + "' an alias of itself";
851            log.debug(message);
852            throw new IllegalState(message);
853        }
854
855        if (alias != null) {
856            DomainDAO dao = DomainDAO.getInstance();
857            Domain otherD = dao.read(alias);
858            if (otherD.aliasInfo != null) {
859                String message = "Cannot make domain '" + this.getName() + "' an alias of '" + otherD.getName() + "',"
860                        + " as that domain is already an alias of '" + otherD.aliasInfo.getAliasOf() + "'";
861                log.debug(message);
862                throw new IllegalState(message);
863            }
864            if (dao.getAliases(getName()).size() != 0) {
865                List<String> aliasesForThisDomain = new ArrayList<String>();
866                for (AliasInfo ai : dao.getAliases(getName())) {
867                    aliasesForThisDomain.add(ai.getDomain());
868                }
869                String message = "Cannot make domain '" + this.getName() + "' an alias of '" + otherD.getName() + "',"
870                        + " as the domains '" + StringUtils.conjoin(",", aliasesForThisDomain) + "' are "
871                        + "already aliases of '" + this.getName() + "'";
872                log.debug(message);
873                throw new IllegalState(message);
874            }
875            setAliasInfo(new AliasInfo(domainName, alias, new Date()));
876        } else {
877            setAliasInfo(null);
878        }
879    }
880
881    /**
882     * Set the alias field on this object. This function performs no checking of existence of transitivity of alias
883     * domains, but it does check that the alias info is for this domain
884     *
885     * @param aliasInfo Alias information
886     * @throws ArgumentNotValid if the alias info is not for this domain
887     */
888    void setAliasInfo(AliasInfo aliasInfo) {
889        if (aliasInfo != null && !aliasInfo.getDomain().equals(domainName)) {
890            throw new ArgumentNotValid("AliasInfo must be for this domain");
891        }
892        this.aliasInfo = aliasInfo;
893    }
894
895    /**
896     * Gets the harvest info giving best information for expectation or how many objects a harvest using a given
897     * configuration will retrieve, we will prioritise the most recently harvest, where we have a full harvest.
898     *
899     * @param configName The name of the configuration
900     * @return The Harvest Information for the harvest defining the best expectation, including the number retrieved and
901     * the stop reason.
902     */
903    public HarvestInfo getBestHarvestInfoExpectation(String configName) {
904        ArgumentNotValid.checkNotNullOrEmpty(configName, "String configName");
905        return DomainHistory.getBestHarvestInfoExpectation(configName, this.getHistory());
906    }
907
908    /**
909     * All derived classes allow ExtendedFields from Type ExtendedFieldTypes.DOMAIN
910     *
911     * @return ExtendedFieldTypes.DOMAIN
912     */
913    protected int getExtendedFieldType() {
914        return ExtendedFieldTypes.DOMAIN;
915    }
916
917}