001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.datamodel;
024
025import java.io.BufferedReader;
026import java.io.File;
027import java.io.FileInputStream;
028import java.io.FileNotFoundException;
029import java.io.IOException;
030import java.io.InputStreamReader;
031import java.util.ArrayList;
032import java.util.Date;
033import java.util.List;
034import java.util.Locale;
035
036import javax.servlet.jsp.JspWriter;
037
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IOFailure;
043import dk.netarkivet.common.utils.DomainUtils;
044import dk.netarkivet.common.utils.I18n;
045import dk.netarkivet.common.utils.StringUtils;
046import dk.netarkivet.harvester.Constants;
047
048/**
049 * Utility class for ingesting new domains into the database.
050 */
051public class IngestDomainList {
052
053    /** The logger. */
054    protected static final Logger log = LoggerFactory.getLogger(IngestDomainList.class);
055
056    /** I18n bundle used by this class. */
057    private static final I18n I18N = new dk.netarkivet.common.utils.I18n(Constants.TRANSLATIONS_BUNDLE);
058
059    /** How often to log progress. */
060    private static final int PRINT_INTERVAL = 10000;
061
062    /** Connection to the persistent store of Domains. */
063    private DomainDAO dao;
064
065    /**
066     * Constructor for the IngestDomainList class. It makes a connection to the Domains store.
067     */
068    public IngestDomainList() {
069        dao = DomainDAO.getInstance();
070    }
071
072    /**
073     * Adds all new domains from a newline-separated file of domain names. The file is assumed to be in the UTF-8
074     * format. For large files, a line is printed to the log, and to the out variable (if not set to null), every
075     * PRINT_INTERVAL lines.
076     *
077     * @param domainList the file containing the domain names.
078     * @param out a stream to which output can be sent. May be null.
079     * @param theLocale the given Locale
080     */
081    public void updateDomainInfo(File domainList, JspWriter out, Locale theLocale) {
082        ArgumentNotValid.checkNotNull(domainList, "File domainList");
083        ArgumentNotValid.checkNotNull(theLocale, "Locale theLocale");
084        Domain myDomain;
085        String domainName;
086        BufferedReader in = null;
087        int countDomains = 0;
088        List<String> invalidDomains = new ArrayList<String>();
089        int countCreatedDomains = 0;
090        boolean print = (out != null);
091        try {
092            in = new BufferedReader(new InputStreamReader(new FileInputStream(domainList), "UTF-8"));
093
094            while ((domainName = in.readLine()) != null) {
095                try {
096                    countDomains++;
097                    if ((countDomains % PRINT_INTERVAL) == 0) {
098                        Date d = new Date();
099                        String msg = "Domain #" + countDomains + ": " + domainName + " added at " + d;
100                        log.info(msg);
101                        if (print) {
102                            out.print(I18N.getString(theLocale, "domain.number.0.1.added.at.2", countDomains,
103                                    domainName, d));
104                            out.print("<br/>");
105                            out.flush();
106                        }
107                    }
108
109                    if (DomainUtils.isValidDomainName(domainName)) {
110                        if (!dao.exists(domainName)) {
111                            myDomain = Domain.getDefaultDomain(domainName);
112                            dao.create(myDomain);
113                            countCreatedDomains++;
114                        }
115                    } else {
116                        log.debug("domain '{}' is not a valid domain Name", domainName);
117                        invalidDomains.add(domainName);
118                        if (print) {
119                            out.print(I18N.getString(theLocale, "errormsg;domain.0.is.not.a.valid" + ".domainname",
120                                    domainName));
121                            out.print("<br/>");
122                            out.flush();
123                        }
124                    }
125                } catch (Exception e) {
126                    log.debug("Could not create domain '{}'", domainName, e);
127                    if (print) {
128                        out.print(I18N.getString(theLocale, "errormsg;unable.to.create" + ".domain.0.due.to.error.1",
129                                domainName, e.getMessage()));
130                        out.print("<br/>\n");
131                        out.flush();
132                    }
133                }
134            }
135            log.info("Looked at {} domains, created {} new domains and found {} invalid domains", countDomains, countCreatedDomains, invalidDomains.size());
136            if (!invalidDomains.isEmpty()) {
137                log.warn("Found the following {} invalid domains during ingest", invalidDomains.size(), StringUtils.conjoin(",", invalidDomains));
138            }
139        } catch (FileNotFoundException e) {
140            String msg = "File '" + domainList.getAbsolutePath() + "' not found";
141            log.debug(msg);
142            throw new IOFailure(msg, e);
143        } catch (IOException e) {
144            String msg = " Can't read the domain-file '" + domainList.getAbsolutePath() + "'.";
145            log.debug(msg);
146            throw new IOFailure(msg, e);
147        } finally {
148            try {
149                if (in != null) {
150                    in.close();
151                }
152            } catch (IOException e) {
153                throw new IOFailure("Problem closing input stream", e);
154            }
155        }
156    }
157
158}