001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.datamodel;
024
025import java.io.BufferedReader;
026import java.io.File;
027import java.io.FileInputStream;
028import java.io.FileNotFoundException;
029import java.io.IOException;
030import java.io.InputStreamReader;
031import java.util.ArrayList;
032import java.util.Date;
033import java.util.List;
034import java.util.Locale;
035
036import javax.servlet.jsp.JspWriter;
037
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import dk.netarkivet.common.exceptions.ArgumentNotValid;
042import dk.netarkivet.common.exceptions.IOFailure;
043import dk.netarkivet.common.utils.DomainUtils;
044import dk.netarkivet.common.utils.I18n;
045import dk.netarkivet.common.utils.StringUtils;
046import dk.netarkivet.harvester.Constants;
047
048/**
049 * Utility class for ingesting new domains into the database.
050 */
051public class IngestDomainList {
052
053    /** The logger. */
054    protected static final Logger log = LoggerFactory.getLogger(IngestDomainList.class);
055
056    /** I18n bundle used by this class. */
057    private static final I18n I18N = new dk.netarkivet.common.utils.I18n(Constants.TRANSLATIONS_BUNDLE);
058
059    /** How often to log progress. */
060    private static final int PRINT_INTERVAL = 10000;
061
062    /** Connection to the persistent store of Domains. */
063    private DomainDAO dao;
064
065    /**
066     * Constructor for the IngestDomainList class. It makes a connection to the Domains store.
067     */
068    public IngestDomainList() {
069        dao = DomainDAO.getInstance();
070    }
071
072    /**
073     * Adds all new domains from a newline-separated file of domain names. The file is assumed to be in the UTF-8
074     * format. For large files, a line is printed to the log, and to the out variable (if not set to null), every
075     * PRINT_INTERVAL lines.
076     *
077     * @param domainList the file containing the domain names.
078     * @param out a stream to which output can be sent. May be null.
079     * @param theLocale the given Locale
080     */
081    public void updateDomainInfo(File domainList, JspWriter out, Locale theLocale) {
082        ArgumentNotValid.checkNotNull(domainList, "File domainList");
083        ArgumentNotValid.checkNotNull(theLocale, "Locale theLocale");
084        Domain myDomain;
085        String domainName;
086        BufferedReader in = null;
087        int countDomains = 0;
088        List<String> invalidDomains = new ArrayList<String>();
089        int countCreatedDomains = 0;
090        boolean print = (out != null);
091        try {
092            in = new BufferedReader(new InputStreamReader(new FileInputStream(domainList), "UTF-8"));
093
094            while ((domainName = in.readLine()) != null) {
095                domainName = domainName.trim();
096                if (domainName.isEmpty()) {
097                        continue; // Skip empty lines
098                }
099                try {
100                    countDomains++;
101                    if ((countDomains % PRINT_INTERVAL) == 0) {
102                        Date d = new Date();
103                        String msg = "Domain #" + countDomains + ": " + domainName + " added at " + d;
104                        log.info(msg);
105                        if (print) {
106                            out.print(I18N.getString(theLocale, "domain.number.0.1.added.at.2", countDomains,
107                                    domainName, d));
108                            out.print("<br/>");
109                            out.flush();
110                        }
111                    }
112
113                    if (DomainUtils.isValidDomainName(domainName)) {
114                        if (!dao.exists(domainName)) {
115                            myDomain = Domain.getDefaultDomain(domainName);
116                            dao.create(myDomain);
117                            countCreatedDomains++;
118                        }
119                    } else {
120                        log.debug("domain '{}' is not a valid domain Name", domainName);
121                        invalidDomains.add(domainName);
122                        if (print) {
123                            out.print(I18N.getString(theLocale, "errormsg;domain.0.is.not.a.valid" + ".domainname",
124                                    domainName));
125                            out.print("<br/>");
126                            out.flush();
127                        }
128                    }
129                } catch (Exception e) {
130                    log.debug("Could not create domain '{}'", domainName, e);
131                    if (print) {
132                        out.print(I18N.getString(theLocale, "errormsg;unable.to.create" + ".domain.0.due.to.error.1",
133                                domainName, e.getMessage()));
134                        out.print("<br/>\n");
135                        out.flush();
136                    }
137                }
138            }
139            log.info("Looked at {} domains, created {} new domains and found {} invalid domains", countDomains, countCreatedDomains, invalidDomains.size());
140            if (!invalidDomains.isEmpty()) {
141                log.warn("Found the following {} invalid domains during ingest", invalidDomains.size(), StringUtils.conjoin(",", invalidDomains));
142            }
143        } catch (FileNotFoundException e) {
144            String msg = "File '" + domainList.getAbsolutePath() + "' not found";
145            log.debug(msg);
146            throw new IOFailure(msg, e);
147        } catch (IOException e) {
148            String msg = " Can't read the domain-file '" + domainList.getAbsolutePath() + "'.";
149            log.debug(msg);
150            throw new IOFailure(msg, e);
151        } finally {
152            try {
153                if (in != null) {
154                    in.close();
155                }
156            } catch (IOException e) {
157                throw new IOFailure("Problem closing input stream", e);
158            }
159        }
160    }
161
162}