001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.io.BufferedReader;
027import java.io.IOException;
028import java.io.PrintWriter;
029import java.io.Serializable;
030import java.io.StringReader;
031import java.io.StringWriter;
032import java.util.Iterator;
033import java.util.LinkedList;
034import java.util.List;
035import java.util.regex.Pattern;
036
037import org.apache.commons.io.IOUtils;
038
039import dk.netarkivet.common.exceptions.ArgumentNotValid;
040import dk.netarkivet.common.exceptions.IOFailure;
041import dk.netarkivet.common.utils.Named;
042import dk.netarkivet.common.utils.Settings;
043import dk.netarkivet.harvester.HarvesterSettings;
044
045/**
046 * Representation of the list of harvesting seeds. Basically just a list of URL's.
047 */
048@SuppressWarnings({"serial"})
049public class SeedList implements Serializable, Named {
050
051    /** The name of the seedlist. Used for sorting. */
052    private String name;
053    /** The List of Seeds; Each String in the List holds one seed. */
054    private List<String> seeds;
055    /** Any comments associated with this seedlist. */
056    private String comments;
057
058    /** ID autogenerated by DB, ignored otherwise. */
059    private Long id;
060
061    /**
062     * Create new seedlist. Helper constructor that takes the seeds as a newline separated string.
063     *
064     * @param name the name of the new seedlist
065     * @param seedsAsString the seeds
066     * @throws ArgumentNotValid if name is null or empty or seeds are null. Empty seeds are allowed.
067     */
068    public SeedList(String name, String seedsAsString) {
069        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
070        ArgumentNotValid.checkNotNull(seedsAsString, "seeds");
071
072        BufferedReader urlreader = new BufferedReader(new StringReader(seedsAsString));
073        seeds = new LinkedList<String>();
074        String url;
075        try {
076            while ((url = urlreader.readLine()) != null) {
077                if (isAcceptableURL(url)) {
078                    seeds.add(url);
079                } else {
080                    throw new ArgumentNotValid("The URL '" + url + "' is not valid");
081                }
082            }
083        } catch (IOException e) {
084            throw new IOFailure("Should never happen: " + "IO Failure while reading a string", e);
085        }
086        this.name = name;
087        this.comments = "";
088    }
089
090    /**
091     * Check urls for validity. Valid seeds are controlled by a configurable regular expression
092     *
093     * @param url The url to check
094     * @return true, if it is accepted
095     * @see {@link HarvesterSettings#VALID_SEED_REGEX}.
096     */
097    private boolean isAcceptableURL(String url) {
098        Pattern validSeedPattern = Pattern.compile(Settings.get(HarvesterSettings.VALID_SEED_REGEX));
099        if (!validSeedPattern.matcher(url).matches()) {
100            return false;
101        }
102        return true;
103    }
104
105    /**
106     * Create new seedlist.
107     *
108     * @param name the name of the new seedlist
109     * @param seeds the seeds
110     * @throws ArgumentNotValid if the arguments are null or empty strings
111     */
112    public SeedList(String name, List<String> seeds) {
113        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
114        ArgumentNotValid.checkNotNullOrEmpty(seeds, "seeds");
115
116        this.name = name;
117        this.seeds = seeds;
118        this.comments = "";
119    }
120
121    /**
122     * Gets all seeds in a list.
123     *
124     * @return The seeds
125     */
126    public List<String> getSeeds() {
127        return seeds;
128    }
129
130    /**
131     * Gets the seeds. Seeds are separated by newline,
132     *
133     * @return the seedlist as a String
134     */
135    public String getSeedsAsString() {
136        StringWriter urls = new StringWriter();
137        PrintWriter urlwriter = new PrintWriter(urls);
138        for (Iterator<String> i = seeds.iterator(); i.hasNext();) {
139            String url = i.next();
140            urlwriter.println(url);
141        }
142        urlwriter.flush();
143        String tmp = urls.toString();
144        IOUtils.closeQuietly(urls);
145        return tmp;
146    }
147
148    /**
149     * Get the name of this seedlist.
150     *
151     * @return The name.
152     */
153    public String getName() {
154        return name;
155    }
156
157    /**
158     * Get the comments of this seedlist.
159     *
160     * @return The comments.
161     */
162    public String getComments() {
163        return comments;
164    }
165
166    /**
167     * Set the comments for this list.
168     *
169     * @param s User-entered free-form comments.
170     */
171    public void setComments(String s) {
172        comments = s;
173    }
174
175    /**
176     * Get the ID of this seedlist. Only for use by DBDAO
177     *
178     * @return the ID of this seedlist
179     */
180    long getID() {
181        return id;
182    }
183
184    /**
185     * Set the ID of this seedlist. Only for use by DBDAO
186     *
187     * @param newID the new ID of this seedlist
188     */
189    void setID(long newID) {
190        this.id = newID;
191    }
192
193    /**
194     * Check if this seedlist has an ID set yet (doesn't happen until the DBDAO persists it).
195     *
196     * @return true if this seedlist has an ID set
197     */
198    boolean hasID() {
199        return id != null;
200    }
201
202    /**
203     * Returns a human-readable representation of the seeds.
204     *
205     * @return A readable string
206     */
207    public String toString() {
208        return getName() + ": " + seeds.toString();
209    }
210
211    /**
212     * Auto generated by IntelliJ IDEA.
213     *
214     * @param o The object to compare with
215     * @return Whether they are equal
216     */
217    public boolean equals(Object o) {
218        if (this == o) {
219            return true;
220        }
221        if (!(o instanceof SeedList)) {
222            return false;
223        }
224
225        final SeedList seedList = (SeedList) o;
226
227        if (!comments.equals(seedList.comments)) {
228            return false;
229        }
230        if (!name.equals(seedList.name)) {
231            return false;
232        }
233        if (!seeds.equals(seedList.seeds)) {
234            return false;
235        }
236
237        return true;
238    }
239
240    /**
241     * Auto generated by IntelliJ IDEA.
242     *
243     * @return hashcode
244     */
245    public int hashCode() {
246        int result;
247        result = name.hashCode();
248        result = 29 * result + seeds.hashCode();
249        result = 29 * result + comments.hashCode();
250        return result;
251    }
252
253}