001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.io.BufferedReader;
027import java.io.IOException;
028import java.io.InputStream;
029import java.io.InputStreamReader;
030import java.util.HashSet;
031import java.util.List;
032import java.util.Set;
033import java.util.regex.Pattern;
034import java.util.regex.PatternSyntaxException;
035
036import dk.netarkivet.common.exceptions.ArgumentNotValid;
037import dk.netarkivet.common.exceptions.IOFailure;
038
039/**
040 * Class representing one or more global crawler traps, modelled as a list of regular expressions.
041 */
042public class GlobalCrawlerTrapList {
043
044    /** The unique id of this collection of crawler traps. */
045    private int id;
046
047    /**
048     * The list of traps. Each item is a regular expression matching url's to be avoided. In the database, (id, trap) is
049     * a primary key for the table global_crawler_trap_expressions so we model the traps as a Set to avoid possible
050     * duplicates.
051     */
052    private Set<String> traps;
053
054    /** A unique name by which this list is identified. */
055    private String name;
056
057    /** A free-text description of the traps in this collection. */
058    private String description;
059
060    /** Whether or not this set of traps is active (in use). */
061    private boolean isActive;
062
063    /**
064     * Protected constructor used by the DAO to create instances of this class.
065     *
066     * @param id the id of this list.
067     * @param name a name by which this list is known.
068     * @param traps the set of trap expressions.
069     * @param description A textual description of this list (may be null).
070     * @param isActive flag indicating whether this list is isActive.
071     * @throws ArgumentNotValid if the name is empty or null.
072     */
073    protected GlobalCrawlerTrapList(int id, List<String> traps, String name, String description, boolean isActive)
074            throws ArgumentNotValid {
075        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
076        ArgumentNotValid.checkNotNull(traps, "traps");
077        this.id = id;
078        this.traps = new HashSet<String>(traps.size());
079        this.traps.addAll(traps);
080        this.description = description;
081        this.isActive = isActive;
082        this.name = name;
083    }
084
085    /**
086     * Construct a new GlobalCrawlerTrapList from an input stream consisting of newline-separated regular expressions.
087     *
088     * @param is an input stream from which the list of trap expressions can be read.
089     * @param name a name by which this list is known.
090     * @param description A textual description of this list.
091     * @param isActive flag indicating whether this list is isActive.
092     * @throws IOFailure if the input stream cannot be found or read.
093     * @throws ArgumentNotValid if the input stream is null or the name is null or empty.
094     */
095    public GlobalCrawlerTrapList(InputStream is, String name, String description, boolean isActive) throws IOFailure,
096            ArgumentNotValid {
097        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
098        ArgumentNotValid.checkNotNull(is, "is");
099        this.traps = new HashSet<String>();
100        this.isActive = isActive;
101        this.name = name;
102        if (description == null) {
103            this.description = "";
104        } else {
105            this.description = description;
106        }
107        setTrapsFromInputStream(is);
108    }
109
110    /**
111     * A utility method to read the list of traps from an InputStream, line-by-line.
112     *
113     * @param is The input stream from which to read.
114     * @throws IOFailure if the input stream cannot be read.
115     * @throws ArgumentNotValid if the input stream is null or if any of the specified traps are not valid regular
116     * expressions.
117     */
118    public void setTrapsFromInputStream(InputStream is) throws ArgumentNotValid {
119        ArgumentNotValid.checkNotNull(is, "is");
120        traps.clear();
121        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
122        String line;
123        try {
124            while ((line = reader.readLine()) != null) {
125                final String trap = line.trim();
126                try {
127                    Pattern.compile(trap);
128                } catch (PatternSyntaxException e) {
129                    throw new ArgumentNotValid("Cannot parse the string '" + trap + "' as a Java regular expression.",
130                            e);
131                }
132                traps.add(trap);
133            }
134        } catch (IOException e) {
135            throw new IOFailure("Could not read crawler traps", e);
136        }
137    }
138
139    /**
140     * Get the id of this list.
141     *
142     * @return the id.
143     */
144    public int getId() {
145        return id;
146    }
147
148    /**
149     * Set the id of this list.
150     *
151     * @param id the id.
152     */
153    protected void setId(int id) {
154        this.id = id;
155    }
156
157    /**
158     * Get the name of the list.
159     *
160     * @return the name.
161     */
162    public String getName() {
163        return name;
164    }
165
166    /**
167     * Set the name of the list.
168     *
169     * @param name the name.
170     */
171    public void setName(String name) {
172        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
173        this.name = name;
174    }
175
176    /**
177     * Get the trap expressions for this list.
178     *
179     * @return the trap expressions.
180     */
181    public Set<String> getTraps() {
182        return traps;
183    }
184
185    /**
186     * Set the trap expressions for this list.
187     *
188     * @param traps the trap expressions.
189     */
190    public void setTraps(Set<String> traps) {
191        ArgumentNotValid.checkNotNull(traps, "traps");
192        this.traps = traps;
193    }
194
195    /**
196     * Get the description of this list.
197     *
198     * @return the description.
199     */
200    public String getDescription() {
201        return description;
202    }
203
204    /**
205     * Set the description of this list.
206     *
207     * @param description the description.
208     */
209    public void setDescription(String description) {
210        ArgumentNotValid.checkNotNull(description, "description");
211        this.description = description;
212    }
213
214    /**
215     * Retruns true if this list is active.
216     *
217     * @return the activity state of the list.
218     */
219    public boolean isActive() {
220        return isActive;
221    }
222
223    /**
224     * Set the activity state of the list.
225     *
226     * @param active the activity state.
227     */
228    public void setActive(boolean active) {
229        isActive = active;
230    }
231
232    @Override
233    public boolean equals(Object o) {
234        if (this == o) {
235            return true;
236        }
237        if (o == null || getClass() != o.getClass()) {
238            return false;
239        }
240
241        GlobalCrawlerTrapList that = (GlobalCrawlerTrapList) o;
242
243        if (id != that.id) {
244            return false;
245        }
246        if (isActive != that.isActive) {
247            return false;
248        }
249        if (description != null ? !description.equals(that.description) : that.description != null) {
250            return false;
251        }
252        if (name != null ? !name.equals(that.name) : that.name != null) {
253            return false;
254        }
255        if (traps != null ? !traps.equals(that.traps) : that.traps != null) {
256            return false;
257        }
258
259        return true;
260    }
261
262    @Override
263    public int hashCode() {
264        int result = id;
265        result = 31 * result + (traps != null ? traps.hashCode() : 0);
266        result = 31 * result + (name != null ? name.hashCode() : 0);
267        result = 31 * result + (description != null ? description.hashCode() : 0);
268        result = 31 * result + (isActive ? 1 : 0);
269        return result;
270    }
271
272}