001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.io.BufferedReader; 027import java.io.IOException; 028import java.io.InputStream; 029import java.io.InputStreamReader; 030import java.util.HashSet; 031import java.util.List; 032import java.util.Set; 033import java.util.regex.Pattern; 034import java.util.regex.PatternSyntaxException; 035 036import dk.netarkivet.common.exceptions.ArgumentNotValid; 037import dk.netarkivet.common.exceptions.IOFailure; 038 039/** 040 * Class representing one or more global crawler traps, modelled as a list of regular expressions. 041 */ 042public class GlobalCrawlerTrapList { 043 044 /** The unique id of this collection of crawler traps. */ 045 private int id; 046 047 /** 048 * The list of traps. Each item is a regular expression matching url's to be avoided. In the database, (id, trap) is 049 * a primary key for the table global_crawler_trap_expressions so we model the traps as a Set to avoid possible 050 * duplicates. 051 */ 052 private Set<String> traps; 053 054 /** A unique name by which this list is identified. */ 055 private String name; 056 057 /** A free-text description of the traps in this collection. */ 058 private String description; 059 060 /** Whether or not this set of traps is active (in use). */ 061 private boolean isActive; 062 063 /** 064 * Protected constructor used by the DAO to create instances of this class. 065 * 066 * @param id the id of this list. 067 * @param name a name by which this list is known. 068 * @param traps the set of trap expressions. 069 * @param description A textual description of this list (may be null). 070 * @param isActive flag indicating whether this list is isActive. 071 * @throws ArgumentNotValid if the name is empty or null. 072 */ 073 protected GlobalCrawlerTrapList(int id, List<String> traps, String name, String description, boolean isActive) 074 throws ArgumentNotValid { 075 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 076 ArgumentNotValid.checkNotNull(traps, "traps"); 077 this.id = id; 078 this.traps = new HashSet<String>(traps.size()); 079 this.traps.addAll(traps); 080 this.description = description; 081 this.isActive = isActive; 082 this.name = name; 083 } 084 085 /** 086 * Construct a new GlobalCrawlerTrapList from an input stream consisting of newline-separated regular expressions. 087 * 088 * @param is an input stream from which the list of trap expressions can be read. 089 * @param name a name by which this list is known. 090 * @param description A textual description of this list. 091 * @param isActive flag indicating whether this list is isActive. 092 * @throws IOFailure if the input stream cannot be found or read. 093 * @throws ArgumentNotValid if the input stream is null or the name is null or empty. 094 */ 095 public GlobalCrawlerTrapList(InputStream is, String name, String description, boolean isActive) throws IOFailure, 096 ArgumentNotValid { 097 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 098 ArgumentNotValid.checkNotNull(is, "is"); 099 this.traps = new HashSet<String>(); 100 this.isActive = isActive; 101 this.name = name; 102 if (description == null) { 103 this.description = ""; 104 } else { 105 this.description = description; 106 } 107 setTrapsFromInputStream(is); 108 } 109 110 /** 111 * A utility method to read the list of traps from an InputStream, line-by-line. 112 * 113 * @param is The input stream from which to read. 114 * @throws IOFailure if the input stream cannot be read. 115 * @throws ArgumentNotValid if the input stream is null or if any of the specified traps are not valid regular 116 * expressions. 117 */ 118 public void setTrapsFromInputStream(InputStream is) throws ArgumentNotValid { 119 ArgumentNotValid.checkNotNull(is, "is"); 120 traps.clear(); 121 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 122 String line; 123 try { 124 while ((line = reader.readLine()) != null) { 125 final String trap = line.trim(); 126 try { 127 Pattern.compile(trap); 128 } catch (PatternSyntaxException e) { 129 throw new ArgumentNotValid("Cannot parse the string '" + trap + "' as a Java regular expression.", 130 e); 131 } 132 traps.add(trap); 133 } 134 } catch (IOException e) { 135 throw new IOFailure("Could not read crawler traps", e); 136 } 137 } 138 139 /** 140 * Get the id of this list. 141 * 142 * @return the id. 143 */ 144 public int getId() { 145 return id; 146 } 147 148 /** 149 * Set the id of this list. 150 * 151 * @param id the id. 152 */ 153 protected void setId(int id) { 154 this.id = id; 155 } 156 157 /** 158 * Get the name of the list. 159 * 160 * @return the name. 161 */ 162 public String getName() { 163 return name; 164 } 165 166 /** 167 * Set the name of the list. 168 * 169 * @param name the name. 170 */ 171 public void setName(String name) { 172 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 173 this.name = name; 174 } 175 176 /** 177 * Get the trap expressions for this list. 178 * 179 * @return the trap expressions. 180 */ 181 public Set<String> getTraps() { 182 return traps; 183 } 184 185 /** 186 * Set the trap expressions for this list. 187 * 188 * @param traps the trap expressions. 189 */ 190 public void setTraps(Set<String> traps) { 191 ArgumentNotValid.checkNotNull(traps, "traps"); 192 this.traps = traps; 193 } 194 195 /** 196 * Get the description of this list. 197 * 198 * @return the description. 199 */ 200 public String getDescription() { 201 return description; 202 } 203 204 /** 205 * Set the description of this list. 206 * 207 * @param description the description. 208 */ 209 public void setDescription(String description) { 210 ArgumentNotValid.checkNotNull(description, "description"); 211 this.description = description; 212 } 213 214 /** 215 * Retruns true if this list is active. 216 * 217 * @return the activity state of the list. 218 */ 219 public boolean isActive() { 220 return isActive; 221 } 222 223 /** 224 * Set the activity state of the list. 225 * 226 * @param active the activity state. 227 */ 228 public void setActive(boolean active) { 229 isActive = active; 230 } 231 232 @Override 233 public boolean equals(Object o) { 234 if (this == o) { 235 return true; 236 } 237 if (o == null || getClass() != o.getClass()) { 238 return false; 239 } 240 241 GlobalCrawlerTrapList that = (GlobalCrawlerTrapList) o; 242 243 if (id != that.id) { 244 return false; 245 } 246 if (isActive != that.isActive) { 247 return false; 248 } 249 if (description != null ? !description.equals(that.description) : that.description != null) { 250 return false; 251 } 252 if (name != null ? !name.equals(that.name) : that.name != null) { 253 return false; 254 } 255 if (traps != null ? !traps.equals(that.traps) : that.traps != null) { 256 return false; 257 } 258 259 return true; 260 } 261 262 @Override 263 public int hashCode() { 264 int result = id; 265 result = 31 * result + (traps != null ? traps.hashCode() : 0); 266 result = 31 * result + (name != null ? name.hashCode() : 0); 267 result = 31 * result + (description != null ? description.hashCode() : 0); 268 result = 31 * result + (isActive ? 1 : 0); 269 return result; 270 } 271 272}