001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.io.BufferedReader; 027import java.io.IOException; 028import java.io.PrintWriter; 029import java.io.Serializable; 030import java.io.StringReader; 031import java.io.StringWriter; 032import java.util.Iterator; 033import java.util.LinkedList; 034import java.util.List; 035import java.util.regex.Pattern; 036 037import org.apache.commons.io.IOUtils; 038 039import dk.netarkivet.common.exceptions.ArgumentNotValid; 040import dk.netarkivet.common.exceptions.IOFailure; 041import dk.netarkivet.common.utils.Named; 042import dk.netarkivet.common.utils.Settings; 043import dk.netarkivet.harvester.HarvesterSettings; 044 045/** 046 * Representation of the list of harvesting seeds. Basically just a list of URL's. 047 */ 048@SuppressWarnings({"serial"}) 049public class SeedList implements Serializable, Named { 050 051 /** The name of the seedlist. Used for sorting. */ 052 private String name; 053 /** The List of Seeds; Each String in the List holds one seed. */ 054 private List<String> seeds; 055 /** Any comments associated with this seedlist. */ 056 private String comments; 057 058 /** ID autogenerated by DB, ignored otherwise. */ 059 private Long id; 060 061 /** 062 * Create new seedlist. Helper constructor that takes the seeds as a newline separated string. 063 * 064 * @param name the name of the new seedlist 065 * @param seedsAsString the seeds 066 * @throws ArgumentNotValid if name is null or empty or seeds are null. Empty seeds are allowed. 067 */ 068 public SeedList(String name, String seedsAsString) { 069 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 070 ArgumentNotValid.checkNotNull(seedsAsString, "seeds"); 071 072 BufferedReader urlreader = new BufferedReader(new StringReader(seedsAsString)); 073 seeds = new LinkedList<String>(); 074 String url; 075 try { 076 while ((url = urlreader.readLine()) != null) { 077 if (isAcceptableURL(url)) { 078 seeds.add(url); 079 } else { 080 throw new ArgumentNotValid("The URL '" + url + "' is not valid"); 081 } 082 } 083 } catch (IOException e) { 084 throw new IOFailure("Should never happen: " + "IO Failure while reading a string", e); 085 } 086 this.name = name; 087 this.comments = ""; 088 } 089 090 /** 091 * Check urls for validity. Valid seeds are controlled by a configurable regular expression 092 * 093 * @param url The url to check 094 * @return true, if it is accepted 095 * @see {@link HarvesterSettings#VALID_SEED_REGEX}. 096 */ 097 private boolean isAcceptableURL(String url) { 098 Pattern validSeedPattern = Pattern.compile(Settings.get(HarvesterSettings.VALID_SEED_REGEX)); 099 if (!validSeedPattern.matcher(url).matches()) { 100 return false; 101 } 102 return true; 103 } 104 105 /** 106 * Create new seedlist. 107 * 108 * @param name the name of the new seedlist 109 * @param seeds the seeds 110 * @throws ArgumentNotValid if the arguments are null or empty strings 111 */ 112 public SeedList(String name, List<String> seeds) { 113 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 114 ArgumentNotValid.checkNotNullOrEmpty(seeds, "seeds"); 115 116 this.name = name; 117 this.seeds = seeds; 118 this.comments = ""; 119 } 120 121 /** 122 * Gets all seeds in a list. 123 * 124 * @return The seeds 125 */ 126 public List<String> getSeeds() { 127 return seeds; 128 } 129 130 /** 131 * Gets the seeds. Seeds are separated by newline, 132 * 133 * @return the seedlist as a String 134 */ 135 public String getSeedsAsString() { 136 StringWriter urls = new StringWriter(); 137 PrintWriter urlwriter = new PrintWriter(urls); 138 for (Iterator<String> i = seeds.iterator(); i.hasNext();) { 139 String url = i.next(); 140 urlwriter.println(url); 141 } 142 urlwriter.flush(); 143 String tmp = urls.toString(); 144 IOUtils.closeQuietly(urls); 145 return tmp; 146 } 147 148 /** 149 * Get the name of this seedlist. 150 * 151 * @return The name. 152 */ 153 public String getName() { 154 return name; 155 } 156 157 /** 158 * Get the comments of this seedlist. 159 * 160 * @return The comments. 161 */ 162 public String getComments() { 163 return comments; 164 } 165 166 /** 167 * Set the comments for this list. 168 * 169 * @param s User-entered free-form comments. 170 */ 171 public void setComments(String s) { 172 comments = s; 173 } 174 175 /** 176 * Get the ID of this seedlist. Only for use by DBDAO 177 * 178 * @return the ID of this seedlist 179 */ 180 long getID() { 181 return id; 182 } 183 184 /** 185 * Set the ID of this seedlist. Only for use by DBDAO 186 * 187 * @param newID the new ID of this seedlist 188 */ 189 void setID(long newID) { 190 this.id = newID; 191 } 192 193 /** 194 * Check if this seedlist has an ID set yet (doesn't happen until the DBDAO persists it). 195 * 196 * @return true if this seedlist has an ID set 197 */ 198 boolean hasID() { 199 return id != null; 200 } 201 202 /** 203 * Returns a human-readable representation of the seeds. 204 * 205 * @return A readable string 206 */ 207 public String toString() { 208 return getName() + ": " + seeds.toString(); 209 } 210 211 /** 212 * Auto generated by IntelliJ IDEA. 213 * 214 * @param o The object to compare with 215 * @return Whether they are equal 216 */ 217 public boolean equals(Object o) { 218 if (this == o) { 219 return true; 220 } 221 if (!(o instanceof SeedList)) { 222 return false; 223 } 224 225 final SeedList seedList = (SeedList) o; 226 227 if (!comments.equals(seedList.comments)) { 228 return false; 229 } 230 if (!name.equals(seedList.name)) { 231 return false; 232 } 233 if (!seeds.equals(seedList.seeds)) { 234 return false; 235 } 236 237 return true; 238 } 239 240 /** 241 * Auto generated by IntelliJ IDEA. 242 * 243 * @return hashcode 244 */ 245 public int hashCode() { 246 int result; 247 result = name.hashCode(); 248 result = 29 * result + seeds.hashCode(); 249 result = 29 * result + comments.hashCode(); 250 return result; 251 } 252 253}