001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.sql.Connection; 027import java.sql.PreparedStatement; 028import java.sql.ResultSet; 029import java.sql.SQLException; 030import java.sql.Statement; 031import java.util.ArrayList; 032import java.util.List; 033 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036 037import dk.netarkivet.common.exceptions.ArgumentNotValid; 038import dk.netarkivet.common.exceptions.IOFailure; 039import dk.netarkivet.common.exceptions.UnknownID; 040import dk.netarkivet.common.utils.DBUtils; 041import dk.netarkivet.common.utils.ExceptionUtils; 042 043/** 044 * A singleton giving access to global crawler traps. 045 */ 046public class GlobalCrawlerTrapListDBDAO extends GlobalCrawlerTrapListDAO { 047 048 /** The logger for this class. */ 049 private static final Logger log = LoggerFactory.getLogger(GlobalCrawlerTrapListDBDAO.class); 050 051 /** 052 * protected constructor of this class. Checks if any migration are needed before operation starts. 053 */ 054 protected GlobalCrawlerTrapListDBDAO() { 055 Connection connection = HarvestDBConnection.get(); 056 try { 057 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPEXPRESSIONS); 058 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPLISTS); 059 } finally { 060 HarvestDBConnection.release(connection); 061 } 062 } 063 064 /** 065 * Statement to select all trap lists which are either active or inactive. 066 */ 067 private static final String SELECT_BY_ACTIVITY = "SELECT global_crawler_trap_list_id FROM global_crawler_trap_lists WHERE isActive = ?"; 068 069 /** 070 * Returns a list of either all active or all inactive trap lists. 071 * 072 * @param isActive whether to return active or inactive lists. 073 * @return a list if global crawler trap lists. 074 */ 075 private List<GlobalCrawlerTrapList> getAllByActivity(boolean isActive) { 076 List<GlobalCrawlerTrapList> result = new ArrayList<GlobalCrawlerTrapList>(); 077 Connection conn = HarvestDBConnection.get(); 078 PreparedStatement stmt = null; 079 try { 080 stmt = conn.prepareStatement(SELECT_BY_ACTIVITY); 081 stmt.setBoolean(1, isActive); 082 ResultSet rs = stmt.executeQuery(); 083 while (rs.next()) { 084 result.add(read(rs.getInt(1))); 085 } 086 return result; 087 } catch (SQLException e) { 088 String message = "Error reading trap list\n" + ExceptionUtils.getSQLExceptionCause(e); 089 log.warn(message, e); 090 throw new UnknownID(message, e); 091 } finally { 092 DBUtils.closeStatementIfOpen(stmt); 093 HarvestDBConnection.release(conn); 094 } 095 } 096 097 @Override 098 public List<GlobalCrawlerTrapList> getAllActive() { 099 return getAllByActivity(true); 100 } 101 102 @Override 103 public List<GlobalCrawlerTrapList> getAllInActive() { 104 return getAllByActivity(false); 105 } 106 107 @Override 108 public List<String> getAllActiveTrapExpressions() { 109 Connection conn = HarvestDBConnection.get(); 110 List<String> result = new ArrayList<String>(); 111 PreparedStatement stmt = null; 112 try { 113 stmt = conn.prepareStatement("SELECT DISTINCT trap_expression FROM global_crawler_trap_lists, " 114 + "global_crawler_trap_expressions " + "WHERE global_crawler_trap_list_id = " 115 + "crawler_trap_list_id " + "AND isActive = ?"); 116 stmt.setBoolean(1, true); 117 ResultSet rs = stmt.executeQuery(); 118 while (rs.next()) { 119 result.add(rs.getString(1)); 120 } 121 return result; 122 } catch (SQLException e) { 123 String message = "Error retrieving expressions.\n" + ExceptionUtils.getSQLExceptionCause(e); 124 log.warn(message, e); 125 throw new IOFailure(message, e); 126 } finally { 127 DBUtils.closeStatementIfOpen(stmt); 128 HarvestDBConnection.release(conn); 129 } 130 } 131 132 /** 133 * Statement to insert a new trap list. 134 */ 135 private static final String INSERT_TRAPLIST_STMT = "INSERT INTO global_crawler_trap_lists (name, description, isActive)" 136 + "VALUES (?,?,?)"; 137 138 /** 139 * Statement to insert a new trap expression in a given list. 140 */ 141 private static final String INSERT_TRAP_EXPR_STMT = "INSERT INTO global_crawler_trap_expressions (crawler_trap_list_id, trap_expression) VALUES (?,?) "; 142 143 @Override 144 public int create(GlobalCrawlerTrapList trapList) { 145 ArgumentNotValid.checkNotNull(trapList, "trapList"); 146 // Check for existence of a trapList in the database with the same name 147 // and throw argumentNotValid if not 148 if (exists(trapList.getName())) { 149 throw new ArgumentNotValid("Crawlertrap with name '" + trapList.getName() + "'already exists in database"); 150 } 151 int trapId; 152 Connection conn = HarvestDBConnection.get(); 153 PreparedStatement stmt = null; 154 try { 155 conn.setAutoCommit(false); 156 stmt = conn.prepareStatement(INSERT_TRAPLIST_STMT, Statement.RETURN_GENERATED_KEYS); 157 stmt.setString(1, trapList.getName()); 158 stmt.setString(2, trapList.getDescription()); 159 stmt.setBoolean(3, trapList.isActive()); 160 stmt.executeUpdate(); 161 ResultSet rs = stmt.getGeneratedKeys(); 162 rs.next(); 163 trapId = rs.getInt(1); 164 trapList.setId(trapId); 165 for (String expr : trapList.getTraps()) { 166 stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT); 167 stmt.setInt(1, trapId); 168 stmt.setString(2, expr); 169 stmt.executeUpdate(); 170 } 171 conn.commit(); 172 } catch (SQLException e) { 173 String message = "SQL error creating global crawler trap list \n" + ExceptionUtils.getSQLExceptionCause(e); 174 log.warn(message, e); 175 throw new IOFailure(message, e); 176 } finally { 177 DBUtils.closeStatementIfOpen(stmt); 178 DBUtils.rollbackIfNeeded(conn, "create trap list", trapList); 179 HarvestDBConnection.release(conn); 180 } 181 return trapId; 182 } 183 184 /** 185 * Statement to delete a trap list. 186 */ 187 private static final String DELETE_TRAPLIST_STMT = "DELETE from global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?"; 188 189 /** 190 * Statement to delete all expressions in a given trap list. 191 */ 192 private static final String DELETE_EXPR_STMT = "DELETE FROM global_crawler_trap_expressions WHERE crawler_trap_list_id = ?"; 193 194 @Override 195 public void delete(int id) { 196 Connection conn = HarvestDBConnection.get(); 197 PreparedStatement stmt = null; 198 try { 199 conn.setAutoCommit(false); 200 // First delete the list. 201 stmt = conn.prepareStatement(DELETE_TRAPLIST_STMT); 202 stmt.setInt(1, id); 203 stmt.executeUpdate(); 204 stmt.close(); 205 // Then delete all its expressions. 206 stmt = conn.prepareStatement(DELETE_EXPR_STMT); 207 stmt.setInt(1, id); 208 stmt.executeUpdate(); 209 conn.commit(); 210 } catch (SQLException e) { 211 String message = "Error deleting trap list: '" + id + "'\n" + ExceptionUtils.getSQLExceptionCause(e); 212 log.warn(message, e); 213 throw new UnknownID(message, e); 214 } finally { 215 DBUtils.closeStatementIfOpen(stmt); 216 DBUtils.rollbackIfNeeded(conn, "delete trap list", id); 217 HarvestDBConnection.release(conn); 218 } 219 } 220 221 /** 222 * Statement to update the elementary properties of a trap list. 223 */ 224 private static final String LIST_UPDATE_STMT = "UPDATE global_crawler_trap_lists SET name = ?, description = ?, isActive = ? WHERE global_crawler_trap_list_id = ?"; 225 226 /** 227 * Update a trap list. In order to update the trap expressions for this list, we first delete all the existing trap 228 * expressions for the list then add all those in the updated version. 229 * 230 * @param trapList the trap list to update 231 */ 232 @Override 233 public void update(GlobalCrawlerTrapList trapList) { 234 ArgumentNotValid.checkNotNull(trapList, "trapList"); 235 Connection conn = HarvestDBConnection.get(); 236 PreparedStatement stmt = null; 237 try { 238 conn.setAutoCommit(false); 239 stmt = conn.prepareStatement(LIST_UPDATE_STMT); 240 stmt.setString(1, trapList.getName()); 241 stmt.setString(2, trapList.getDescription()); 242 stmt.setBoolean(3, trapList.isActive()); 243 stmt.setInt(4, trapList.getId()); 244 stmt.executeUpdate(); 245 stmt.close(); 246 247 // Delete all the trap expressions. 248 stmt = conn.prepareStatement(DELETE_EXPR_STMT); 249 stmt.setInt(1, trapList.getId()); 250 stmt.executeUpdate(); 251 stmt.close(); 252 // Add the new trap expressions one by one. 253 for (String expr : trapList.getTraps()) { 254 stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT); 255 stmt.setInt(1, trapList.getId()); 256 stmt.setString(2, expr); 257 stmt.executeUpdate(); 258 stmt.close(); 259 } 260 conn.commit(); 261 } catch (SQLException e) { 262 String message = "Error updating trap list :'" + trapList.getId() + "'\n" 263 + ExceptionUtils.getSQLExceptionCause(e); 264 log.warn(message, e); 265 throw new UnknownID(message, e); 266 } finally { 267 DBUtils.closeStatementIfOpen(stmt); 268 DBUtils.rollbackIfNeeded(conn, "update trap list", trapList); 269 HarvestDBConnection.release(conn); 270 } 271 } 272 273 /** 274 * Statement to read the elementary properties of a trap list. 275 */ 276 private static final String SELECT_TRAPLIST_STMT = "SELECT name, description, isActive FROM global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?"; 277 278 /** 279 * Statement to read the trap expressions for a trap list. 280 */ 281 private static final String SELECT_TRAP_EXPRESSIONS_STMT = "SELECT trap_expression from global_crawler_trap_expressions WHERE crawler_trap_list_id = ?"; 282 283 @Override 284 public GlobalCrawlerTrapList read(int id) { 285 Connection conn = HarvestDBConnection.get(); 286 PreparedStatement stmt = null; 287 try { 288 stmt = conn.prepareStatement(SELECT_TRAPLIST_STMT); 289 stmt.setInt(1, id); 290 ResultSet rs = stmt.executeQuery(); 291 if (!rs.next()) { 292 throw new UnknownID("No such GlobalCrawlerTrapList: '" + id + "'"); 293 } 294 String name = rs.getString("name"); 295 String description = rs.getString("description"); 296 boolean isActive = rs.getBoolean("isActive"); 297 stmt.close(); 298 stmt = conn.prepareStatement(SELECT_TRAP_EXPRESSIONS_STMT); 299 stmt.setInt(1, id); 300 rs = stmt.executeQuery(); 301 List<String> exprs = new ArrayList<String>(); 302 while (rs.next()) { 303 exprs.add(rs.getString("trap_expression")); 304 } 305 return new GlobalCrawlerTrapList(id, exprs, name, description, isActive); 306 } catch (SQLException e) { 307 String message = "Error retrieving trap list for id '" + id + "'\n" 308 + ExceptionUtils.getSQLExceptionCause(e); 309 log.warn(message, e); 310 throw new IOFailure(message, e); 311 } finally { 312 DBUtils.closeStatementIfOpen(stmt); 313 HarvestDBConnection.release(conn); 314 } 315 } 316 317 /** 318 * Statement to read the elementary properties of a trap list. 319 */ 320 private static final String EXISTS_TRAPLIST_STMT = "SELECT * FROM global_crawler_trap_lists WHERE name = ?"; 321 322 /** 323 * Does crawlertrap with this name already exist. 324 * 325 * @param name The name for a crawlertrap 326 * @return true, if a crawlertrap with the given name already exists in the database; otherwise false 327 */ 328 @Override 329 public boolean exists(String name) { 330 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 331 Connection conn = HarvestDBConnection.get(); 332 PreparedStatement stmt = null; 333 boolean exists = false; 334 try { 335 stmt = conn.prepareStatement(EXISTS_TRAPLIST_STMT); 336 stmt.setString(1, name); 337 ResultSet rs = stmt.executeQuery(); 338 if (rs.next()) { 339 exists = true; 340 } 341 342 return exists; 343 } catch (SQLException e) { 344 String message = "Error checking for existence of trap list " + "with name '" + name + "'\n" 345 + ExceptionUtils.getSQLExceptionCause(e); 346 log.warn(message, e); 347 throw new IOFailure(message, e); 348 } finally { 349 DBUtils.closeStatementIfOpen(stmt); 350 HarvestDBConnection.release(conn); 351 } 352 } 353 354 /** 355 * Reads a list of all active global crawler trap expressions from the database and adds them to the crawl template 356 * for this job. 357 * @param orderXmlDoc The template to add crawlertraps to. 358 */ 359 public void addGlobalCrawlerTraps(HeritrixTemplate orderXmlDoc) { 360 //System.out.println("Calling addGlobalCrawlerTraps w/arg: " + orderXmlDoc); 361 362 GlobalCrawlerTrapListDAO dao = GlobalCrawlerTrapListDAO.getInstance(); 363 orderXmlDoc.insertCrawlerTraps(Constants.GLOBAL_CRAWLER_TRAPS_ELEMENT_NAME, 364 dao.getAllActiveTrapExpressions()); 365 } 366}