001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.sql.Connection; 027import java.sql.PreparedStatement; 028import java.sql.ResultSet; 029import java.sql.SQLException; 030import java.sql.Statement; 031import java.util.ArrayList; 032import java.util.List; 033 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036 037import dk.netarkivet.common.exceptions.ArgumentNotValid; 038import dk.netarkivet.common.exceptions.IOFailure; 039import dk.netarkivet.common.exceptions.UnknownID; 040import dk.netarkivet.common.utils.DBUtils; 041import dk.netarkivet.common.utils.ExceptionUtils; 042 043/** 044 * A singleton giving access to global crawler traps. 045 */ 046public class GlobalCrawlerTrapListDBDAO extends GlobalCrawlerTrapListDAO { 047 048 /** The logger for this class. */ 049 private static final Logger log = LoggerFactory.getLogger(GlobalCrawlerTrapListDBDAO.class); 050 051 /** 052 * protected constructor of this class. Checks if any migration are needed before operation starts. 053 */ 054 protected GlobalCrawlerTrapListDBDAO() { 055 Connection connection = HarvestDBConnection.get(); 056 try { 057 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPEXPRESSIONS); 058 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPLISTS); 059 } finally { 060 HarvestDBConnection.release(connection); 061 } 062 } 063 064 /** 065 * Statement to select all trap lists which are either active or inactive. 066 */ 067 private static final String SELECT_BY_ACTIVITY = "SELECT global_crawler_trap_list_id FROM global_crawler_trap_lists WHERE isActive = ?"; 068 069 /** 070 * Returns a list of either all active or all inactive trap lists. 071 * 072 * @param isActive whether to return active or inactive lists. 073 * @return a list if global crawler trap lists. 074 */ 075 private List<GlobalCrawlerTrapList> getAllByActivity(boolean isActive) { 076 List<GlobalCrawlerTrapList> result = new ArrayList<GlobalCrawlerTrapList>(); 077 Connection conn = HarvestDBConnection.get(); 078 PreparedStatement stmt = null; 079 try { 080 stmt = conn.prepareStatement(SELECT_BY_ACTIVITY); 081 stmt.setBoolean(1, isActive); 082 ResultSet rs = stmt.executeQuery(); 083 while (rs.next()) { 084 result.add(read(rs.getInt(1))); 085 } 086 return result; 087 } catch (SQLException e) { 088 String message = "Error reading trap list\n" + ExceptionUtils.getSQLExceptionCause(e); 089 log.warn(message, e); 090 throw new UnknownID(message, e); 091 } finally { 092 DBUtils.closeStatementIfOpen(stmt); 093 HarvestDBConnection.release(conn); 094 } 095 } 096 097 @Override 098 public List<GlobalCrawlerTrapList> getAllActive() { 099 return getAllByActivity(true); 100 } 101 102 @Override 103 public List<GlobalCrawlerTrapList> getAllInActive() { 104 return getAllByActivity(false); 105 } 106 107 @Override 108 public List<String> getAllActiveTrapExpressions() { 109 Connection conn = HarvestDBConnection.get(); 110 List<String> result = new ArrayList<String>(); 111 PreparedStatement stmt = null; 112 try { 113 stmt = conn.prepareStatement("SELECT DISTINCT trap_expression FROM global_crawler_trap_lists, " 114 + "global_crawler_trap_expressions " + "WHERE global_crawler_trap_list_id = " 115 + "crawler_trap_list_id " + "AND isActive = ?"); 116 stmt.setBoolean(1, true); 117 ResultSet rs = stmt.executeQuery(); 118 while (rs.next()) { 119 result.add(rs.getString(1)); 120 } 121 return result; 122 } catch (SQLException e) { 123 String message = "Error retrieving expressions.\n" + ExceptionUtils.getSQLExceptionCause(e); 124 log.warn(message, e); 125 throw new IOFailure(message, e); 126 } finally { 127 DBUtils.closeStatementIfOpen(stmt); 128 HarvestDBConnection.release(conn); 129 } 130 } 131 132 /** 133 * Statement to insert a new trap list. 134 */ 135 private static final String INSERT_TRAPLIST_STMT = "INSERT INTO global_crawler_trap_lists (name, description, isActive)" 136 + "VALUES (?,?,?)"; 137 138 /** 139 * Statement to insert a new trap expression in a given list. 140 */ 141 private static final String INSERT_TRAP_EXPR_STMT = "INSERT INTO global_crawler_trap_expressions (crawler_trap_list_id, trap_expression) VALUES (?,?) "; 142 143 @Override 144 public int create(GlobalCrawlerTrapList trapList) { 145 ArgumentNotValid.checkNotNull(trapList, "trapList"); 146 // Check for existence of a trapList in the database with the same name 147 // and throw argumentNotValid if not 148 if (exists(trapList.getName())) { 149 throw new ArgumentNotValid("Crawlertrap with name '" + trapList.getName() + "'already exists in database"); 150 } 151 int trapId; 152 Connection conn = HarvestDBConnection.get(); 153 PreparedStatement stmt = null; 154 try { 155 conn.setAutoCommit(false); 156 stmt = conn.prepareStatement(INSERT_TRAPLIST_STMT, Statement.RETURN_GENERATED_KEYS); 157 stmt.setString(1, trapList.getName()); 158 stmt.setString(2, trapList.getDescription()); 159 stmt.setBoolean(3, trapList.isActive()); 160 stmt.executeUpdate(); 161 ResultSet rs = stmt.getGeneratedKeys(); 162 rs.next(); 163 trapId = rs.getInt(1); 164 trapList.setId(trapId); 165 log.debug("Inserting {} traps into database for list {} (trapId={})", trapList.getTraps().size(), trapList.getName(), trapId); 166 for (String expr : trapList.getTraps()) { 167 stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT); 168 stmt.setInt(1, trapId); 169 stmt.setString(2, expr); 170 stmt.executeUpdate(); 171 } 172 conn.commit(); 173 } catch (SQLException e) { 174 String message = "SQL error creating global crawler trap list \n" + ExceptionUtils.getSQLExceptionCause(e); 175 log.warn(message, e); 176 throw new IOFailure(message, e); 177 } finally { 178 DBUtils.closeStatementIfOpen(stmt); 179 DBUtils.rollbackIfNeeded(conn, "create trap list", trapList); 180 HarvestDBConnection.release(conn); 181 } 182 return trapId; 183 } 184 185 /** 186 * Statement to delete a trap list. 187 */ 188 private static final String DELETE_TRAPLIST_STMT = "DELETE from global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?"; 189 190 /** 191 * Statement to delete all expressions in a given trap list. 192 */ 193 private static final String DELETE_EXPR_STMT = "DELETE FROM global_crawler_trap_expressions WHERE crawler_trap_list_id = ?"; 194 195 @Override 196 public void delete(int id) { 197 Connection conn = HarvestDBConnection.get(); 198 PreparedStatement stmt = null; 199 try { 200 conn.setAutoCommit(false); 201 // First delete the list. 202 stmt = conn.prepareStatement(DELETE_TRAPLIST_STMT); 203 stmt.setInt(1, id); 204 stmt.executeUpdate(); 205 stmt.close(); 206 // Then delete all its expressions. 207 stmt = conn.prepareStatement(DELETE_EXPR_STMT); 208 stmt.setInt(1, id); 209 stmt.executeUpdate(); 210 conn.commit(); 211 } catch (SQLException e) { 212 String message = "Error deleting trap list: '" + id + "'\n" + ExceptionUtils.getSQLExceptionCause(e); 213 log.warn(message, e); 214 throw new UnknownID(message, e); 215 } finally { 216 DBUtils.closeStatementIfOpen(stmt); 217 DBUtils.rollbackIfNeeded(conn, "delete trap list", id); 218 HarvestDBConnection.release(conn); 219 } 220 } 221 222 /** 223 * Statement to update the elementary properties of a trap list. 224 */ 225 private static final String LIST_UPDATE_STMT = "UPDATE global_crawler_trap_lists SET name = ?, description = ?, isActive = ? WHERE global_crawler_trap_list_id = ?"; 226 227 /** 228 * Update a trap list. In order to update the trap expressions for this list, we first delete all the existing trap 229 * expressions for the list then add all those in the updated version. 230 * 231 * @param trapList the trap list to update 232 */ 233 @Override 234 public void update(GlobalCrawlerTrapList trapList) { 235 ArgumentNotValid.checkNotNull(trapList, "trapList"); 236 Connection conn = HarvestDBConnection.get(); 237 PreparedStatement stmt = null; 238 try { 239 conn.setAutoCommit(false); 240 stmt = conn.prepareStatement(LIST_UPDATE_STMT); 241 stmt.setString(1, trapList.getName()); 242 stmt.setString(2, trapList.getDescription()); 243 stmt.setBoolean(3, trapList.isActive()); 244 stmt.setInt(4, trapList.getId()); 245 stmt.executeUpdate(); 246 stmt.close(); 247 248 // Delete all the trap expressions. 249 stmt = conn.prepareStatement(DELETE_EXPR_STMT); 250 stmt.setInt(1, trapList.getId()); 251 stmt.executeUpdate(); 252 stmt.close(); 253 // Add the new trap expressions one by one. 254 for (String expr : trapList.getTraps()) { 255 stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT); 256 stmt.setInt(1, trapList.getId()); 257 stmt.setString(2, expr); 258 stmt.executeUpdate(); 259 stmt.close(); 260 } 261 conn.commit(); 262 } catch (SQLException e) { 263 String message = "Error updating trap list :'" + trapList.getId() + "'\n" 264 + ExceptionUtils.getSQLExceptionCause(e); 265 log.warn(message, e); 266 throw new UnknownID(message, e); 267 } finally { 268 DBUtils.closeStatementIfOpen(stmt); 269 DBUtils.rollbackIfNeeded(conn, "update trap list", trapList); 270 HarvestDBConnection.release(conn); 271 } 272 } 273 274 /** 275 * Statement to read the elementary properties of a trap list. 276 */ 277 private static final String SELECT_TRAPLIST_STMT = "SELECT name, description, isActive FROM global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?"; 278 279 /** 280 * Statement to read the trap expressions for a trap list. 281 */ 282 private static final String SELECT_TRAP_EXPRESSIONS_STMT = "SELECT trap_expression from global_crawler_trap_expressions WHERE crawler_trap_list_id = ?"; 283 284 @Override 285 public GlobalCrawlerTrapList read(int id) { 286 Connection conn = HarvestDBConnection.get(); 287 PreparedStatement stmt = null; 288 try { 289 stmt = conn.prepareStatement(SELECT_TRAPLIST_STMT); 290 stmt.setInt(1, id); 291 ResultSet rs = stmt.executeQuery(); 292 if (!rs.next()) { 293 throw new UnknownID("No such GlobalCrawlerTrapList: '" + id + "'"); 294 } 295 String name = rs.getString("name"); 296 String description = rs.getString("description"); 297 boolean isActive = rs.getBoolean("isActive"); 298 stmt.close(); 299 stmt = conn.prepareStatement(SELECT_TRAP_EXPRESSIONS_STMT); 300 stmt.setInt(1, id); 301 rs = stmt.executeQuery(); 302 List<String> exprs = new ArrayList<String>(); 303 while (rs.next()) { 304 exprs.add(rs.getString("trap_expression")); 305 } 306 return new GlobalCrawlerTrapList(id, exprs, name, description, isActive); 307 } catch (SQLException e) { 308 String message = "Error retrieving trap list for id '" + id + "'\n" 309 + ExceptionUtils.getSQLExceptionCause(e); 310 log.warn(message, e); 311 throw new IOFailure(message, e); 312 } finally { 313 DBUtils.closeStatementIfOpen(stmt); 314 HarvestDBConnection.release(conn); 315 } 316 } 317 318 /** 319 * Statement to read the elementary properties of a trap list. 320 */ 321 private static final String EXISTS_TRAPLIST_STMT = "SELECT * FROM global_crawler_trap_lists WHERE name = ?"; 322 323 /** 324 * Does crawlertrap with this name already exist. 325 * 326 * @param name The name for a crawlertrap 327 * @return true, if a crawlertrap with the given name already exists in the database; otherwise false 328 */ 329 @Override 330 public boolean exists(String name) { 331 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 332 Connection conn = HarvestDBConnection.get(); 333 PreparedStatement stmt = null; 334 boolean exists = false; 335 try { 336 stmt = conn.prepareStatement(EXISTS_TRAPLIST_STMT); 337 stmt.setString(1, name); 338 ResultSet rs = stmt.executeQuery(); 339 if (rs.next()) { 340 exists = true; 341 } 342 343 return exists; 344 } catch (SQLException e) { 345 String message = "Error checking for existence of trap list " + "with name '" + name + "'\n" 346 + ExceptionUtils.getSQLExceptionCause(e); 347 log.warn(message, e); 348 throw new IOFailure(message, e); 349 } finally { 350 DBUtils.closeStatementIfOpen(stmt); 351 HarvestDBConnection.release(conn); 352 } 353 } 354 355 /** 356 * Reads a list of all active global crawler trap expressions from the database and adds them to the crawl template 357 * for this job. 358 * @param orderXmlDoc The template to add crawlertraps to. 359 */ 360 public void addGlobalCrawlerTraps(HeritrixTemplate orderXmlDoc) { 361 362 GlobalCrawlerTrapListDAO dao = GlobalCrawlerTrapListDAO.getInstance(); 363 orderXmlDoc.insertCrawlerTraps(Constants.GLOBAL_CRAWLER_TRAPS_ELEMENT_NAME, 364 dao.getAllActiveTrapExpressions()); 365 } 366}