001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.sql.Connection; 027import java.sql.PreparedStatement; 028import java.sql.ResultSet; 029import java.sql.SQLException; 030import java.sql.Statement; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.List; 034 035import org.slf4j.Logger; 036import org.slf4j.LoggerFactory; 037 038import dk.netarkivet.common.exceptions.ArgumentNotValid; 039import dk.netarkivet.common.exceptions.IOFailure; 040import dk.netarkivet.common.exceptions.UnknownID; 041import dk.netarkivet.common.utils.DBUtils; 042import dk.netarkivet.common.utils.ExceptionUtils; 043 044/** 045 * A singleton giving access to global crawler traps. 046 */ 047public class GlobalCrawlerTrapListDBDAO extends GlobalCrawlerTrapListDAO { 048 049 /** The logger for this class. */ 050 private static final Logger log = LoggerFactory.getLogger(GlobalCrawlerTrapListDBDAO.class); 051 052 /** 053 * protected constructor of this class. Checks if any migration are needed before operation starts. 054 */ 055 protected GlobalCrawlerTrapListDBDAO() { 056 Connection connection = HarvestDBConnection.get(); 057 try { 058 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPEXPRESSIONS); 059 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPLISTS); 060 } finally { 061 HarvestDBConnection.release(connection); 062 } 063 } 064 065 /** 066 * Statement to select all trap lists which are either active or inactive. 067 */ 068 private static final String SELECT_BY_ACTIVITY = "SELECT global_crawler_trap_list_id FROM global_crawler_trap_lists WHERE isActive = ?"; 069 070 /** 071 * Returns a list of either all active or all inactive trap lists. 072 * 073 * @param isActive whether to return active or inactive lists. 074 * @return a list if global crawler trap lists. 075 */ 076 private List<GlobalCrawlerTrapList> getAllByActivity(boolean isActive) { 077 List<GlobalCrawlerTrapList> result = new ArrayList<GlobalCrawlerTrapList>(); 078 Connection conn = HarvestDBConnection.get(); 079 PreparedStatement stmt = null; 080 try { 081 stmt = conn.prepareStatement(SELECT_BY_ACTIVITY); 082 stmt.setBoolean(1, isActive); 083 ResultSet rs = stmt.executeQuery(); 084 while (rs.next()) { 085 result.add(read(rs.getInt(1))); 086 } 087 return result; 088 } catch (SQLException e) { 089 String message = "Error reading trap list\n" + ExceptionUtils.getSQLExceptionCause(e); 090 log.warn(message, e); 091 throw new UnknownID(message, e); 092 } finally { 093 DBUtils.closeStatementIfOpen(stmt); 094 HarvestDBConnection.release(conn); 095 } 096 } 097 098 @Override 099 public List<GlobalCrawlerTrapList> getAllActive() { 100 return getAllByActivity(true); 101 } 102 103 @Override 104 public List<GlobalCrawlerTrapList> getAllInActive() { 105 return getAllByActivity(false); 106 } 107 108 @Override 109 public List<String> getAllActiveTrapExpressions() { 110 Connection conn = HarvestDBConnection.get(); 111 List<String> result = new ArrayList<String>(); 112 PreparedStatement stmt = null; 113 try { 114 stmt = conn.prepareStatement("SELECT DISTINCT trap_expression FROM global_crawler_trap_lists, " 115 + "global_crawler_trap_expressions " + "WHERE global_crawler_trap_list_id = " 116 + "crawler_trap_list_id " + "AND isActive = ?"); 117 stmt.setBoolean(1, true); 118 ResultSet rs = stmt.executeQuery(); 119 while (rs.next()) { 120 result.add(rs.getString(1)); 121 } 122 return result; 123 } catch (SQLException e) { 124 String message = "Error retrieving expressions.\n" + ExceptionUtils.getSQLExceptionCause(e); 125 log.warn(message, e); 126 throw new IOFailure(message, e); 127 } finally { 128 DBUtils.closeStatementIfOpen(stmt); 129 HarvestDBConnection.release(conn); 130 } 131 } 132 133 /** 134 * Statement to insert a new trap list. 135 */ 136 private static final String INSERT_TRAPLIST_STMT = "INSERT INTO global_crawler_trap_lists (name, description, isActive)" 137 + "VALUES (?,?,?)"; 138 139 /** 140 * Statement to insert a new trap expression in a given list. 141 */ 142 private static final String INSERT_TRAP_EXPR_STMT = "INSERT INTO global_crawler_trap_expressions (crawler_trap_list_id, trap_expression) VALUES (?,?) "; 143 144 @Override 145 public int create(GlobalCrawlerTrapList trapList) { 146 ArgumentNotValid.checkNotNull(trapList, "trapList"); 147 // Check for existence of a trapList in the database with the same name 148 // and throw argumentNotValid if not 149 if (exists(trapList.getName())) { 150 throw new ArgumentNotValid("Crawlertrap with name '" + trapList.getName() + "'already exists in database"); 151 } 152 int trapId; 153 Connection conn = HarvestDBConnection.get(); 154 PreparedStatement stmt = null; 155 try { 156 conn.setAutoCommit(false); 157 stmt = conn.prepareStatement(INSERT_TRAPLIST_STMT, Statement.RETURN_GENERATED_KEYS); 158 stmt.setString(1, trapList.getName()); 159 stmt.setString(2, trapList.getDescription()); 160 stmt.setBoolean(3, trapList.isActive()); 161 stmt.executeUpdate(); 162 ResultSet rs = stmt.getGeneratedKeys(); 163 rs.next(); 164 trapId = rs.getInt(1); 165 trapList.setId(trapId); 166 log.debug("Inserting {} traps into database for list {} (trapId={})", trapList.getTraps().size(), trapList.getName(), trapId); 167 for (String expr : trapList.getTraps()) { 168 stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT); 169 stmt.setInt(1, trapId); 170 stmt.setString(2, expr); 171 stmt.executeUpdate(); 172 } 173 conn.commit(); 174 } catch (SQLException e) { 175 String message = "SQL error creating global crawler trap list \n" + ExceptionUtils.getSQLExceptionCause(e); 176 log.warn(message, e); 177 throw new IOFailure(message, e); 178 } finally { 179 DBUtils.closeStatementIfOpen(stmt); 180 DBUtils.rollbackIfNeeded(conn, "create trap list", trapList); 181 HarvestDBConnection.release(conn); 182 } 183 return trapId; 184 } 185 186 /** 187 * Statement to delete a trap list. 188 */ 189 private static final String DELETE_TRAPLIST_STMT = "DELETE from global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?"; 190 191 /** 192 * Statement to delete all expressions in a given trap list. 193 */ 194 private static final String DELETE_EXPR_STMT = "DELETE FROM global_crawler_trap_expressions WHERE crawler_trap_list_id = ?"; 195 196 @Override 197 public void delete(int id) { 198 Connection conn = HarvestDBConnection.get(); 199 PreparedStatement stmt = null; 200 try { 201 conn.setAutoCommit(false); 202 // First delete the list. 203 stmt = conn.prepareStatement(DELETE_TRAPLIST_STMT); 204 stmt.setInt(1, id); 205 stmt.executeUpdate(); 206 stmt.close(); 207 // Then delete all its expressions. 208 stmt = conn.prepareStatement(DELETE_EXPR_STMT); 209 stmt.setInt(1, id); 210 stmt.executeUpdate(); 211 conn.commit(); 212 } catch (SQLException e) { 213 String message = "Error deleting trap list: '" + id + "'\n" + ExceptionUtils.getSQLExceptionCause(e); 214 log.warn(message, e); 215 throw new UnknownID(message, e); 216 } finally { 217 DBUtils.closeStatementIfOpen(stmt); 218 DBUtils.rollbackIfNeeded(conn, "delete trap list", id); 219 HarvestDBConnection.release(conn); 220 } 221 } 222 223 /** 224 * Statement to update the elementary properties of a trap list. 225 */ 226 private static final String LIST_UPDATE_STMT = "UPDATE global_crawler_trap_lists SET name = ?, description = ?, isActive = ? WHERE global_crawler_trap_list_id = ?"; 227 228 /** 229 * Update a trap list. In order to update the trap expressions for this list, we first delete all the existing trap 230 * expressions for the list then add all those in the updated version. 231 * 232 * @param trapList the trap list to update 233 */ 234 @Override 235 public void update(GlobalCrawlerTrapList trapList) { 236 ArgumentNotValid.checkNotNull(trapList, "trapList"); 237 Connection conn = HarvestDBConnection.get(); 238 PreparedStatement stmt = null; 239 try { 240 conn.setAutoCommit(false); 241 stmt = conn.prepareStatement(LIST_UPDATE_STMT); 242 stmt.setString(1, trapList.getName()); 243 stmt.setString(2, trapList.getDescription()); 244 stmt.setBoolean(3, trapList.isActive()); 245 stmt.setInt(4, trapList.getId()); 246 stmt.executeUpdate(); 247 stmt.close(); 248 249 // Delete all the trap expressions. 250 stmt = conn.prepareStatement(DELETE_EXPR_STMT); 251 stmt.setInt(1, trapList.getId()); 252 stmt.executeUpdate(); 253 stmt.close(); 254 // Add the new trap expressions one by one. 255 for (String expr : trapList.getTraps()) { 256 stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT); 257 stmt.setInt(1, trapList.getId()); 258 stmt.setString(2, expr); 259 stmt.executeUpdate(); 260 stmt.close(); 261 } 262 conn.commit(); 263 } catch (SQLException e) { 264 String message = "Error updating trap list :'" + trapList.getId() + "'\n" 265 + ExceptionUtils.getSQLExceptionCause(e); 266 log.warn(message, e); 267 throw new UnknownID(message, e); 268 } finally { 269 DBUtils.closeStatementIfOpen(stmt); 270 DBUtils.rollbackIfNeeded(conn, "update trap list", trapList); 271 HarvestDBConnection.release(conn); 272 } 273 } 274 275 /** 276 * Statement to read the elementary properties of a trap list. 277 */ 278 private static final String SELECT_TRAPLIST_STMT = "SELECT name, description, isActive FROM global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?"; 279 280 /** 281 * Statement to read the trap expressions for a trap list. 282 */ 283 private static final String SELECT_TRAP_EXPRESSIONS_STMT = "SELECT trap_expression from global_crawler_trap_expressions WHERE crawler_trap_list_id = ?"; 284 285 @Override 286 public GlobalCrawlerTrapList read(int id) { 287 Connection conn = HarvestDBConnection.get(); 288 PreparedStatement stmt = null; 289 try { 290 stmt = conn.prepareStatement(SELECT_TRAPLIST_STMT); 291 stmt.setInt(1, id); 292 ResultSet rs = stmt.executeQuery(); 293 if (!rs.next()) { 294 throw new UnknownID("No such GlobalCrawlerTrapList: '" + id + "'"); 295 } 296 String name = rs.getString("name"); 297 String description = rs.getString("description"); 298 boolean isActive = rs.getBoolean("isActive"); 299 stmt.close(); 300 stmt = conn.prepareStatement(SELECT_TRAP_EXPRESSIONS_STMT); 301 stmt.setInt(1, id); 302 rs = stmt.executeQuery(); 303 List<String> exprs = new ArrayList<String>(); 304 while (rs.next()) { 305 exprs.add(rs.getString("trap_expression")); 306 } 307 return new GlobalCrawlerTrapList(id, exprs, name, description, isActive); 308 } catch (SQLException e) { 309 String message = "Error retrieving trap list for id '" + id + "'\n" 310 + ExceptionUtils.getSQLExceptionCause(e); 311 log.warn(message, e); 312 throw new IOFailure(message, e); 313 } finally { 314 DBUtils.closeStatementIfOpen(stmt); 315 HarvestDBConnection.release(conn); 316 } 317 } 318 319 /** 320 * Statement to read the elementary properties of a trap list. 321 */ 322 private static final String EXISTS_TRAPLIST_STMT = "SELECT * FROM global_crawler_trap_lists WHERE name = ?"; 323 324 /** 325 * Does crawlertrap with this name already exist. 326 * 327 * @param name The name for a crawlertrap 328 * @return true, if a crawlertrap with the given name already exists in the database; otherwise false 329 */ 330 @Override 331 public boolean exists(String name) { 332 ArgumentNotValid.checkNotNullOrEmpty(name, "name"); 333 Connection conn = HarvestDBConnection.get(); 334 PreparedStatement stmt = null; 335 boolean exists = false; 336 try { 337 stmt = conn.prepareStatement(EXISTS_TRAPLIST_STMT); 338 stmt.setString(1, name); 339 ResultSet rs = stmt.executeQuery(); 340 if (rs.next()) { 341 exists = true; 342 } 343 344 return exists; 345 } catch (SQLException e) { 346 String message = "Error checking for existence of trap list " + "with name '" + name + "'\n" 347 + ExceptionUtils.getSQLExceptionCause(e); 348 log.warn(message, e); 349 throw new IOFailure(message, e); 350 } finally { 351 DBUtils.closeStatementIfOpen(stmt); 352 HarvestDBConnection.release(conn); 353 } 354 } 355 356 /** 357 * Reads a list of all active global crawler trap expressions from the database and adds them to the crawl template 358 * for this job. 359 * @param orderXmlDoc The template to add crawlertraps to. 360 */ 361 public void addGlobalCrawlerTraps(HeritrixTemplate orderXmlDoc) { 362 363 GlobalCrawlerTrapListDAO dao = GlobalCrawlerTrapListDAO.getInstance(); 364 dao.getAllActive().stream().forEach(globalCrawlerTrapList -> { 365 orderXmlDoc.insertCrawlerTraps(globalCrawlerTrapList.getName(), 366 new ArrayList<>(globalCrawlerTrapList.getTraps())); 367 }); 368 369 } 370}