001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.sql.Clob; 027import java.sql.Connection; 028import java.sql.PreparedStatement; 029import java.sql.ResultSet; 030import java.sql.SQLException; 031import java.sql.Statement; 032import java.sql.Timestamp; 033import java.sql.Types; 034import java.util.ArrayList; 035import java.util.Arrays; 036import java.util.Collections; 037import java.util.Date; 038import java.util.HashMap; 039import java.util.Iterator; 040import java.util.LinkedList; 041import java.util.List; 042import java.util.Map; 043import java.util.stream.Collectors; 044 045import org.slf4j.Logger; 046import org.slf4j.LoggerFactory; 047 048import dk.netarkivet.common.exceptions.ArgumentNotValid; 049import dk.netarkivet.common.exceptions.IOFailure; 050import dk.netarkivet.common.exceptions.PermissionDenied; 051import dk.netarkivet.common.exceptions.UnknownID; 052import dk.netarkivet.common.utils.DBUtils; 053import dk.netarkivet.common.utils.DomainUtils; 054import dk.netarkivet.common.utils.FilterIterator; 055import dk.netarkivet.common.utils.StringUtils; 056import dk.netarkivet.harvester.datamodel.eav.EAV; 057import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType; 058import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValue; 059import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDAO; 060import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDBDAO; 061 062/** 063 * A database-based implementation of the DomainDAO. 064 * <p> 065 * The statements to create the tables are located in: 066 * <ul> 067 * <li><em>Derby:</em> scripts/sql/createfullhddb.sql</li> 068 * <li><em>MySQL:</em> scripts/sql/createfullhddb.mysql</li> 069 * <li><em>PostgreSQL:</em> scripts/postgresql/netarchivesuite_init.sql</li> 070 * </ul> 071 */ 072public class DomainDBDAO extends DomainDAO { 073 074 /** The log. */ 075 private static final Logger log = LoggerFactory.getLogger(DomainDBDAO.class); 076 077 /** 078 * Creates a database-based implementation of the DomainDAO. Will check that all schemas have correct versions, and 079 * update the ones that haven't. 080 * 081 * @throws IOFailure on trouble updating tables to new versions, or on tables with wrong versions that we don't know 082 * how to change to expected version. 083 */ 084 protected DomainDBDAO() { 085 Connection connection = HarvestDBConnection.get(); 086 try { 087 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.CONFIGURATIONS); 088 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.DOMAINS); 089 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.CONFIGPASSWORDS); 090 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.CONFIGSEEDLISTS); 091 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.SEEDLISTS); 092 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.PASSWORDS); 093 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.OWNERINFO); 094 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HISTORYINFO); 095 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDTYPE); 096 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELD); 097 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDVALUE); 098 } finally { 099 HarvestDBConnection.release(connection); 100 } 101 } 102 103 @Override 104 protected void create(Connection connection, Domain d) { 105 ArgumentNotValid.checkNotNull(d, "d"); 106 ArgumentNotValid.checkNotNullOrEmpty(d.getName(), "d.getName()"); 107 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(d.getName()),"Not creating domain wth invalid name " + d.getName()); 108 109 if (exists(connection, d.getName())) { 110 String msg = "Cannot create already existing domain " + d; 111 log.debug(msg); 112 throw new PermissionDenied(msg); 113 } 114 115 PreparedStatement s = null; 116 log.debug("trying to create domain with name: " + d.getName()); 117 try { 118 connection.setAutoCommit(false); 119 s = connection.prepareStatement("INSERT INTO domains " 120 + "(name, comments, defaultconfig, crawlertraps, edition, alias, lastaliasupdate ) " 121 + "VALUES ( ?, ?, -1, ?, ?, ?, ? )", Statement.RETURN_GENERATED_KEYS); 122 // Id is autogenerated 123 // defaultconfig cannot exist yet, so we put in -1 124 // until we have configs 125 DBUtils.setName(s, 1, d, Constants.MAX_NAME_SIZE); 126 DBUtils.setComments(s, 2, d, Constants.MAX_COMMENT_SIZE); 127 s.setString(3, StringUtils.conjoin("\n", d.getCrawlerTraps())); 128 long initialEdition = 1; 129 s.setLong(4, initialEdition); 130 AliasInfo aliasInfo = d.getAliasInfo(); 131 DBUtils.setLongMaybeNull( 132 s, 133 5, 134 aliasInfo == null ? null : DBUtils.selectLongValue(connection, 135 "SELECT domain_id FROM domains WHERE name = ?", aliasInfo.getAliasOf())); 136 DBUtils.setDateMaybeNull(s, 6, aliasInfo == null ? null : aliasInfo.getLastChange()); 137 s.executeUpdate(); 138 139 d.setID(DBUtils.getGeneratedID(s)); 140 s.close(); 141 142 Iterator<Password> passwords = d.getAllPasswords(); 143 while (passwords.hasNext()) { 144 Password p = passwords.next(); 145 insertPassword(connection, d, p); 146 } 147 148 Iterator<SeedList> seedlists = d.getAllSeedLists(); 149 if (!seedlists.hasNext()) { 150 String msg = "No seedlists for domain " + d; 151 log.debug(msg); 152 throw new ArgumentNotValid(msg); 153 } 154 while (seedlists.hasNext()) { 155 SeedList sl = seedlists.next(); 156 insertSeedlist(connection, d, sl); 157 } 158 159 Iterator<DomainConfiguration> dcs = d.getAllConfigurations(); 160 if (!dcs.hasNext()) { 161 String msg = "No configurations for domain " + d; 162 log.debug(msg); 163 throw new ArgumentNotValid(msg); 164 } 165 while (dcs.hasNext()) { 166 DomainConfiguration dc = dcs.next(); 167 insertConfiguration(connection, d, dc); 168 169 // Create xref tables for seedlists referenced by this config 170 createConfigSeedlistsEntries(connection, d, dc); 171 172 // Create xref tables for passwords referenced by this config 173 createConfigPasswordsEntries(connection, d, dc); 174 } 175 176 // Now that configs are defined, set the default config. 177 s = connection.prepareStatement("UPDATE domains SET defaultconfig = (SELECT config_id FROM configurations " 178 + "WHERE configurations.name = ? AND configurations.domain_id = ?) WHERE domain_id = ?"); 179 DBUtils.setName(s, 1, d.getDefaultConfiguration(), Constants.MAX_NAME_SIZE); 180 s.setLong(2, d.getID()); 181 s.setLong(3, d.getID()); 182 s.executeUpdate(); 183 s.close(); 184 for (Iterator<HarvestInfo> hi = d.getHistory().getHarvestInfo(); hi.hasNext();) { 185 insertHarvestInfo(connection, d, hi.next()); 186 } 187 188 for (DomainOwnerInfo doi : d.getAllDomainOwnerInfo()) { 189 insertOwnerInfo(connection, d, doi); 190 } 191 192 saveExtendedFieldValues(connection, d); 193 194 connection.commit(); 195 d.setEdition(initialEdition); 196 } catch (SQLException e) { 197 String message = "SQL error creating domain " + d + " in database" + "\n"; 198 log.warn(message, e); 199 throw new IOFailure(message, e); 200 } finally { 201 DBUtils.rollbackIfNeeded(connection, "creating", d); 202 } 203 } 204 205 @Override 206 public synchronized void update(Domain d) { 207 ArgumentNotValid.checkNotNull(d, "domain"); 208 209 if (!exists(d.getName())) { 210 throw new UnknownID("No domain named " + d.getName() + " exists"); 211 } 212 Connection connection = HarvestDBConnection.get(); 213 PreparedStatement s = null; 214 try { 215 connection.setAutoCommit(false); 216 // Domain object may not have ID yet, so get it from the DB 217 long domainID = DBUtils.selectLongValue(connection, "SELECT domain_id FROM domains WHERE name = ?", 218 d.getName()); 219 if (d.hasID() && d.getID() != domainID) { 220 String message = "Domain " + d + " has wrong id: Has " + d.getID() + ", but persistent store claims " 221 + domainID; 222 log.warn(message); 223 throw new ArgumentNotValid(message); 224 } 225 d.setID(domainID); 226 227 // The alias field is now updated using a separate select request 228 // rather than embedding the select inside the update statement. 229 // This change was needed to accommodate MySQL, and may lower 230 // performance. 231 s = connection.prepareStatement("UPDATE domains SET " 232 + "comments = ?, crawlertraps = ?, edition = ?, alias = ?, lastAliasUpdate = ? " 233 + "WHERE domain_id = ? AND edition = ?"); 234 DBUtils.setComments(s, 1, d, Constants.MAX_COMMENT_SIZE); 235 s.setString(2, StringUtils.conjoin("\n", d.getCrawlerTraps())); 236 final long newEdition = d.getEdition() + 1; 237 s.setLong(3, newEdition); 238 AliasInfo aliasInfo = d.getAliasInfo(); 239 DBUtils.setLongMaybeNull( 240 s, 241 4, 242 aliasInfo == null ? null : DBUtils.selectLongValue(connection, 243 "SELECT domain_id FROM domains WHERE name = ?", aliasInfo.getAliasOf())); 244 DBUtils.setDateMaybeNull(s, 5, aliasInfo == null ? null : aliasInfo.getLastChange()); 245 s.setLong(6, d.getID()); 246 s.setLong(7, d.getEdition()); 247 int rows = s.executeUpdate(); 248 if (rows == 0) { 249 String message = "Edition " + d.getEdition() + " has expired, cannot update " + d; 250 log.debug(message); 251 throw new PermissionDenied(message); 252 } 253 s.close(); 254 255 updatePasswords(connection, d); 256 257 updateSeedlists(connection, d); 258 259 updateConfigurations(connection, d); 260 261 updateOwnerInfo(connection, d); 262 263 updateHarvestInfo(connection, d); 264 265 saveExtendedFieldValues(connection, d); 266 267 // Now that configs are updated, we can set default_config 268 s = connection.prepareStatement("UPDATE domains SET defaultconfig = (SELECT config_id " 269 + "FROM configurations WHERE domain_id = ? AND name = ?) WHERE domain_id = ?"); 270 s.setLong(1, d.getID()); 271 s.setString(2, d.getDefaultConfiguration().getName()); 272 s.setLong(3, d.getID()); 273 s.executeUpdate(); 274 connection.commit(); 275 d.setEdition(newEdition); 276 } catch (SQLException e) { 277 String message = "SQL error updating domain " + d + " in database" + "\n"; 278 log.warn(message, e); 279 throw new IOFailure(message, e); 280 } finally { 281 DBUtils.closeStatementIfOpen(s); 282 DBUtils.rollbackIfNeeded(connection, "updating", d); 283 HarvestDBConnection.release(connection); 284 } 285 } 286 287 /** 288 * Update the list of passwords for the given domain, keeping IDs where applicable. 289 * 290 * @param c A connection to the database 291 * @param d A domain to update. 292 * @throws SQLException If any database problems occur during the update process. 293 */ 294 private void updatePasswords(Connection c, Domain d) throws SQLException { 295 Map<String, Long> oldNames = DBUtils.selectStringLongMap(c, 296 "SELECT name, password_id FROM passwords WHERE domain_id = ?", d.getID()); 297 PreparedStatement s = c.prepareStatement("UPDATE passwords SET " + "comments = ?, " + "url = ?, " 298 + "realm = ?, username = ?, " + "password = ? " + "WHERE name = ? AND domain_id = ?"); 299 for (Iterator<Password> pwds = d.getAllPasswords(); pwds.hasNext();) { 300 Password pwd = pwds.next(); 301 if (oldNames.containsKey(pwd.getName())) { 302 DBUtils.setComments(s, 1, pwd, Constants.MAX_COMMENT_SIZE); 303 DBUtils.setStringMaxLength(s, 2, pwd.getPasswordDomain(), Constants.MAX_URL_SIZE, pwd, "password url"); 304 DBUtils.setStringMaxLength(s, 3, pwd.getRealm(), Constants.MAX_REALM_NAME_SIZE, pwd, "password realm"); 305 DBUtils.setStringMaxLength(s, 4, pwd.getUsername(), Constants.MAX_USER_NAME_SIZE, pwd, 306 "password username"); 307 DBUtils.setStringMaxLength(s, 5, pwd.getPassword(), Constants.MAX_PASSWORD_SIZE, pwd, "password"); 308 s.setString(6, pwd.getName()); 309 s.setLong(7, d.getID()); 310 s.executeUpdate(); 311 s.clearParameters(); 312 pwd.setID(oldNames.get(pwd.getName())); 313 oldNames.remove(pwd.getName()); 314 } else { 315 insertPassword(c, d, pwd); 316 } 317 } 318 s.close(); 319 s = c.prepareStatement("DELETE FROM passwords WHERE password_id = ?"); 320 for (Long gone : oldNames.values()) { 321 // Check that we're not deleting something that's in use 322 // Since deletion is very rare, this is allowed to take 323 // some time. 324 String usages = DBUtils.getUsages(c, "SELECT configurations.name FROM configurations, config_passwords" 325 + " WHERE configurations.config_id = config_passwords.config_id " 326 + "AND config_passwords.password_id = ?", gone, gone); 327 if (usages != null) { 328 String name = DBUtils.selectStringValue(c, "SELECT name FROM passwords WHERE password_id = ?", gone); 329 String message = "Cannot delete password " + name + " as it is used in " + usages; 330 log.debug(message); 331 throw new PermissionDenied(message); 332 } 333 s.setLong(1, gone); 334 s.executeUpdate(); 335 s.clearParameters(); 336 } 337 } 338 339 /** 340 * Update the list of seedlists for the given domain, keeping IDs where applicable. 341 * 342 * @param c A connection to the database 343 * @param d A domain to update. 344 * @throws SQLException If any database problems occur during the update process. 345 */ 346 private void updateSeedlists(Connection c, Domain d) throws SQLException { 347 Map<String, Long> oldNames = DBUtils.selectStringLongMap(c, "SELECT name, seedlist_id FROM seedlists " 348 + "WHERE domain_id = ?", d.getID()); 349 PreparedStatement s = c.prepareStatement("UPDATE seedlists SET comments = ?, " + "seeds = ? " 350 + "WHERE name = ? AND domain_id = ?"); 351 for (Iterator<SeedList> sls = d.getAllSeedLists(); sls.hasNext();) { 352 SeedList sl = sls.next(); 353 if (oldNames.containsKey(sl.getName())) { 354 DBUtils.setComments(s, 1, sl, Constants.MAX_COMMENT_SIZE); 355 DBUtils.setClobMaxLength(s, 2, sl.getSeedsAsString(), Constants.MAX_SEED_LIST_SIZE, sl, "seedlist"); 356 s.setString(3, sl.getName()); 357 s.setLong(4, d.getID()); 358 s.executeUpdate(); 359 s.clearParameters(); 360 sl.setID(oldNames.get(sl.getName())); 361 oldNames.remove(sl.getName()); 362 } else { 363 insertSeedlist(c, d, sl); 364 } 365 } 366 s.close(); 367 s = c.prepareStatement("DELETE FROM seedlists WHERE seedlist_id = ?"); 368 for (Long gone : oldNames.values()) { 369 // Check that we're not deleting something that's in use 370 // Since deletion is very rare, this is allowed to take 371 // some time. 372 String usages = DBUtils.getUsages(c, "SELECT configurations.name FROM configurations, config_seedlists" 373 + " WHERE configurations.config_id = config_seedlists.config_id " 374 + "AND config_seedlists.seedlist_id = ?", gone, gone); 375 if (usages != null) { 376 String name = DBUtils.selectStringValue(c, "SELECT name FROM seedlists WHERE seedlist_id = ?", gone); 377 String message = "Cannot delete seedlist " + name + " as it is used in " + usages; 378 log.debug(message); 379 throw new PermissionDenied(message); 380 } 381 s.setLong(1, gone); 382 s.executeUpdate(); 383 s.clearParameters(); 384 } 385 } 386 387 /** 388 * Update the list of configurations for the given domain, keeping IDs where applicable. This also builds the xref 389 * tables for passwords and seedlists used in configurations, and so should be run after those are updated. 390 * 391 * @param connection A connection to the database 392 * @param d A domain to update. 393 * @throws SQLException If any database problems occur during the update process. 394 */ 395 private void updateConfigurations(Connection connection, Domain d) throws SQLException { 396 Map<String, Long> oldNames = DBUtils.selectStringLongMap(connection, 397 "SELECT name, config_id FROM configurations WHERE domain_id = ?", d.getID()); 398 PreparedStatement s = connection.prepareStatement("UPDATE configurations SET comments = ?, " 399 + "template_id = ( SELECT template_id FROM ordertemplates " + "WHERE name = ? ), " + "maxobjects = ?, " 400 + "maxrate = ?, " + "maxbytes = ? " + "WHERE name = ? AND domain_id = ?"); 401 for (Iterator<DomainConfiguration> dcs = d.getAllConfigurations(); dcs.hasNext();) { 402 DomainConfiguration dc = dcs.next(); 403 404 if (oldNames.containsKey(dc.getName())) { 405 // Update 406 DBUtils.setComments(s, 1, dc, Constants.MAX_COMMENT_SIZE); 407 s.setString(2, dc.getOrderXmlName()); 408 s.setLong(3, dc.getMaxObjects()); 409 s.setInt(4, dc.getMaxRequestRate()); 410 s.setLong(5, dc.getMaxBytes()); 411 s.setString(6, dc.getName()); 412 s.setLong(7, d.getID()); 413 s.executeUpdate(); 414 s.clearParameters(); 415 dc.setID(oldNames.get(dc.getName())); 416 oldNames.remove(dc.getName()); 417 } else { 418 insertConfiguration(connection, d, dc); 419 } 420 421 updateConfigPasswordsEntries(connection, d, dc); 422 updateConfigSeedlistsEntries(connection, d, dc); 423 } 424 s.close(); 425 s = connection.prepareStatement("DELETE FROM configurations WHERE config_id = ?"); 426 for (Long gone : oldNames.values()) { 427 // Before deleting, check if this is unused. Since deletion is 428 // rare, this is allowed to take some time to give good output 429 String usages = DBUtils.getUsages(connection, "SELECT harvestdefinitions.name" 430 + " FROM harvestdefinitions, harvest_configs WHERE harvestdefinitions.harvest_id = " 431 + "harvest_configs.harvest_id AND harvest_configs.config_id = ?", gone, gone); 432 if (usages != null) { 433 String name = DBUtils.selectStringValue(connection, "SELECT name FROM configurations " 434 + "WHERE config_id = ?", gone); 435 String message = "Cannot delete configuration " + name + " as it is used in " + usages; 436 log.debug(message); 437 throw new PermissionDenied(message); 438 } 439 s.setLong(1, gone); 440 s.executeUpdate(); 441 s.clearParameters(); 442 } 443 } 444 445 /** 446 * Update the list of owner info for the given domain, keeping IDs where applicable. 447 * 448 * @param c A connection to the database 449 * @param d A domain to update. 450 * @throws SQLException If any database problems occur during the update process. 451 */ 452 private void updateOwnerInfo(Connection c, Domain d) throws SQLException { 453 List<Long> oldIDs = DBUtils.selectLongList(c, "SELECT ownerinfo_id FROM ownerinfo " + "WHERE domain_id = ?", 454 d.getID()); 455 PreparedStatement s = c.prepareStatement("UPDATE ownerinfo SET " + "created = ?, " + "info = ? " 456 + "WHERE ownerinfo_id = ?"); 457 for (DomainOwnerInfo doi : d.getAllDomainOwnerInfo()) { 458 if (doi.hasID() && oldIDs.remove(doi.getID())) { 459 s.setTimestamp(1, new Timestamp(doi.getDate().getTime())); 460 DBUtils.setStringMaxLength(s, 2, doi.getInfo(), Constants.MAX_OWNERINFO_SIZE, doi, "owner info"); 461 s.setLong(3, doi.getID()); 462 s.executeUpdate(); 463 s.clearParameters(); 464 } else { 465 insertOwnerInfo(c, d, doi); 466 } 467 } 468 if (oldIDs.size() != 0) { 469 String message = "Not allowed to delete ownerinfo " + oldIDs + " on " + d; 470 log.debug(message); 471 throw new IOFailure(message); 472 } 473 } 474 475 /** 476 * Update the list of harvest info for the given domain, keeping IDs where applicable. 477 * 478 * @param c A connection to the database 479 * @param d A domain to update. 480 * @throws SQLException If any database problems occur during the update process. 481 */ 482 private void updateHarvestInfo(Connection c, Domain d) throws SQLException { 483 List<Long> oldIDs = DBUtils.selectLongList(c, "SELECT historyinfo.historyinfo_id " 484 + "FROM historyinfo, configurations WHERE historyinfo.config_id = configurations.config_id" 485 + " AND configurations.domain_id = ?", d.getID()); 486 PreparedStatement s = c.prepareStatement("UPDATE historyinfo SET " + "stopreason = ?, " + "objectcount = ?, " 487 + "bytecount = ?, " + "config_id = " + " (SELECT config_id FROM configurations, domains" 488 + " WHERE domains.domain_id = ?" + " AND configurations.name = ?" 489 + " AND configurations.domain_id = domains.domain_id), " + "harvest_id = ?, " + "job_id = ? " 490 + "WHERE historyinfo_id = ?"); 491 Iterator<HarvestInfo> his = d.getHistory().getHarvestInfo(); 492 while (his.hasNext()) { 493 HarvestInfo hi = his.next(); 494 if (hi.hasID() && oldIDs.remove(hi.getID())) { 495 s.setInt(1, hi.getStopReason().ordinal()); 496 s.setLong(2, hi.getCountObjectRetrieved()); 497 s.setLong(3, hi.getSizeDataRetrieved()); 498 s.setLong(4, d.getID()); 499 s.setString(5, d.getConfiguration(hi.getDomainConfigurationName()).getName()); 500 s.setLong(6, hi.getHarvestID()); 501 if (hi.getJobID() != null) { 502 s.setLong(7, hi.getJobID()); 503 } else { 504 s.setNull(7, Types.BIGINT); 505 } 506 s.setLong(8, hi.getID()); 507 s.executeUpdate(); 508 s.clearParameters(); 509 } else { 510 insertHarvestInfo(c, d, hi); 511 } 512 } 513 if (oldIDs.size() != 0) { 514 String message = "Not allowed to delete historyinfo " + oldIDs + " on " + d; 515 log.debug(message); 516 throw new IOFailure(message); 517 } 518 } 519 520 /** 521 * Insert new harvest info for a domain. 522 * 523 * @param c A connection to the database 524 * @param d A domain to insert on. The domains ID must be correct. 525 * @param harvestInfo Harvest info to insert. 526 */ 527 private void insertHarvestInfo(Connection c, Domain d, HarvestInfo harvestInfo) { 528 PreparedStatement s = null; 529 try { 530 // Note that the config_id is grabbed from the configurations table. 531 s = c.prepareStatement("INSERT INTO historyinfo " + "( stopreason, objectcount, bytecount, config_id, " 532 + "job_id, harvest_id, harvest_time ) " + "VALUES ( ?, ?, ?, ?, ?, ?, ? )", 533 Statement.RETURN_GENERATED_KEYS); 534 s.setInt(1, harvestInfo.getStopReason().ordinal()); 535 s.setLong(2, harvestInfo.getCountObjectRetrieved()); 536 s.setLong(3, harvestInfo.getSizeDataRetrieved()); 537 // TODO More stable way to get IDs, use a select 538 s.setLong(4, d.getConfiguration(harvestInfo.getDomainConfigurationName()).getID()); 539 if (harvestInfo.getJobID() != null) { 540 s.setLong(5, harvestInfo.getJobID()); 541 } else { 542 s.setNull(5, Types.BIGINT); 543 } 544 s.setLong(6, harvestInfo.getHarvestID()); 545 s.setTimestamp(7, new Timestamp(harvestInfo.getDate().getTime())); 546 s.executeUpdate(); 547 harvestInfo.setID(DBUtils.getGeneratedID(s)); 548 } catch (SQLException e) { 549 throw new IOFailure("SQL error while inserting harvest info " + harvestInfo + " for " + d + "\n", e); 550 } 551 } 552 553 /** 554 * Insert new owner info for a domain. 555 * 556 * @param c A connection to the database 557 * @param d A domain to insert on. The domains ID must be correct. 558 * @param doi Owner info to insert. 559 * @throws SQLException If any database problems occur during the insertion process. 560 */ 561 private void insertOwnerInfo(Connection c, Domain d, DomainOwnerInfo doi) throws SQLException { 562 PreparedStatement s = c.prepareStatement("INSERT INTO ownerinfo ( domain_id, created, info ) " 563 + "VALUES ( ?, ?, ?)", Statement.RETURN_GENERATED_KEYS); 564 s.setLong(1, d.getID()); 565 s.setTimestamp(2, new Timestamp(doi.getDate().getTime())); 566 s.setString(3, doi.getInfo()); 567 s.executeUpdate(); 568 doi.setID(DBUtils.getGeneratedID(s)); 569 } 570 571 /** 572 * Insert new seedlist for a domain. 573 * 574 * @param c A connection to the database 575 * @param d A domain to insert on. The domains ID must be correct. 576 * @param sl Seedlist to insert. 577 * @throws SQLException If some database error occurs during the insertion process. 578 */ 579 private void insertSeedlist(Connection c, Domain d, SeedList sl) throws SQLException { 580 PreparedStatement s = c.prepareStatement("INSERT INTO seedlists ( name, comments, domain_id, seeds ) " 581 + "VALUES ( ?, ?, ?, ? )", Statement.RETURN_GENERATED_KEYS); 582 // ID is autogenerated 583 DBUtils.setName(s, 1, sl, Constants.MAX_NAME_SIZE); 584 DBUtils.setComments(s, 2, sl, Constants.MAX_COMMENT_SIZE); 585 s.setLong(3, d.getID()); 586 DBUtils.setClobMaxLength(s, 4, sl.getSeedsAsString(), Constants.MAX_SEED_LIST_SIZE, sl, "seedlist"); 587 s.executeUpdate(); 588 sl.setID(DBUtils.getGeneratedID(s)); 589 } 590 591 /** 592 * Inserts a new password entry into the database. 593 * 594 * @param c A connection to the database 595 * @param d A domain to insert on. The domains ID must be correct. 596 * @param p A password entry to insert. 597 * @throws SQLException If some database error occurs during the insertion process. 598 */ 599 private void insertPassword(Connection c, Domain d, Password p) throws SQLException { 600 PreparedStatement s = c.prepareStatement("INSERT INTO passwords " 601 + "( name, comments, domain_id, url, realm, username, " + "password ) " 602 + "VALUES ( ?, ?, ?, ?, ?, ?, ? )", Statement.RETURN_GENERATED_KEYS); 603 // ID is autogenerated 604 DBUtils.setName(s, 1, p, Constants.MAX_NAME_SIZE); 605 DBUtils.setComments(s, 2, p, Constants.MAX_COMMENT_SIZE); 606 s.setLong(3, d.getID()); 607 DBUtils.setStringMaxLength(s, 4, p.getPasswordDomain(), Constants.MAX_URL_SIZE, p, "password url"); 608 DBUtils.setStringMaxLength(s, 5, p.getRealm(), Constants.MAX_REALM_NAME_SIZE, p, "password realm"); 609 DBUtils.setStringMaxLength(s, 6, p.getUsername(), Constants.MAX_USER_NAME_SIZE, p, "password username"); 610 DBUtils.setStringMaxLength(s, 7, p.getPassword(), Constants.MAX_PASSWORD_SIZE, p, "password"); 611 s.executeUpdate(); 612 p.setID(DBUtils.getGeneratedID(s)); 613 } 614 615 /** 616 * Insert the basic configuration info into the DB. This does not establish the connections with seedlists and 617 * passwords, use {create,update}Config{Passwords,Seedlists}Entries for that. 618 * 619 * @param connection A connection to the database 620 * @param d a domain 621 * @param dc a domainconfiguration 622 * @throws SQLException If some database error occurs during the insertion process. 623 */ 624 private void insertConfiguration(Connection connection, Domain d, DomainConfiguration dc) throws SQLException { 625 long templateId = DBUtils.selectLongValue(connection, "SELECT template_id FROM ordertemplates WHERE name = ?", 626 dc.getOrderXmlName()); 627 PreparedStatement s = connection.prepareStatement("INSERT INTO configurations " 628 + "( name, comments, domain_id, template_id, maxobjects, " + "maxrate, maxbytes ) " 629 + "VALUES ( ?, ?, ?, ?, ?, ?, ?)", Statement.RETURN_GENERATED_KEYS); 630 // Id is autogenerated 631 DBUtils.setName(s, 1, dc, Constants.MAX_NAME_SIZE); 632 DBUtils.setComments(s, 2, dc, Constants.MAX_COMMENT_SIZE); 633 s.setLong(3, d.getID()); 634 s.setLong(4, templateId); 635 s.setLong(5, dc.getMaxObjects()); 636 s.setInt(6, dc.getMaxRequestRate()); 637 s.setLong(7, dc.getMaxBytes()); 638 int rows = s.executeUpdate(); 639 if (rows != 1) { 640 String message = "Error inserting configuration " + dc; 641 log.warn(message); 642 throw new IOFailure(message); 643 } 644 dc.setID(DBUtils.getGeneratedID(s)); 645 } 646 647 /** 648 * Delete all entries in the given crossref table that belong to the configuration. 649 * 650 * @param c A connection to the database 651 * @param configId The domain configuration to remove entries for. 652 * @param table One of "config_passwords" or "config_seedlists" 653 * @throws SQLException If any database problems occur during the delete process. 654 */ 655 private void deleteConfigFromTable(Connection c, long configId, String table) throws SQLException { 656 PreparedStatement s = c.prepareStatement("DELETE FROM " + table + " WHERE " + table + ".config_id = ?"); 657 s.setLong(1, configId); 658 s.executeUpdate(); 659 } 660 661 /** 662 * Delete all entries from the config_passwords table that refer to the given configuration and insert the current 663 * ones. 664 * 665 * @param c A connection to the database 666 * @param d A domain to operate on 667 * @param dc Configuration to update. 668 * @throws SQLException If any database problems occur during the update process. 669 */ 670 private void updateConfigPasswordsEntries(Connection c, Domain d, DomainConfiguration dc) throws SQLException { 671 deleteConfigFromTable(c, dc.getID(), "config_passwords"); 672 createConfigPasswordsEntries(c, d, dc); 673 } 674 675 /** 676 * Create the xref table for passwords used by configurations. 677 * 678 * @param c A connection to the database 679 * @param d A domain to operate on. 680 * @param dc A configuration to create xref table for. 681 * @throws SQLException If any database problems occur during the insertion of password entries for the given domain 682 * configuration 683 */ 684 private void createConfigPasswordsEntries(Connection c, Domain d, DomainConfiguration dc) throws SQLException { 685 PreparedStatement s = c.prepareStatement("INSERT INTO config_passwords " + "( config_id, password_id ) " 686 + "SELECT config_id, password_id " + " FROM configurations, passwords" 687 + " WHERE configurations.domain_id = ?" + " AND configurations.name = ?" 688 + " AND passwords.name = ?" + " AND passwords.domain_id = configurations.domain_id"); 689 for (Iterator<Password> passwords = dc.getPasswords(); passwords.hasNext();) { 690 Password p = passwords.next(); 691 s.setLong(1, d.getID()); 692 s.setString(2, dc.getName()); 693 s.setString(3, p.getName()); 694 s.executeUpdate(); 695 s.clearParameters(); 696 } 697 } 698 699 /** 700 * Delete all entries from the config_seedlists table that refer to the given configuration and insert the current 701 * ones. 702 * 703 * @param c An open connection to the harvestDatabase. 704 * @param d A domain to operate on 705 * @param dc Configuration to update. 706 * @throws SQLException If any database problems occur during the update process. 707 */ 708 private void updateConfigSeedlistsEntries(Connection c, Domain d, DomainConfiguration dc) throws SQLException { 709 deleteConfigFromTable(c, dc.getID(), "config_seedlists"); 710 createConfigSeedlistsEntries(c, d, dc); 711 } 712 713 /** 714 * Create the xref table for seedlists used by configurations. 715 * 716 * @param c A connection to the database 717 * @param d A domain to operate on. 718 * @param dc A configuration to create xref table for. 719 * @throws SQLException If any database problems occur during the insertion of seedlist entries for the given domain 720 * configuration 721 */ 722 private void createConfigSeedlistsEntries(Connection c, Domain d, DomainConfiguration dc) throws SQLException { 723 PreparedStatement s = c.prepareStatement("INSERT INTO config_seedlists " + " ( config_id, seedlist_id ) " 724 + "SELECT configurations.config_id, seedlists.seedlist_id" + " FROM configurations, seedlists" 725 + " WHERE configurations.name = ?" + " AND seedlists.name = ?" 726 + " AND configurations.domain_id = ?" + " AND seedlists.domain_id = ?"); 727 for (Iterator<SeedList> seedlists = dc.getSeedLists(); seedlists.hasNext();) { 728 SeedList sl = seedlists.next(); 729 s.setString(1, dc.getName()); 730 s.setString(2, sl.getName()); 731 s.setLong(3, d.getID()); 732 s.setLong(4, d.getID()); 733 s.executeUpdate(); 734 s.clearParameters(); 735 } 736 } 737 738 @Override 739 protected synchronized Domain read(Connection c, String domainName) { 740 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 741 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Invalid domain name " + domainName); 742 if (!exists(c, domainName)) { 743 throw new UnknownID("No domain by the name '" + domainName + "'"); 744 } 745 return readKnown(c, domainName); 746 } 747 748 @Override 749 protected synchronized Domain readKnown(Connection c, String domainName) { 750 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 751 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Invalid domain name " + domainName); 752 Domain result; 753 PreparedStatement s = null; 754 try { 755 s = c.prepareStatement("SELECT domains.domain_id, " + "domains.comments, " + "domains.crawlertraps, " 756 + "domains.edition, " + "configurations.name, " + " (SELECT name FROM domains as aliasdomains" 757 + " WHERE aliasdomains.domain_id = domains.alias), " + "domains.lastaliasupdate " 758 + "FROM domains, configurations " + "WHERE domains.name = ?" 759 + " AND domains.defaultconfig = configurations.config_id"); 760 s.setString(1, domainName); 761 ResultSet res = s.executeQuery(); 762 if (!res.next()) { 763 final String message = "Error reading existing domain '" + domainName 764 + "' due to database inconsistency. " 765 + "Note that this should never happen. Please ask your database admin to check " 766 + "your 'domains' and 'configurations' tables for any inconsistencies."; 767 log.warn(message); 768 throw new IOFailure(message); 769 } 770 int domainId = res.getInt(1); 771 String comments = res.getString(2); 772 String crawlertraps = res.getString(3); 773 long edition = res.getLong(4); 774 String defaultconfig = res.getString(5); 775 String alias = res.getString(6); 776 Date lastAliasUpdate = DBUtils.getDateMaybeNull(res, 7); 777 s.close(); 778 Domain d = new Domain(domainName); 779 d.setComments(comments); 780 // don't throw exception if illegal regexps are found. 781 boolean strictMode = false; 782 String[] traps = crawlertraps.split("\n"); 783 List<String> insertList = new ArrayList<String>(); 784 for (String trap: traps) { 785 if (!trap.isEmpty()) { // Ignore empty traps (NAS-2480) 786 insertList.add(trap); 787 } 788 } 789 log.trace("Found {} crawlertraps for domain '{}' in database", insertList.size(), domainName); 790 d.setCrawlerTraps(insertList, strictMode); 791 d.setID(domainId); 792 d.setEdition(edition); 793 if (alias != null) { 794 d.setAliasInfo(new AliasInfo(domainName, alias, lastAliasUpdate)); 795 } 796 797 readSeedlists(c, d); 798 readPasswords(c, d); 799 readConfigurations(c, d); 800 // Now that configs are in, we can set the default 801 d.setDefaultConfiguration(defaultconfig); 802 readOwnerInfo(c, d); 803 readHistoryInfo(c, d); 804 805 result = d; 806 } catch (SQLException e) { 807 throw new IOFailure("SQL Error while reading domain " + domainName + "\n", e); 808 } 809 810 return result; 811 } 812 813 /** 814 * Read the configurations for the domain. This should not be called until after passwords and seedlists are read. 815 * 816 * @param c A connection to the database 817 * @param d The domain being read. Its ID must be set. 818 * @throws SQLException If database errors occur. 819 */ 820 private void readConfigurations(Connection c, Domain d) throws SQLException { 821 // Read the configurations now that passwords and seedlists exist 822 PreparedStatement s = c.prepareStatement("SELECT " + "config_id, " + "configurations.name, " + "comments, " 823 + "ordertemplates.name, " + "maxobjects, " + "maxrate, " + "maxbytes" 824 + " FROM configurations, ordertemplates " + "WHERE domain_id = ?" 825 + " AND configurations.template_id = " + "ordertemplates.template_id"); 826 s.setLong(1, d.getID()); 827 ResultSet res = s.executeQuery(); 828 while (res.next()) { 829 long domainconfigId = res.getLong(1); 830 String domainconfigName = res.getString(2); 831 String domainConfigComments = res.getString(3); 832 String order = res.getString(4); 833 long maxobjects = res.getLong(5); 834 int maxrate = res.getInt(6); 835 long maxbytes = res.getLong(7); 836 PreparedStatement s1 = c.prepareStatement("SELECT seedlists.name " + "FROM seedlists, config_seedlists " 837 + "WHERE config_seedlists.config_id = ? " + "AND config_seedlists.seedlist_id = " 838 + "seedlists.seedlist_id"); 839 s1.setLong(1, domainconfigId); 840 ResultSet seedlistResultset = s1.executeQuery(); 841 List<SeedList> seedlists = new ArrayList<SeedList>(); 842 while (seedlistResultset.next()) { 843 seedlists.add(d.getSeedList(seedlistResultset.getString(1))); 844 } 845 s1.close(); 846 if (seedlists.isEmpty()) { 847 String message = "Configuration " + domainconfigName + " of " + d + " has no seedlists"; 848 log.warn(message); 849 throw new IOFailure(message); 850 } 851 852 s1 = c.prepareStatement("SELECT passwords.name FROM passwords, config_passwords " 853 + "WHERE config_passwords.config_id = ? AND config_passwords.password_id = passwords.password_id"); 854 s1.setLong(1, domainconfigId); 855 ResultSet passwordResultset = s1.executeQuery(); 856 List<Password> passwords = new ArrayList<Password>(); 857 while (passwordResultset.next()) { 858 passwords.add(d.getPassword(passwordResultset.getString(1))); 859 } 860 DomainConfiguration dc = new DomainConfiguration(domainconfigName, d, seedlists, passwords); 861 dc.setOrderXmlName(order); 862 dc.setMaxObjects(maxobjects); 863 dc.setMaxRequestRate(maxrate); 864 dc.setComments(domainConfigComments); 865 dc.setMaxBytes(maxbytes); 866 dc.setID(domainconfigId); 867 d.addConfiguration(dc); 868 s1.close(); 869 870 // EAV 871 List<AttributeAndType> attributesAndTypes = EAV.getInstance().getAttributesAndTypes(EAV.DOMAIN_TREE_ID, (int)domainconfigId); 872 dc.setAttributesAndTypes(attributesAndTypes); 873 } 874 if (!d.getAllConfigurations().hasNext()) { 875 String message = "Loaded domain " + d + " with no configurations"; 876 log.warn(message); 877 throw new IOFailure(message); 878 } 879 } 880 881 @Override 882 public List<Long> findUsedConfigurations(Long domainID) { 883 Connection connection = HarvestDBConnection.get(); 884 try { 885 List<Long> usedConfigurations = new LinkedList<Long>(); 886 887 PreparedStatement readUsedConfigurations = connection 888 .prepareStatement(" SELECT configurations.config_id, configurations.name" + " FROM configurations " 889 + " JOIN harvest_configs USING (config_id) " 890 + " JOIN harvestdefinitions USING (harvest_id) " + " WHERE configurations.domain_id = ? " 891 + "AND harvestdefinitions.isactive = ?"); 892 readUsedConfigurations.setLong(1, domainID); 893 readUsedConfigurations.setBoolean(2, true); 894 ResultSet res = readUsedConfigurations.executeQuery(); 895 while (res.next()) { 896 usedConfigurations.add(res.getLong(1)); 897 } 898 readUsedConfigurations.close(); 899 900 return usedConfigurations; 901 } catch (SQLException e) { 902 throw new IOFailure("SQL Error while reading configuration + seeds lists", e); 903 } finally { 904 HarvestDBConnection.release(connection); 905 } 906 } 907 908 /** 909 * Read owner info entries for the domain. 910 * 911 * @param c A connection to the database 912 * @param d The domain being read. Its ID must be set. 913 * @throws SQLException If database errors occur. 914 */ 915 private void readOwnerInfo(Connection c, Domain d) throws SQLException { 916 // Read owner info 917 PreparedStatement s = c.prepareStatement("SELECT ownerinfo_id, created, info" 918 + " FROM ownerinfo WHERE domain_id = ?"); 919 s.setLong(1, d.getID()); 920 ResultSet res = s.executeQuery(); 921 while (res.next()) { 922 final DomainOwnerInfo ownerinfo = new DomainOwnerInfo(new Date(res.getTimestamp(2).getTime()), 923 res.getString(3)); 924 ownerinfo.setID(res.getLong(1)); 925 d.addOwnerInfo(ownerinfo); 926 } 927 } 928 929 /** 930 * Read history info entries for the domain. 931 * 932 * @param c A connection to the database 933 * @param d The domain being read. Its ID must be set. 934 * @throws SQLException If database errors occur. 935 */ 936 private void readHistoryInfo(Connection c, Domain d) throws SQLException { 937 // Read history info 938 PreparedStatement s = c.prepareStatement("SELECT historyinfo_id, stopreason, " + "objectcount, bytecount, " 939 + "name, job_id, harvest_id, harvest_time " + "FROM historyinfo, configurations " 940 + "WHERE configurations.domain_id = ?" + " AND historyinfo.config_id = configurations.config_id"); 941 s.setLong(1, d.getID()); 942 ResultSet res = s.executeQuery(); 943 while (res.next()) { 944 long hiID = res.getLong(1); 945 int stopreasonNum = res.getInt(2); 946 StopReason stopreason = StopReason.getStopReason(stopreasonNum); 947 long objectCount = res.getLong(3); 948 long byteCount = res.getLong(4); 949 String configName = res.getString(5); 950 Long jobId = res.getLong(6); 951 if (res.wasNull()) { 952 jobId = null; 953 } 954 long harvestId = res.getLong(7); 955 Date harvestTime = new Date(res.getTimestamp(8).getTime()); 956 HarvestInfo hi; 957 // XML DAOs didn't keep the job id in harvestinfo, so some 958 // entries will be null. 959 hi = new HarvestInfo(harvestId, jobId, d.getName(), configName, harvestTime, byteCount, objectCount, 960 stopreason); 961 hi.setID(hiID); 962 d.getHistory().addHarvestInfo(hi); 963 } 964 } 965 966 /** 967 * Read passwords for the domain. 968 * 969 * @param c A connection to the database 970 * @param d The domain being read. Its ID must be set. 971 * @throws SQLException If database errors occur. 972 */ 973 private void readPasswords(Connection c, Domain d) throws SQLException { 974 PreparedStatement s = c.prepareStatement("SELECT password_id, name, comments, url, " 975 + "realm, username, password " + "FROM passwords WHERE domain_id = ?"); 976 s.setLong(1, d.getID()); 977 ResultSet res = s.executeQuery(); 978 while (res.next()) { 979 final Password pwd = new Password(res.getString(2), res.getString(3), res.getString(4), res.getString(5), 980 res.getString(6), res.getString(7)); 981 pwd.setID(res.getLong(1)); 982 d.addPassword(pwd); 983 } 984 } 985 986 /** 987 * Read seedlists for the domain. 988 * 989 * @param c A connection to the database 990 * @param d The domain being read. Its ID must be set. 991 * @throws SQLException If database errors occur. 992 */ 993 private void readSeedlists(Connection c, Domain d) throws SQLException { 994 PreparedStatement s = c.prepareStatement("SELECT seedlist_id, name, comments, seeds" 995 + " FROM seedlists WHERE domain_id = ?"); 996 s.setLong(1, d.getID()); 997 ResultSet res = s.executeQuery(); 998 while (res.next()) { 999 final SeedList seedlist = getSeedListFromResultset(res); 1000 d.addSeedList(seedlist); 1001 } 1002 s.close(); 1003 if (!d.getAllSeedLists().hasNext()) { 1004 final String msg = "Domain " + d + " loaded with no seedlists"; 1005 log.warn(msg); 1006 throw new IOFailure(msg); 1007 } 1008 } 1009 1010 /** 1011 * Make SeedList based on entry from seedlists (id, name, comments, seeds). 1012 * 1013 * @param res a Resultset 1014 * @return a SeedList based on ResultSet entry. 1015 * @throws SQLException if unable to get data from database 1016 */ 1017 private SeedList getSeedListFromResultset(ResultSet res) throws SQLException { 1018 final long seedlistId = res.getLong(1); 1019 final String seedlistName = res.getString(2); 1020 String seedlistComments = res.getString(3); 1021 1022 String seedlistContents = ""; 1023 if (DBSpecifics.getInstance().supportsClob()) { 1024 Clob clob = res.getClob(4); 1025 seedlistContents = clob.getSubString(1, (int) clob.length()); 1026 } else { 1027 seedlistContents = res.getString(4); 1028 } 1029 final SeedList seedlist = new SeedList(seedlistName, seedlistContents); 1030 seedlist.setComments(seedlistComments); 1031 seedlist.setID(seedlistId); 1032 return seedlist; 1033 } 1034 1035 @Override 1036 public synchronized boolean exists(String domainName) { 1037 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 1038 if (!DomainUtils.isValidDomainName(domainName)) { 1039 return false; 1040 } 1041 Connection c = HarvestDBConnection.get(); 1042 try { 1043 return exists(c, domainName); 1044 } finally { 1045 HarvestDBConnection.release(c); 1046 } 1047 } 1048 1049 /** 1050 * Return true if a domain with the given name exists. 1051 * 1052 * @param c an open connection to the harvestDatabase 1053 * @param domainName a name of a domain 1054 * @return true if a domain with the given name exists, otherwise false. 1055 */ 1056 private synchronized boolean exists(Connection c, String domainName) { 1057 if (!DomainUtils.isValidDomainName(domainName)) { 1058 return false; 1059 } 1060 return 1 == DBUtils.selectIntValue(c, "SELECT COUNT(*) FROM domains WHERE name = ?", domainName); 1061 } 1062 1063 @Override 1064 public synchronized int getCountDomains() { 1065 Connection c = HarvestDBConnection.get(); 1066 try { 1067 return DBUtils.selectIntValue(c, "SELECT COUNT(*) FROM domains"); 1068 } finally { 1069 HarvestDBConnection.release(c); 1070 } 1071 } 1072 1073 @Override 1074 public synchronized Iterator<Domain> getAllDomains() { 1075 Connection c = HarvestDBConnection.get(); 1076 try { 1077 List<String> domainNames = DBUtils.selectStringList(c, "SELECT name FROM domains ORDER BY name"); 1078 List<Domain> orderedDomains = new LinkedList<Domain>(); 1079 for (String name : domainNames) { 1080 if (DomainUtils.isValidDomainName(name)) { 1081 orderedDomains.add(read(c, name)); 1082 } 1083 } 1084 return orderedDomains.iterator(); 1085 } finally { 1086 HarvestDBConnection.release(c); 1087 } 1088 } 1089 1090 @Override 1091 public Iterator<Domain> getAllDomainsInSnapshotHarvestOrder() { 1092 return getDomainsInSnapshotHarvestOrder(null); 1093 } 1094 1095 @Override 1096 public Iterator<Domain> getDomainsInSnapshotHarvestOrder(Long hid) { 1097 Connection c = HarvestDBConnection.get(); 1098 List<String> domainNames = null; 1099 List<String> domainNamesWithAttributes = null; 1100 try { 1101 if (hid==null) { 1102 log.info("Starting a select of all domains used for Snapshot harvesting"); 1103 // Note: maxbytes are ordered with largest first for symmetry 1104 // with HarvestDefinition.CompareConfigDesc 1105 domainNames = DBUtils.selectStringList(c, "SELECT domains.name" 1106 + " FROM domains, configurations, ordertemplates" 1107 + " WHERE domains.defaultconfig=configurations.config_id" + " AND configurations.template_id" 1108 + "=ordertemplates.template_id" + " ORDER BY" + " ordertemplates.name," 1109 + " configurations.maxbytes DESC," + " domains.name"); 1110 log.info("Retrieved all {} domains used for Snapshot harvesting without searching for attributes for their default configs", domainNames.size()); 1111 domainNamesWithAttributes = DBUtils.selectStringList(c, // Don't order this - it will be ordered later 1112 "SELECT DISTINCT domains.name" 1113 + " FROM domains, configurations, eav_attribute" 1114 + " WHERE domains.defaultconfig=configurations.config_id" 1115 + " AND configurations.config_id=eav_attribute.entity_id"); 1116 log.info("Retrieved all {} domains used for Snapshot harvesting that has attributes for their default configs", domainNamesWithAttributes.size()); 1117 domainNames = domainNames.stream().filter(DomainUtils::isValidDomainName).collect(Collectors.toList()); 1118 // Remove the content of domainNamesWithAttributes from domainNames 1119 domainNames.removeAll(domainNamesWithAttributes); 1120 log.info("Removed all {} domains with attributes from the total list, reducing total-list to {}", domainNamesWithAttributes.size(), domainNames.size()); 1121 // Add the remainder of domainNames to domainNamesWithAttributes, so the domain configs with attributes will be handled first. 1122 domainNamesWithAttributes.addAll(domainNames); 1123 log.info("Remainder of total list merged with list of domains w/ attributes is size {}", domainNamesWithAttributes.size()); 1124 } else { 1125 log.info("Starting a select of all domains harvested in previous snapshot harvest #{}", hid); 1126 domainNames = DBUtils.selectStringList(c, "SELECT DISTINCT domains.name" 1127 + " FROM domains, configurations, ordertemplates, historyinfo" 1128 + " WHERE domains.defaultconfig=configurations.config_id" + " AND configurations.template_id" 1129 + "=ordertemplates.template_id" 1130 + " AND configurations.config_id=historyinfo.config_id " 1131 + " AND historyinfo.harvest_id=" + hid); 1132 // NOTE: the ordering has now been skipped to prevent duplicates 1133 // + " ORDER BY" + " ordertemplates.name," 1134 // + " configurations.maxbytes DESC"); 1135 // "," + " domains.name"); 1136 log.info("Retrieved all {} domains harvested in previous snapshot harvest #{}", domainNames.size(), hid); 1137 domainNamesWithAttributes = DBUtils.selectStringList(c, // Don't order this - it will be ordered later 1138 "SELECT DISTINCT domains.name" 1139 + " FROM domains, configurations, eav_attribute, historyinfo" 1140 + " WHERE domains.defaultconfig=configurations.config_id" 1141 + " AND configurations.config_id=eav_attribute.entity_id" 1142 + " AND historyinfo.config_id=configurations.config_id" 1143 + " AND historyinfo.harvest_id=" + hid 1144 ); 1145 log.info("Retrieved all {} domains harvested in previous snapshot harvest that has attributes for their default configs", domainNamesWithAttributes.size()); 1146 domainNames = domainNames.stream().filter(DomainUtils::isValidDomainName).collect(Collectors.toList()); 1147 // Remove the content of domainNamesWithAttributes from domainNames 1148 domainNames.removeAll(domainNamesWithAttributes); 1149 log.info("Removed all {} domains with attributes from the total list, reducing total-list to {}", domainNamesWithAttributes.size(), domainNames.size()); 1150 // Add the remainder of domainNames to domainNamesWithAttributes, so the domain configs with attributes will be handled first. 1151 domainNamesWithAttributes.addAll(domainNames); 1152 log.info("Remainder of total list merged with list of domains w/ attributes is size {}", domainNamesWithAttributes.size()); 1153 } 1154 1155 return new FilterIterator<String, Domain>(domainNamesWithAttributes.iterator()) { 1156 public Domain filter(String s) { 1157 return readKnown(s); 1158 } 1159 }; 1160 } finally { 1161 HarvestDBConnection.release(c); 1162 } 1163 } 1164 1165 @Override 1166 public List<String> getDomains(String glob) { 1167 ArgumentNotValid.checkNotNullOrEmpty(glob, "glob"); 1168 // SQL uses % and _ instead of * and ? 1169 String sqlGlob = DBUtils.makeSQLGlob(glob); 1170 Connection c = HarvestDBConnection.get(); 1171 try { 1172 List<String> names = DBUtils.selectStringList(c, "SELECT name FROM domains WHERE name LIKE ? ORDER BY name", sqlGlob); 1173 return names.stream().filter(DomainUtils::isValidDomainName).collect(Collectors.toList()); 1174 } finally { 1175 HarvestDBConnection.release(c); 1176 } 1177 } 1178 1179 @Override 1180 public boolean mayDelete(DomainConfiguration config) { 1181 ArgumentNotValid.checkNotNull(config, "config"); 1182 String defaultConfigName = this.getDefaultDomainConfigurationName(config.getDomainName()); 1183 Connection c = HarvestDBConnection.get(); 1184 try { 1185 // Never delete default config and don't delete configs being used. 1186 return !config.getName().equals(defaultConfigName) 1187 && !DBUtils.selectAny(c, "SELECT config_id" + " FROM harvest_configs WHERE config_id = ?", 1188 config.getID()); 1189 } finally { 1190 HarvestDBConnection.release(c); 1191 } 1192 } 1193 1194 @Override 1195 public String getDefaultDomainConfigurationName(String domainName) { 1196 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName); 1197 Connection c = HarvestDBConnection.get(); 1198 try { 1199 return DBUtils.selectStringValue(c, "SELECT configurations.name " + "FROM domains, configurations " 1200 + "WHERE domains.defaultconfig = configurations.config_id" + " AND domains.name = ?", domainName); 1201 } finally { 1202 HarvestDBConnection.release(c); 1203 } 1204 } 1205 1206 @Override 1207 public synchronized SparseDomain readSparse(String domainName) { 1208 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 1209 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName); 1210 Connection c = HarvestDBConnection.get(); 1211 try { 1212 List<String> domainConfigurationNames = DBUtils.selectStringList(c, "SELECT configurations.name " 1213 + " FROM configurations, domains " + "WHERE domains.domain_id = configurations.domain_id " 1214 + " AND domains.name = ?", domainName); 1215 if (domainConfigurationNames.size() == 0) { 1216 throw new UnknownID("No domain exists with name '" + domainName + "'"); 1217 } 1218 return new SparseDomain(domainName, domainConfigurationNames); 1219 } finally { 1220 HarvestDBConnection.release(c); 1221 } 1222 } 1223 1224 @Override 1225 public List<AliasInfo> getAliases(String domain) { 1226 ArgumentNotValid.checkNotNullOrEmpty(domain, "String domain"); 1227 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domain), "Cannot read invalid domain name " + domain); 1228 List<AliasInfo> resultSet = new ArrayList<AliasInfo>(); 1229 Connection c = HarvestDBConnection.get(); 1230 PreparedStatement s = null; 1231 // return all <domain, alias, lastaliasupdate> tuples 1232 // where alias = domain 1233 if (!exists(c, domain)) { 1234 log.debug("domain named '{}' does not exist. Returning empty result set", domain); 1235 return resultSet; 1236 } 1237 try { 1238 s = c.prepareStatement("SELECT domains.name, " + "domains.lastaliasupdate " 1239 + " FROM domains, domains as fatherDomains " + " WHERE domains.alias = fatherDomains.domain_id AND" 1240 + " fatherDomains.name = ?" + " ORDER BY domains.name"); 1241 s.setString(1, domain); 1242 ResultSet res = s.executeQuery(); 1243 while (res.next()) { 1244 AliasInfo ai = new AliasInfo(res.getString(1), domain, DBUtils.getDateMaybeNull(res, 2)); 1245 resultSet.add(ai); 1246 } 1247 1248 return resultSet; 1249 } catch (SQLException e) { 1250 throw new IOFailure("Failure getting alias-information" + "\n", e); 1251 } finally { 1252 DBUtils.closeStatementIfOpen(s); 1253 HarvestDBConnection.release(c); 1254 } 1255 } 1256 1257 @Override 1258 public List<AliasInfo> getAllAliases() { 1259 List<AliasInfo> resultSet = new ArrayList<AliasInfo>(); 1260 Connection c = HarvestDBConnection.get(); 1261 PreparedStatement s = null; 1262 // return all <domain, alias, lastaliasupdate> tuples 1263 // where alias is not-null 1264 try { 1265 s = c.prepareStatement("SELECT domains.name, " + "(SELECT name FROM domains as aliasdomains" 1266 + " WHERE aliasdomains.domain_id " + "= domains.alias), " + " domains.lastaliasupdate " 1267 + " FROM domains " + " WHERE domains.alias IS NOT NULL" + " ORDER BY " + " lastaliasupdate ASC"); 1268 ResultSet res = s.executeQuery(); 1269 while (res.next()) { 1270 String domainName = res.getString(1); 1271 String aliasOf = res.getString(2); 1272 Date lastchanged = DBUtils.getDateMaybeNull(res, 3); 1273 AliasInfo ai = new AliasInfo(domainName, aliasOf, lastchanged); 1274 if (DomainUtils.isValidDomainName(domainName) && DomainUtils.isValidDomainName(aliasOf)) { 1275 resultSet.add(ai); 1276 } 1277 } 1278 return resultSet; 1279 } catch (SQLException e) { 1280 throw new IOFailure("Failure getting alias-information" + "\n", e); 1281 } finally { 1282 DBUtils.closeStatementIfOpen(s); 1283 HarvestDBConnection.release(c); 1284 } 1285 } 1286 1287 /** 1288 * Return all TLDs represented by the domains in the domains table. it was asked that a level X TLD belong appear in 1289 * TLD list where the level is <=X for example bidule.bnf.fr belong to .bnf.fr and to .fr it appear in the level 1 1290 * list of TLD and in the level 2 list 1291 * 1292 * @param level maximum level of TLD 1293 * @return a list of TLDs 1294 * @see DomainDAO#getTLDs(int) 1295 */ 1296 @Override 1297 public List<TLDInfo> getTLDs(int level) { 1298 Map<String, TLDInfo> resultMap = new HashMap<String, TLDInfo>(); 1299 Connection c = HarvestDBConnection.get(); 1300 PreparedStatement s = null; 1301 try { 1302 s = c.prepareStatement("SELECT name FROM domains"); 1303 ResultSet res = s.executeQuery(); 1304 while (res.next()) { 1305 String domain = res.getString(1); 1306 if (DomainUtils.isValidDomainName(domain)) { 1307 // getting the TLD level of the domain 1308 int domainTLDLevel = TLDInfo.getTLDLevel(domain); 1309 1310 // restraining to max level 1311 if (domainTLDLevel > level) { 1312 domainTLDLevel = level; 1313 } 1314 1315 // looping from level 1 to level max of the domain 1316 for (int currentLevel = 1; currentLevel <= domainTLDLevel; currentLevel++) { 1317 // getting the tld of the domain by level 1318 String tld = TLDInfo.getMultiLevelTLD(domain, currentLevel); 1319 TLDInfo i = resultMap.get(tld); 1320 if (i == null) { 1321 i = new TLDInfo(tld); 1322 resultMap.put(tld, i); 1323 } 1324 i.addSubdomain(domain); 1325 } 1326 } 1327 } 1328 1329 List<TLDInfo> resultSet = new ArrayList<TLDInfo>(resultMap.values()); 1330 Collections.sort(resultSet); 1331 return resultSet; 1332 1333 } catch (SQLException e) { 1334 throw new IOFailure("Failure getting TLD-information" + "\n", e); 1335 } finally { 1336 DBUtils.closeStatementIfOpen(s); 1337 HarvestDBConnection.release(c); 1338 } 1339 } 1340 1341 @Override 1342 public HarvestInfo getDomainJobInfo(Job j, String domainName, String configName) { 1343 ArgumentNotValid.checkNotNull(j, "j"); 1344 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 1345 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName); 1346 ArgumentNotValid.checkNotNullOrEmpty(configName, "configName"); 1347 HarvestInfo resultInfo = null; 1348 1349 Connection connection = HarvestDBConnection.get(); 1350 PreparedStatement s = null; 1351 try { 1352 // Get domain_id for domainName 1353 long domainId = DBUtils.selectLongValue(connection, "SELECT domain_id FROM domains WHERE name=?", 1354 domainName); 1355 1356 s = connection.prepareStatement("SELECT stopreason, " + "objectcount, bytecount, " 1357 + "harvest_time FROM historyinfo WHERE " + "job_id = ? AND " + "config_id = ? AND " 1358 + "harvest_id = ?"); 1359 s.setLong(1, j.getJobID()); 1360 s.setLong(2, DBUtils.selectLongValue(connection, "SELECT config_id FROM configurations " 1361 + "WHERE name = ? AND domain_id=?", configName, domainId)); 1362 s.setLong(3, j.getOrigHarvestDefinitionID()); 1363 ResultSet res = s.executeQuery(); 1364 // If no result, the job may not have been run yet 1365 // return null HarvestInfo 1366 if (res.next()) { 1367 StopReason reason = StopReason.getStopReason(res.getInt(1)); 1368 long objectCount = res.getLong(2); 1369 long byteCount = res.getLong(3); 1370 Date harvestTime = res.getDate(4); 1371 resultInfo = new HarvestInfo(j.getOrigHarvestDefinitionID(), j.getJobID(), domainName, configName, 1372 harvestTime, byteCount, objectCount, reason); 1373 } 1374 1375 return resultInfo; 1376 1377 } catch (SQLException e) { 1378 throw new IOFailure("Failure getting DomainJobInfo" + "\n", e); 1379 } finally { 1380 DBUtils.closeStatementIfOpen(s); 1381 HarvestDBConnection.release(connection); 1382 } 1383 } 1384 1385 @Override 1386 public List<DomainHarvestInfo> listDomainHarvestInfo(String domainName, String orderBy, boolean asc) { 1387 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 1388 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName); 1389 Connection c = HarvestDBConnection.get(); 1390 PreparedStatement s = null; 1391 final ArrayList<DomainHarvestInfo> domainHarvestInfos = new ArrayList<DomainHarvestInfo>(); 1392 final String ascOrDesc = asc ? "ASC" : "DESC"; 1393 log.debug("Using ascOrDesc=" + ascOrDesc + " after receiving " + asc); 1394 try { 1395 // For historical reasons, not all historyinfo objects have the 1396 // information required to find the job that made them. Therefore, 1397 // we must left outer join them onto the jobs list to get the 1398 // start date and end date for those where they can be found. 1399 s = c.prepareStatement("SELECT jobs.job_id, hdname, hdid," + " harvest_num," + " configname, startdate," 1400 + " enddate, objectcount, bytecount, stopreason" + " FROM ( " 1401 + " SELECT harvestdefinitions.name AS hdname," + " harvestdefinitions.harvest_id AS hdid," 1402 + " configurations.name AS configname," 1403 + " objectcount, bytecount, job_id, stopreason" 1404 + " FROM domains, configurations, historyinfo, " + " harvestdefinitions" 1405 + " WHERE domains.name = ? " + " AND domains.domain_id = configurations.domain_id" 1406 + " AND historyinfo.config_id = " + "configurations.config_id" 1407 + " AND historyinfo.harvest_id = " + "harvestdefinitions.harvest_id" + " ) AS hist" 1408 + " LEFT OUTER JOIN jobs" + " ON hist.job_id = jobs.job_id ORDER BY " + orderBy + " " + ascOrDesc); 1409 s.setString(1, domainName); 1410 ResultSet res = s.executeQuery(); 1411 while (res.next()) { 1412 final int jobID = res.getInt(1); 1413 final String harvestName = res.getString(2); 1414 final int harvestID = res.getInt(3); 1415 final int harvestNum = res.getInt(4); 1416 final String configName = res.getString(5); 1417 final Date startDate = DBUtils.getDateMaybeNull(res, 6); 1418 final Date endDate = DBUtils.getDateMaybeNull(res, 7); 1419 final long objectCount = res.getLong(8); 1420 final long byteCount = res.getLong(9); 1421 final StopReason reason = StopReason.getStopReason(res.getInt(10)); 1422 domainHarvestInfos.add(new DomainHarvestInfo(domainName, jobID, harvestName, harvestID, harvestNum, 1423 configName, startDate, endDate, byteCount, objectCount, reason)); 1424 } 1425 return domainHarvestInfos; 1426 } catch (SQLException e) { 1427 String message = "SQL error getting domain harvest info for " + domainName + "\n"; 1428 log.warn(message, e); 1429 throw new IOFailure(message, e); 1430 } finally { 1431 DBUtils.closeStatementIfOpen(s); 1432 HarvestDBConnection.release(c); 1433 } 1434 } 1435 1436 /** 1437 * Saves all extended Field values for a Domain in the Database. 1438 * 1439 * @param c Connection to Database 1440 * @param d Domain where loaded extended Field Values will be set 1441 * @throws SQLException If database errors occur. 1442 */ 1443 private void saveExtendedFieldValues(Connection c, Domain d) throws SQLException { 1444 List<ExtendedFieldValue> list = d.getExtendedFieldValues(); 1445 for (int i = 0; i < list.size(); i++) { 1446 ExtendedFieldValue efv = list.get(i); 1447 efv.setInstanceID(d.getID()); 1448 1449 ExtendedFieldValueDBDAO dao = (ExtendedFieldValueDBDAO) ExtendedFieldValueDAO.getInstance(); 1450 if (efv.getExtendedFieldValueID() != null) { 1451 dao.update(c, efv, false); 1452 } else { 1453 dao.create(c, efv, false); 1454 } 1455 } 1456 } 1457 1458 @Override 1459 public DomainConfiguration getDomainConfiguration(String domainName, String configName) { 1460 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName); 1461 DomainHistory history = getDomainHistory(domainName); 1462 List<String> crawlertraps = getCrawlertraps(domainName); 1463 1464 Connection c = HarvestDBConnection.get(); 1465 List<DomainConfiguration> foundConfigs = new ArrayList<DomainConfiguration>(); 1466 PreparedStatement s = null; 1467 try { 1468 // Read the configurations now that passwords and seedlists exist 1469 // TODO Seriously? Use a join. 1470 s = c.prepareStatement("SELECT config_id, " + "configurations.name, " + "comments, " 1471 + "ordertemplates.name, " + "maxobjects, " + "maxrate, " + "maxbytes" 1472 + " FROM configurations, ordertemplates " + "WHERE domain_id = (SELECT domain_id FROM domains " 1473 + " WHERE name=?)" + " AND configurations.name = ?" + " AND configurations.template_id = " 1474 + "ordertemplates.template_id"); 1475 s.setString(1, domainName); 1476 s.setString(2, configName); 1477 ResultSet res = s.executeQuery(); 1478 while (res.next()) { 1479 long domainconfigId = res.getLong(1); 1480 String domainconfigName = res.getString(2); 1481 String domainConfigComments = res.getString(3); 1482 final String order = res.getString(4); 1483 long maxobjects = res.getLong(5); 1484 int maxrate = res.getInt(6); 1485 long maxbytes = res.getLong(7); 1486 PreparedStatement s1 = c.prepareStatement("SELECT seedlists.seedlist_id, seedlists.name, " 1487 + " seedlists.comments, seedlists.seeds " + "FROM seedlists, config_seedlists " 1488 + "WHERE config_seedlists.config_id = ? " + "AND config_seedlists.seedlist_id = " 1489 + "seedlists.seedlist_id"); 1490 s1.setLong(1, domainconfigId); 1491 ResultSet seedlistResultset = s1.executeQuery(); 1492 List<SeedList> seedlists = new ArrayList<SeedList>(); 1493 while (seedlistResultset.next()) { 1494 SeedList seedlist = getSeedListFromResultset(seedlistResultset); 1495 seedlists.add(seedlist); 1496 } 1497 s1.close(); 1498 if (seedlists.isEmpty()) { 1499 String message = "Configuration " + domainconfigName + " of domain '" + domainName 1500 + " has no seedlists"; 1501 log.warn(message); 1502 throw new IOFailure(message); 1503 } 1504 1505 PreparedStatement s2 = c.prepareStatement("SELECT passwords.password_id, " 1506 + "passwords.name, passwords.comments, " + "passwords.url, passwords.realm, " 1507 + "passwords.username, passwords.password " + "FROM passwords, config_passwords " 1508 + "WHERE config_passwords.config_id = ? " + "AND config_passwords.password_id = " 1509 + "passwords.password_id"); 1510 s2.setLong(1, domainconfigId); 1511 ResultSet passwordResultset = s2.executeQuery(); 1512 List<Password> passwords = new ArrayList<Password>(); 1513 while (passwordResultset.next()) { 1514 final Password pwd = new Password(passwordResultset.getString(2), passwordResultset.getString(3), 1515 passwordResultset.getString(4), passwordResultset.getString(5), 1516 passwordResultset.getString(6), passwordResultset.getString(7)); 1517 pwd.setID(passwordResultset.getLong(1)); 1518 passwords.add(pwd); 1519 } 1520 1521 DomainConfiguration dc = new DomainConfiguration(domainconfigName, domainName, history, crawlertraps, 1522 seedlists, passwords); 1523 dc.setOrderXmlName(order); 1524 dc.setMaxObjects(maxobjects); 1525 dc.setMaxRequestRate(maxrate); 1526 dc.setComments(domainConfigComments); 1527 dc.setMaxBytes(maxbytes); 1528 dc.setID(domainconfigId); 1529 foundConfigs.add(dc); 1530 s2.close(); 1531 1532 // EAV 1533 List<AttributeAndType> attributesAndTypes = EAV.getInstance().getAttributesAndTypes(EAV.DOMAIN_TREE_ID, (int)domainconfigId); 1534 dc.setAttributesAndTypes(attributesAndTypes); 1535 } // While 1536 } catch (SQLException e) { 1537 throw new IOFailure("Error while fetching DomainConfigration: ", e); 1538 } finally { 1539 DBUtils.closeStatementIfOpen(s); 1540 HarvestDBConnection.release(c); 1541 } 1542 return foundConfigs.get(0); 1543 } 1544 1545 /** 1546 * Retrieve the crawlertraps for a specific domain. TODO should this method be public? 1547 * 1548 * @param domainName the name of a domain. 1549 * @return the crawlertraps for given domain. 1550 */ 1551 private List<String> getCrawlertraps(String domainName) { 1552 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName); 1553 Connection c = HarvestDBConnection.get(); 1554 String traps = null; 1555 PreparedStatement s = null; 1556 try { 1557 s = c.prepareStatement("SELECT crawlertraps FROM domains WHERE name = ?"); 1558 s.setString(1, domainName); 1559 ResultSet crawlertrapsResultset = s.executeQuery(); 1560 if (crawlertrapsResultset.next()) { 1561 traps = crawlertrapsResultset.getString(1); 1562 } else { 1563 throw new IOFailure("Unable to find crawlertraps for domain '" + domainName + "'. " 1564 + "The domain doesn't seem to exist."); 1565 } 1566 } catch (SQLException e) { 1567 throw new IOFailure("Error while fetching crawlertraps for domain '" + domainName + "': ", e); 1568 } finally { 1569 DBUtils.closeStatementIfOpen(s); 1570 HarvestDBConnection.release(c); 1571 } 1572 return Arrays.asList(traps.split("\n")); 1573 } 1574 1575 @Override 1576 public Iterator<HarvestInfo> getHarvestInfoBasedOnPreviousHarvestDefinition( 1577 final HarvestDefinition previousHarvestDefinition) { 1578 ArgumentNotValid.checkNotNull(previousHarvestDefinition, "previousHarvestDefinition"); 1579 // For each domainConfig, get harvest infos if there is any for the 1580 // previous harvest definition 1581 log.debug("We start the Iterator<HarvestInfo> process with getting an iterator of DomainConfigs in the previous HD#{}", previousHarvestDefinition.getOid()); 1582 Iterator<DomainConfiguration> previousDomainConfigs = previousHarvestDefinition.getDomainConfigurations(); 1583 log.debug("Now finished getting an iterator of DomainConfigs in the previous HD#{}. We can now return the FilterIterator<DomainConfiguration, HarvestInfo>", previousHarvestDefinition.getOid()); 1584 return new FilterIterator<DomainConfiguration, HarvestInfo>(previousDomainConfigs) { 1585 /** 1586 * @see FilterIterator#filter(Object) 1587 */ 1588 protected HarvestInfo filter(DomainConfiguration o) { 1589 DomainConfiguration config = o; 1590 DomainHistory domainHistory = getDomainHistory(config.getDomainName()); 1591 HarvestInfo hi = domainHistory.getSpecifiedHarvestInfo(previousHarvestDefinition.getOid(), 1592 config.getName()); 1593 return hi; 1594 } 1595 }; // Here ends the above return-statement 1596 } 1597 1598 /** 1599 * Retrieve HarvestInfo for a given harvestdefinition and domain combination. 1600 * @param harvestDefinition a given harvestdefinition 1601 * @param domain a given domain 1602 * @return null, if no HarvestInfo found for the given harvestdefinition and domain combination, otherwise it returns the first matching HarvestInfo found and gives a warning if more than one match exist. 1603 */ 1604 @Override 1605 public HarvestInfo getHarvestInfoForDomainInHarvest(final HarvestDefinition harvestDefinition, final Domain domain) { 1606 PreparedStatement s = null; 1607 Connection c = HarvestDBConnection.get(); 1608 try { 1609 s = c.prepareStatement("SELECT h.stopreason, h.objectcount, h.bytecount, c.name, h.job_id, h.harvest_time FROM historyinfo as h, configurations as c WHERE " 1610 + " c.config_id=h.config_id AND c.domain_id=? AND h.harvest_id=?"); 1611 s.setLong(1, domain.getID()); 1612 s.setLong(2, harvestDefinition.getOid()); 1613 ResultSet res = s.executeQuery(); 1614 List<HarvestInfo> infoFoundForDomain = new ArrayList<HarvestInfo>(); 1615 while (res.next()) { 1616 int stopreasonNum = res.getInt(1); 1617 StopReason stopreason = StopReason.getStopReason(stopreasonNum); 1618 long objectCount = res.getLong(2); 1619 long byteCount = res.getLong(3); 1620 String configName = res.getString(4); 1621 Long jobId = res.getLong(5); 1622 if (res.wasNull()) { 1623 jobId = null; 1624 } 1625 long harvestId = harvestDefinition.getOid(); 1626 Date harvestTime = new Date(res.getTimestamp(6).getTime()); 1627 1628 HarvestInfo hi = new HarvestInfo(harvestId, jobId, domain.getName(), configName, harvestTime, byteCount, objectCount, stopreason); 1629 infoFoundForDomain.add(hi); 1630 } 1631 if (infoFoundForDomain.isEmpty()) { 1632 return null; 1633 } else if (infoFoundForDomain.size() == 1) { 1634 return infoFoundForDomain.get(0); 1635 } else { 1636 HarvestInfo selected = infoFoundForDomain.get(0); 1637 Long latest = selected.getDate().getTime(); 1638 for (int i=1; i < infoFoundForDomain.size(); i++) { 1639 if (infoFoundForDomain.get(i).getDate().getTime() > latest) { 1640 latest = infoFoundForDomain.get(i).getDate().getTime(); 1641 selected = infoFoundForDomain.get(i); 1642 } 1643 } 1644 log.warn("Found {} harvestInfo entries for domain '{}' and harvestdefinition '{}'. Selecting the latest entry: {}", infoFoundForDomain.size(), domain.getName(), 1645 harvestDefinition.getName(), selected); 1646 return selected; 1647 } 1648 } catch (SQLException e) { 1649 throw new IOFailure("Error while fetching HarvestInfo for domain '" + domain.getName() + "' in harvest '" + harvestDefinition.getName() + "':", e); 1650 } finally { 1651 DBUtils.closeStatementIfOpen(s); 1652 HarvestDBConnection.release(c); 1653 } 1654 } 1655 1656 1657 1658 @Override 1659 public DomainHistory getDomainHistory(String domainName) { 1660 ArgumentNotValid.checkNotNullOrEmpty(domainName, "String domainName"); 1661 ArgumentNotValid.checkTrue(DomainUtils.isValidDomainName(domainName), "Cannot read invalid domain name " + domainName); 1662 Connection c = HarvestDBConnection.get(); 1663 DomainHistory history = new DomainHistory(); 1664 // Read history info 1665 PreparedStatement s = null; 1666 try { 1667 s = c.prepareStatement("SELECT historyinfo_id, stopreason, " + "objectcount, bytecount, " 1668 + "name, job_id, harvest_id, harvest_time " + "FROM historyinfo, configurations " 1669 + "WHERE configurations.domain_id = " + "(SELECT domain_id FROM domains WHERE name=?)" 1670 + " AND historyinfo.config_id " + " = configurations.config_id"); 1671 s.setString(1, domainName); 1672 ResultSet res = s.executeQuery(); 1673 while (res.next()) { 1674 long hiID = res.getLong(1); 1675 int stopreasonNum = res.getInt(2); 1676 StopReason stopreason = StopReason.getStopReason(stopreasonNum); 1677 long objectCount = res.getLong(3); 1678 long byteCount = res.getLong(4); 1679 String configName = res.getString(5); 1680 Long jobId = res.getLong(6); 1681 if (res.wasNull()) { 1682 jobId = null; 1683 } 1684 long harvestId = res.getLong(7); 1685 Date harvestTime = new Date(res.getTimestamp(8).getTime()); 1686 HarvestInfo hi; 1687 1688 hi = new HarvestInfo(harvestId, jobId, domainName, configName, harvestTime, byteCount, objectCount, 1689 stopreason); 1690 hi.setID(hiID); 1691 history.addHarvestInfo(hi); 1692 } 1693 } catch (SQLException e) { 1694 throw new IOFailure("Error while fetching DomainHistory for domain '" + domainName + "': ", e); 1695 } finally { 1696 DBUtils.closeStatementIfOpen(s); 1697 HarvestDBConnection.release(c); 1698 } 1699 1700 return history; 1701 } 1702 1703 @Override 1704 public List<String> getDomains(String glob, String searchField) { 1705 ArgumentNotValid.checkNotNullOrEmpty(glob, "glob"); 1706 ArgumentNotValid.checkNotNullOrEmpty(searchField, "searchField"); 1707 // SQL uses % and _ instead of * and ? 1708 String sqlGlob = DBUtils.makeSQLGlob(glob); 1709 1710 Connection c = HarvestDBConnection.get(); 1711 try { 1712 return DBUtils.selectStringList(c, "SELECT name FROM domains WHERE " + searchField.toLowerCase() 1713 + " LIKE ?", sqlGlob).stream().filter(DomainUtils::isValidDomainName).collect(Collectors.toList()); 1714 } finally { 1715 HarvestDBConnection.release(c); 1716 } 1717 } 1718 1719 @Override 1720 public void renameAndUpdateConfig(Domain domain, DomainConfiguration domainConf, 1721 String configOldName) { 1722 Connection connection = HarvestDBConnection.get(); 1723 Long configId = DBUtils.selectLongValue(connection, 1724 "SELECT config_id FROM configurations WHERE domain_id = ? and name = ?", domain.getID(), configOldName); 1725 1726 try { 1727 PreparedStatement s = connection.prepareStatement("UPDATE configurations SET name = ?, comments = ?, " 1728 + "template_id = ( SELECT template_id FROM ordertemplates " + "WHERE name = ? ), " + "maxobjects = ?, " 1729 + "maxrate = ?, " + "maxbytes = ? " + "WHERE config_id = ? AND domain_id = ?"); 1730 s.setString(1, domainConf.getName()); 1731 DBUtils.setComments(s, 2, domainConf, Constants.MAX_COMMENT_SIZE); 1732 s.setString(3, domainConf.getOrderXmlName()); 1733 s.setLong(4, domainConf.getMaxObjects()); 1734 s.setInt(5, domainConf.getMaxRequestRate()); 1735 s.setLong(6, domainConf.getMaxBytes()); 1736 s.setLong(7, configId); 1737 s.setLong(8, domain.getID()); 1738 s.executeUpdate(); 1739 s.clearParameters(); 1740 updateConfigPasswordsEntries(connection, domain, domainConf); 1741 updateConfigSeedlistsEntries(connection, domain, domainConf); 1742 s.close(); 1743 } catch (SQLException e) { 1744 throw new IOFailure("Error while renaming configuration '" + configOldName + "' to: " + domainConf.getName(), e); 1745 } finally { 1746 HarvestDBConnection.release(connection); 1747 } 1748 } 1749 1750 @Override 1751 public List<String> getAllDomainNames() { 1752 Connection c = HarvestDBConnection.get(); 1753 try { 1754 return DBUtils.selectStringList(c, "SELECT name FROM domains").stream().filter(DomainUtils::isValidDomainName).collect( 1755 Collectors.toList()); 1756 } finally { 1757 HarvestDBConnection.release(c); 1758 } 1759 } 1760}