001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.sql.Connection; 027import java.sql.PreparedStatement; 028import java.sql.ResultSet; 029import java.sql.SQLException; 030import java.sql.Timestamp; 031import java.sql.Types; 032import java.text.Collator; 033import java.util.ArrayList; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.Date; 037import java.util.HashMap; 038import java.util.HashSet; 039import java.util.Iterator; 040import java.util.LinkedList; 041import java.util.List; 042import java.util.Map; 043import java.util.Set; 044import java.util.StringTokenizer; 045 046import org.slf4j.Logger; 047import org.slf4j.LoggerFactory; 048 049import dk.netarkivet.common.exceptions.ArgumentNotValid; 050import dk.netarkivet.common.exceptions.IOFailure; 051import dk.netarkivet.common.exceptions.IllegalState; 052import dk.netarkivet.common.exceptions.NotImplementedException; 053import dk.netarkivet.common.exceptions.PermissionDenied; 054import dk.netarkivet.common.exceptions.UnknownID; 055import dk.netarkivet.common.utils.DBUtils; 056import dk.netarkivet.common.utils.ExceptionUtils; 057import dk.netarkivet.common.utils.FilterIterator; 058import dk.netarkivet.common.utils.StringUtils; 059import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory; 060import dk.netarkivet.harvester.datamodel.eav.EAV; 061import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType; 062import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedField; 063import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDAO; 064import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDefaultValue; 065import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes; 066import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValue; 067import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDAO; 068import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDBDAO; 069import dk.netarkivet.harvester.webinterface.HarvestStatusQuery; 070 071/** 072 * A database-oriented implementation of the HarvestDefinitionDAO. 073 * <p> 074 * The statements to create the tables are located in: 075 * <ul> 076 * <li><em>Derby:</em> scripts/sql/createfullhddb.sql</li> 077 * <li><em>MySQL:</em> scripts/sql/createfullhddb.mysql</li> 078 * <li><em>PostgreSQL:</em> scripts/postgresql/netarchivesuite_init.sql</li> 079 * </ul> 080 */ 081public class HarvestDefinitionDBDAO extends HarvestDefinitionDAO { 082 083 /** The logger. */ 084 private static final Logger log = LoggerFactory.getLogger(HarvestDefinitionDBDAO.class); 085 086 /** 087 * Comparator used for sorting the UI list of {@link SparseDomainConfiguration}s. Sorts first by domain name 088 * alphabetical order, next by configuration name. 089 */ 090 private static class SparseDomainConfigurationComparator implements Comparator<SparseDomainConfiguration> { 091 092 @Override 093 public int compare(SparseDomainConfiguration sdc1, SparseDomainConfiguration sdc2) { 094 int domComp = sdc1.getDomainName().compareTo(sdc2.getDomainName()); 095 if (0 == domComp) { 096 return sdc1.getConfigurationName().compareTo(sdc2.getConfigurationName()); 097 } 098 return domComp; 099 } 100 } 101 102 /** Create a new HarvestDefinitionDAO using database. */ 103 HarvestDefinitionDBDAO() { 104 Connection connection = HarvestDBConnection.get(); 105 try { 106 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.FULLHARVESTS); 107 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTDEFINITIONS); 108 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.PARTIALHARVESTS); 109 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTCONFIGS); 110 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDTYPE); 111 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELD); 112 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDVALUE); 113 } finally { 114 HarvestDBConnection.release(connection); 115 } 116 } 117 118 /** 119 * Create a harvest definition in Database. The harvest definition object should not have its ID set unless we are 120 * in the middle of migrating. 121 * 122 * @param harvestDefinition A new harvest definition to store in the database. 123 * @return The harvestId for the just created harvest definition. 124 * @see HarvestDefinitionDAO#create(HarvestDefinition) 125 */ 126 @Override 127 public synchronized Long create(HarvestDefinition harvestDefinition) { 128 Long id = harvestDefinition.getOid(); 129 try (Connection connection = HarvestDBConnection.get();) { 130 Date submissiondate = new Date(); 131 final int edition = 1; 132 try { 133 if (id == null) { 134 id = generateNextID(connection); 135 } 136 137 connection.setAutoCommit(false); 138 try (PreparedStatement s = connection.prepareStatement("INSERT INTO harvestdefinitions " 139 + "( harvest_id, name, comments, numevents, submitted, isactive, edition, audience ) " 140 + "VALUES ( ?, ?, ?, ?, ?, ?, ?,? )");) { 141 s.setLong(1, id); 142 DBUtils.setName(s, 2, harvestDefinition, Constants.MAX_NAME_SIZE); 143 DBUtils.setComments(s, 3, harvestDefinition, Constants.MAX_COMMENT_SIZE); 144 s.setLong(4, harvestDefinition.getNumEvents()); 145 // Don't set on object, as we may yet rollback 146 s.setTimestamp(5, new Timestamp(submissiondate.getTime())); 147 s.setBoolean(6, harvestDefinition.getActive()); 148 s.setLong(7, edition); 149 s.setString(8, harvestDefinition.getAudience()); 150 s.executeUpdate(); 151 } 152 if (harvestDefinition instanceof FullHarvest) { 153 FullHarvest fh = (FullHarvest) harvestDefinition; 154 try (PreparedStatement s = connection.prepareStatement("INSERT INTO fullharvests " 155 + "( harvest_id, maxobjects, maxbytes, maxjobrunningtime, previoushd, isindexready)" 156 + "VALUES ( ?, ?, ?, ?, ?, ? )");) { 157 s.setLong(1, id); 158 s.setLong(2, fh.getMaxCountObjects()); 159 s.setLong(3, fh.getMaxBytes()); 160 s.setLong(4, fh.getMaxJobRunningTime()); 161 if (fh.getPreviousHarvestDefinition() != null) { 162 s.setLong(5, fh.getPreviousHarvestDefinition().getOid()); 163 } else { 164 s.setNull(5, Types.BIGINT); 165 } 166 s.setBoolean(6, fh.getIndexReady()); 167 s.executeUpdate(); 168 } 169 } else if (harvestDefinition instanceof PartialHarvest) { 170 PartialHarvest ph = (PartialHarvest) harvestDefinition; 171 // Get schedule id 172 long scheduleId = DBUtils.selectLongValue(connection, 173 "SELECT schedule_id FROM schedules WHERE name = ?", ph.getSchedule().getName()); 174 try (PreparedStatement s = connection 175 .prepareStatement("INSERT INTO partialharvests ( harvest_id, schedule_id, nextdate ) " 176 + "VALUES ( ?, ?, ? )");) { 177 s.setLong(1, id); 178 s.setLong(2, scheduleId); 179 DBUtils.setDateMaybeNull(s, 3, ph.getNextDate()); 180 s.executeUpdate(); 181 createHarvestConfigsEntries(connection, ph, id); 182 } 183 } else { 184 String message = "Harvest definition " + harvestDefinition + " is of unknown class " 185 + harvestDefinition.getClass(); 186 log.warn(message); 187 throw new ArgumentNotValid(message); 188 } 189 connection.commit(); 190 191 // Now that we have committed, set new data on object. 192 harvestDefinition.setSubmissionDate(submissiondate); 193 harvestDefinition.setEdition(edition); 194 harvestDefinition.setOid(id); 195 196 // saving after receiving id 197 saveExtendedFieldValues(connection, harvestDefinition); 198 199 } catch (SQLException e) { 200 String message = "SQL error creating harvest definition " + harvestDefinition + " in database" + "\n" 201 + ExceptionUtils.getSQLExceptionCause(e); 202 log.warn(message, e); 203 throw new IOFailure(message, e); 204 } finally { 205 DBUtils.rollbackIfNeeded(connection, "creating", harvestDefinition); 206 } 207 } catch (SQLException e) { 208 log.error("Unable to close db resources", e); 209 } 210 211 return id; 212 } 213 214 /** 215 * Create the entries in the harvest_configs table that connect PartialHarvests and their configurations. 216 * 217 * @param c harvest definition DB connection 218 * @param ph The harvest to insert entries for. 219 * @param id The id of the harvest -- this may not yet be set on ph 220 * @throws SQLException If a database error occurs during the create process. 221 */ 222 private void createHarvestConfigsEntries(Connection c, PartialHarvest ph, long id) throws SQLException { 223 try (PreparedStatement s = c.prepareStatement("DELETE FROM harvest_configs WHERE harvest_id = ?");) { 224 s.setLong(1, id); 225 s.executeUpdate(); 226 } 227 try (PreparedStatement s = c.prepareStatement("INSERT INTO harvest_configs " + "( harvest_id, config_id ) " 228 + "SELECT ?, config_id FROM configurations, domains " 229 + "WHERE domains.name = ? AND configurations.name = ?" 230 + " AND domains.domain_id = configurations.domain_id");) { 231 Iterator<DomainConfiguration> dcs = ph.getDomainConfigurations(); 232 while (dcs.hasNext()) { 233 DomainConfiguration dc = dcs.next(); 234 s.setLong(1, id); 235 s.setString(2, dc.getDomainName()); 236 s.setString(3, dc.getName()); 237 s.executeUpdate(); 238 } 239 } 240 } 241 242 /** 243 * Generates the next id of a harvest definition. this implementation retrieves the maximum value of harvest_id in 244 * the DB, and returns this value + 1. 245 * 246 * @param c An open connection to the harvestDatabase 247 * @return The next available ID 248 */ 249 private synchronized Long generateNextID(Connection c) { 250 Long maxVal = DBUtils.selectLongValue(c, "SELECT max(harvest_id) FROM harvestdefinitions"); 251 if (maxVal == null) { 252 maxVal = 0L; 253 } 254 return maxVal + 1L; 255 } 256 257 /** 258 * Read the stored harvest definition for the given ID. 259 * 260 * @param harvestDefinitionID An ID number for a harvest definition 261 * @return A harvest definition that has been read from persistent storage. 262 * @throws UnknownID if no entry with that ID exists in the database 263 * @throws IOFailure If DB-failure occurs? 264 * @see HarvestDefinitionDAO#read(Long) 265 */ 266 @Override 267 public synchronized HarvestDefinition read(Long harvestDefinitionID) throws UnknownID, IOFailure { 268 Connection c = HarvestDBConnection.get(); 269 try { 270 return read(c, harvestDefinitionID); 271 } finally { 272 HarvestDBConnection.release(c); 273 } 274 } 275 276 /** 277 * Read the stored harvest definition for the given ID. 278 * 279 * @param c The used database connection 280 * @param harvestDefinitionID ID number for a harvest definition 281 * @return A harvest definition that has been read from persistent storage. 282 * @throws UnknownID if no entry with that ID exists in the database 283 * @throws IOFailure If DB-failure occurs? 284 * @see HarvestDefinitionDAO#read(Long) 285 */ 286 private HarvestDefinition read(Connection c, Long harvestDefinitionID) throws UnknownID, IOFailure { 287 if (!exists(c, harvestDefinitionID)) { 288 String message = "Unknown harvest definition " + harvestDefinitionID; 289 log.debug(message); 290 throw new UnknownID(message); 291 } 292 log.debug("Reading harvestdefinition w/ id {}", harvestDefinitionID); 293 PreparedStatement s = null; 294 try { 295 s = c.prepareStatement( 296 "SELECT name, comments, numevents, submitted, " + "previoushd, maxobjects, maxbytes, " 297 + "maxjobrunningtime, isindexready, isactive, edition, audience " 298 + "FROM harvestdefinitions, fullharvests " + "WHERE harvestdefinitions.harvest_id = ?" 299 + " AND harvestdefinitions.harvest_id " + " = fullharvests.harvest_id"); 300 s.setLong(1, harvestDefinitionID); 301 ResultSet res = s.executeQuery(); 302 if (res.next()) { 303 // Found full harvest 304 log.debug("fullharvest found w/id " + harvestDefinitionID); 305 final String name = res.getString(1); 306 final String comments = res.getString(2); 307 final int numEvents = res.getInt(3); 308 final Date submissionDate = new Date(res.getTimestamp(4).getTime()); 309 final long maxObjects = res.getLong(6); 310 final long maxBytes = res.getLong(7); 311 final long maxJobRunningtime = res.getLong(8); 312 final boolean isIndexReady = res.getBoolean(9); 313 FullHarvest fh; 314 final long prevhd = res.getLong(5); 315 if (!res.wasNull()) { 316 fh = new FullHarvest(name, comments, prevhd, maxObjects, maxBytes, maxJobRunningtime, isIndexReady, 317 DAOProviderFactory.getHarvestDefinitionDAOProvider(), 318 DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(), 319 DAOProviderFactory.getDomainDAOProvider()); 320 } else { 321 fh = new FullHarvest(name, comments, null, maxObjects, maxBytes, maxJobRunningtime, isIndexReady, 322 DAOProviderFactory.getHarvestDefinitionDAOProvider(), 323 DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(), 324 DAOProviderFactory.getDomainDAOProvider()); 325 } 326 fh.setSubmissionDate(submissionDate); 327 fh.setNumEvents(numEvents); 328 fh.setActive(res.getBoolean(10)); 329 fh.setOid(harvestDefinitionID); 330 fh.setEdition(res.getLong(11)); 331 fh.setAudience(res.getString(12)); 332 333 readExtendedFieldValues(fh); 334 335 // We found a FullHarvest object, just return it. 336 log.debug("Returned FullHarvest object w/ id {}", harvestDefinitionID); 337 return fh; 338 } 339 s.close(); 340 // No full harvest with that ID, try selective harvest 341 s = c.prepareStatement("SELECT harvestdefinitions.name," + " harvestdefinitions.comments," 342 + " harvestdefinitions.numevents," + " harvestdefinitions.submitted," 343 + " harvestdefinitions.isactive," + " harvestdefinitions.edition," 344 + " harvestdefinitions.audience," + " schedules.name," 345 + " partialharvests.nextdate, " + " harvestdefinitions.channel_id " 346 + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.harvest_id = ?" 347 + " AND harvestdefinitions.harvest_id " + "= partialharvests.harvest_id" 348 + " AND schedules.schedule_id " + "= partialharvests.schedule_id"); 349 s.setLong(1, harvestDefinitionID); 350 res = s.executeQuery(); 351 boolean foundPartialHarvest = res.next(); 352 if (foundPartialHarvest) { 353 log.debug("Partialharvest found w/ id " + harvestDefinitionID); 354 // Have to get configs before creating object, so storing data 355 // here. 356 final String name = res.getString(1); 357 final String comments = res.getString(2); 358 final int numEvents = res.getInt(3); 359 final Date submissionDate = new Date(res.getTimestamp(4).getTime()); 360 final boolean active = res.getBoolean(5); 361 final long edition = res.getLong(6); 362 final String audience = res.getString(7); 363 final String scheduleName = res.getString(8); 364 final Date nextDate = DBUtils.getDateMaybeNull(res, 9); 365 final Long channelId = DBUtils.getLongMaybeNull(res, 10); 366 s.close(); 367 // Found partial harvest -- have to find configurations. 368 // To avoid holding on to the readlock while getting domains, 369 // we grab the strings first, then look up domains and configs. 370 final DomainDAO domainDao = DomainDAO.getInstance(); 371 List<SparseDomainConfiguration> configs = new ArrayList<SparseDomainConfiguration>(); 372 s = c.prepareStatement( 373 "SELECT domains.name, configurations.name " + "FROM domains, configurations, harvest_configs " 374 + "WHERE harvest_id = ?" + " AND configurations.config_id " 375 + "= harvest_configs.config_id" + " AND configurations.domain_id = domains.domain_id"); 376 s.setLong(1, harvestDefinitionID); 377 res = s.executeQuery(); 378 while (res.next()) { 379 configs.add(new SparseDomainConfiguration(res.getString(1), res.getString(2))); 380 } 381 s.close(); 382 List<DomainConfiguration> configurations = new ArrayList<DomainConfiguration>(); 383 for (SparseDomainConfiguration domainConfig : configs) { 384 configurations.add(domainDao.getDomainConfiguration(domainConfig.getDomainName(), 385 domainConfig.getConfigurationName())); 386 } 387 388 Schedule schedule = ScheduleDAO.getInstance().read(scheduleName); 389 390 PartialHarvest ph = new PartialHarvest(configurations, schedule, name, comments, audience); 391 392 ph.setNumEvents(numEvents); 393 ph.setSubmissionDate(submissionDate); 394 ph.setActive(active); 395 ph.setEdition(edition); 396 ph.setNextDate(nextDate); 397 ph.setOid(harvestDefinitionID); 398 if (channelId != null) { 399 ph.setChannelId(channelId); 400 } 401 402 readExtendedFieldValues(ph); 403 404 return ph; 405 } else { 406 throw new IllegalState( 407 "No entries in fullharvests or partialharvests found for id " + harvestDefinitionID); 408 } 409 } catch (SQLException e) { 410 throw new IOFailure("SQL Error while reading harvest definition " + harvestDefinitionID + "\n" 411 + ExceptionUtils.getSQLExceptionCause(e), e); 412 } 413 } 414 415 /** 416 * Update an existing harvest definition with new info. 417 * 418 * @param hd An updated harvest definition 419 * @see HarvestDefinitionDAO#update(HarvestDefinition) 420 */ 421 public synchronized void update(HarvestDefinition hd) { 422 ArgumentNotValid.checkNotNull(hd, "HarvestDefinition hd"); 423 if (hd.getOid() == null || !exists(hd.getOid())) { 424 final String message = "Cannot update non-existing " + "harvestdefinition '" + hd.getName() + "'"; 425 log.debug(message); 426 throw new PermissionDenied(message); 427 } 428 HarvestDefinition preHD = null; 429 if (hd instanceof FullHarvest) { 430 preHD = ((FullHarvest) hd).getPreviousHarvestDefinition(); 431 } 432 433 Connection c = HarvestDBConnection.get(); 434 PreparedStatement s = null; 435 try { 436 c.setAutoCommit(false); 437 s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, " 438 + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? " 439 + "WHERE harvest_id = ? AND edition = ?"); 440 DBUtils.setName(s, 1, hd, Constants.MAX_NAME_SIZE); 441 DBUtils.setComments(s, 2, hd, Constants.MAX_COMMENT_SIZE); 442 s.setInt(3, hd.getNumEvents()); 443 s.setTimestamp(4, new Timestamp(hd.getSubmissionDate().getTime())); 444 s.setBoolean(5, hd.getActive()); 445 long nextEdition = hd.getEdition() + 1; 446 s.setLong(6, nextEdition); 447 s.setString(7, hd.getAudience()); 448 s.setLong(8, hd.getOid()); 449 s.setLong(9, hd.getEdition()); 450 451 int rows = s.executeUpdate(); 452 // Since the HD exists, no rows indicates bad edition 453 if (rows == 0) { 454 String message = "Somebody else must have updated " + hd + " since edition " + hd.getEdition() 455 + ", not updating"; 456 log.debug(message); 457 throw new PermissionDenied(message); 458 } 459 s.close(); 460 if (hd instanceof FullHarvest) { 461 FullHarvest fh = (FullHarvest) hd; 462 s = c.prepareStatement("UPDATE fullharvests SET previoushd = ?, " + "maxobjects = ?, " 463 + "maxbytes = ?, " + "maxjobrunningtime = ?, isindexready = ? " + "WHERE harvest_id = ?"); 464 if (preHD != null) { 465 s.setLong(1, preHD.getOid()); 466 } else { 467 s.setNull(1, Types.BIGINT); 468 } 469 s.setLong(2, fh.getMaxCountObjects()); 470 s.setLong(3, fh.getMaxBytes()); 471 s.setLong(4, fh.getMaxJobRunningTime()); 472 s.setBoolean(5, fh.getIndexReady()); 473 s.setLong(6, fh.getOid()); 474 475 rows = s.executeUpdate(); 476 log.debug("{} fullharvests records updated", rows); 477 } else if (hd instanceof PartialHarvest) { 478 PartialHarvest ph = (PartialHarvest) hd; 479 s = c.prepareStatement("UPDATE partialharvests SET " + "schedule_id = " 480 + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? " 481 + "WHERE harvest_id = ?"); 482 s.setString(1, ph.getSchedule().getName()); 483 DBUtils.setDateMaybeNull(s, 2, ph.getNextDate()); 484 s.setLong(3, ph.getOid()); 485 rows = s.executeUpdate(); 486 log.debug("{} partialharvests records updated", rows); 487 s.close(); 488 // FIXME The updates to harvest_configs table should be done 489 // in method removeDomainConfiguration(), and not here. 490 // The following deletes ALL harvest_configs entries for 491 // this PartialHarvest, and creates the entries for the 492 // PartialHarvest again!! 493 createHarvestConfigsEntries(c, ph, ph.getOid()); 494 } else { 495 String message = "Harvest definition " + hd + " has unknown class " + hd.getClass(); 496 log.warn(message); 497 throw new ArgumentNotValid(message); 498 } 499 saveExtendedFieldValues(c, hd); 500 501 c.commit(); 502 hd.setEdition(nextEdition); 503 } catch (SQLException e) { 504 throw new IOFailure( 505 "SQL error while updating harvest definition " + hd + "\n" + ExceptionUtils.getSQLExceptionCause(e), 506 e); 507 } finally { 508 DBUtils.closeStatementIfOpen(s); 509 DBUtils.rollbackIfNeeded(c, "updating", hd); 510 HarvestDBConnection.release(c); 511 } 512 } 513 514 /** 515 * Activates or deactivates a partial harvest definition. This method is actually to be used not to have to read 516 * from the DB big harvest definitions and optimize the activation / deactivation, it is sort of a lightweight 517 * version of update. 518 * 519 * @param harvestDefinition the harvest definition object. 520 */ 521 @Override 522 public synchronized void flipActive(SparsePartialHarvest harvestDefinition) { 523 ArgumentNotValid.checkNotNull(harvestDefinition, "HarvestDefinition harvestDefinition"); 524 525 Connection c = HarvestDBConnection.get(); 526 PreparedStatement s = null; 527 try { 528 if (harvestDefinition.getOid() == null || !exists(c, harvestDefinition.getOid())) { 529 final String message = "Cannot update non-existing " + "harvestdefinition '" 530 + harvestDefinition.getName() + "'"; 531 log.debug(message); 532 throw new PermissionDenied(message); 533 } 534 535 c.setAutoCommit(false); 536 s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, " 537 + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? " 538 + "WHERE harvest_id = ? AND edition = ?"); 539 DBUtils.setName(s, 1, harvestDefinition, Constants.MAX_NAME_SIZE); 540 DBUtils.setComments(s, 2, harvestDefinition, Constants.MAX_COMMENT_SIZE); 541 s.setInt(3, harvestDefinition.getNumEvents()); 542 s.setTimestamp(4, new Timestamp(harvestDefinition.getSubmissionDate().getTime())); 543 s.setBoolean(5, !harvestDefinition.isActive()); 544 long nextEdition = harvestDefinition.getEdition() + 1; 545 s.setLong(6, nextEdition); 546 s.setString(7, harvestDefinition.getAudience()); 547 s.setLong(8, harvestDefinition.getOid()); 548 s.setLong(9, harvestDefinition.getEdition()); 549 int rows = s.executeUpdate(); 550 // Since the HD exists, no rows indicates bad edition 551 if (rows == 0) { 552 String message = "Somebody else must have updated " + harvestDefinition + " since edition " 553 + harvestDefinition.getEdition() + ", not updating"; 554 log.debug(message); 555 throw new PermissionDenied(message); 556 } 557 s.close(); 558 559 // Now pull more strings 560 s = c.prepareStatement("UPDATE partialharvests SET schedule_id = " 561 + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? " 562 + "WHERE harvest_id = ?"); 563 s.setString(1, harvestDefinition.getScheduleName()); 564 DBUtils.setDateMaybeNull(s, 2, harvestDefinition.getNextDate()); 565 s.setLong(3, harvestDefinition.getOid()); 566 rows = s.executeUpdate(); 567 log.debug("{} partialharvests records updated", rows); 568 s.close(); 569 c.commit(); 570 } catch (SQLException e) { 571 throw new IOFailure("SQL error while updating harvest definition " + harvestDefinition + "\n" 572 + ExceptionUtils.getSQLExceptionCause(e), e); 573 } finally { 574 DBUtils.rollbackIfNeeded(c, "updating", harvestDefinition); 575 HarvestDBConnection.release(c); 576 } 577 } 578 579 @Override 580 public synchronized boolean exists(String name) { 581 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 582 Connection c = HarvestDBConnection.get(); 583 try { 584 return 1 == DBUtils.selectIntValue(c, 585 "SELECT COUNT(harvest_id) " + "FROM harvestdefinitions WHERE name = ?", name); 586 } finally { 587 HarvestDBConnection.release(c); 588 } 589 } 590 591 @Override 592 public synchronized boolean exists(Long oid) { 593 ArgumentNotValid.checkNotNull(oid, "Long oid"); 594 Connection c = HarvestDBConnection.get(); 595 try { 596 return exists(c, oid); 597 } finally { 598 HarvestDBConnection.release(c); 599 } 600 } 601 602 /** 603 * Check if a harvestdefinition exists with the given id. 604 * 605 * @param c An open connection to the harvestDatabase 606 * @param oid A potential identifier for a harvestdefinition 607 * @return true If a harvestdefinition exists with the given id. 608 * @see HarvestDefinitionDAO#exists(Long) 609 */ 610 private boolean exists(Connection c, Long oid) { 611 return 1 == DBUtils.selectIntValue(c, 612 "SELECT COUNT(harvest_id) " + "FROM harvestdefinitions WHERE harvest_id = ?", oid); 613 } 614 615 /** 616 * Get a list of all existing harvest definitions ordered by name. 617 * 618 * @return An iterator that give the existing harvest definitions in turn 619 */ 620 @Override 621 public synchronized Iterator<HarvestDefinition> getAllHarvestDefinitions() { 622 Connection c = HarvestDBConnection.get(); 623 try { 624 List<Long> hds = DBUtils.selectLongList(c, "SELECT harvest_id FROM harvestdefinitions ORDER BY name"); 625 log.debug("Getting an iterator for all stored harvestdefinitions."); 626 627 List<HarvestDefinition> orderedList = new LinkedList<HarvestDefinition>(); 628 for (Long id : hds) { 629 orderedList.add(read(c, id)); 630 } 631 return orderedList.iterator(); 632 } finally { 633 HarvestDBConnection.release(c); 634 } 635 } 636 637 /** 638 * Gets default configurations for all domains that are not aliases. 639 * <p> 640 * This method currently gives an iterator that reads in all domains, although only on demand, that is: when calling 641 * "hasNext". 642 * 643 * @return Iterator containing the default DomainConfiguration for all domains that are not aliases 644 */ 645 @Override 646 public synchronized Iterator<DomainConfiguration> getSnapShotConfigurations() { 647 return new FilterIterator<Domain, DomainConfiguration>( 648 DomainDAO.getInstance().getAllDomainsInSnapshotHarvestOrder()) { 649 public DomainConfiguration filter(Domain domain) { 650 if (domain.getAliasInfo() == null || domain.getAliasInfo().isExpired()) { 651 return domain.getDefaultConfiguration(); 652 } else { 653 return null; 654 } 655 } 656 }; 657 } 658 659 /** 660 * Returns a list of IDs of harvest definitions that are ready to be scheduled. 661 * 662 * @param now The current date 663 * @return List of ready harvest definitions. No check is performed for whether these are already in the middle of 664 * being scheduled. 665 */ 666 @Override 667 public Iterable<Long> getReadyHarvestDefinitions(Date now) { 668 ArgumentNotValid.checkNotNull(now, "Date now"); 669 Connection connection = HarvestDBConnection.get(); 670 try { 671 List<Long> ids = DBUtils.selectLongList(connection, 672 "SELECT fullharvests.harvest_id" + " FROM fullharvests, harvestdefinitions" 673 + " WHERE harvestdefinitions.harvest_id = fullharvests.harvest_id" + " AND isactive = ? " 674 + " AND numevents < 1 AND isindexready = ?", 675 true, true); 676 ids.addAll(DBUtils.selectLongList(connection, 677 "SELECT partialharvests.harvest_id" + " FROM partialharvests, harvestdefinitions" 678 + " WHERE harvestdefinitions.harvest_id = partialharvests.harvest_id" 679 + " AND isactive = ? AND nextdate IS NOT NULL AND nextdate < ?", 680 true, now)); 681 Set<Long> distinctIds = new HashSet<>(); 682 distinctIds.addAll(ids); 683 if (distinctIds.size() != ids.size()) { 684 log.warn("Query returned multiple identical ids {}. These have been sanitized.", ids); 685 return distinctIds; 686 } else { 687 return ids; 688 } 689 } finally { 690 HarvestDBConnection.release(connection); 691 } 692 } 693 694 /** 695 * Get the harvest definition that has the given name, if any. 696 * 697 * @param name The name of a harvest definition. 698 * @return The HarvestDefinition object with that name, or null if none has that name. 699 */ 700 @Override 701 public synchronized HarvestDefinition getHarvestDefinition(String name) { 702 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 703 log.debug("Reading harvestdefinition w/ name '{}'", name); 704 Connection c = HarvestDBConnection.get(); 705 PreparedStatement s = null; 706 try { 707 s = c.prepareStatement("SELECT harvest_id FROM harvestdefinitions WHERE name = ?"); 708 s.setString(1, name); 709 ResultSet res = s.executeQuery(); 710 if (res.next()) { 711 long harvestDefinitionID = res.getLong(1); 712 s.close(); 713 return read(c, harvestDefinitionID); 714 } 715 return null; 716 } catch (SQLException e) { 717 throw new IOFailure("SQL error while getting HD by name" + "\n" + ExceptionUtils.getSQLExceptionCause(e), 718 e); 719 } finally { 720 DBUtils.closeStatementIfOpen(s); 721 HarvestDBConnection.release(c); 722 } 723 } 724 725 @Override 726 public List<HarvestRunInfo> getHarvestRunInfo(long harvestID) { 727 Connection c = HarvestDBConnection.get(); 728 PreparedStatement s = null; 729 try { 730 ResultSet res = null; 731 Map<Integer, HarvestRunInfo> runInfos = new HashMap<Integer, HarvestRunInfo>(); 732 List<HarvestRunInfo> infoList = new ArrayList<HarvestRunInfo>(); 733 734 // Select dates and counts for all different statues 735 // for each run 736 s = c.prepareStatement("SELECT name, harvest_num, status, MIN(startdate), MAX(enddate), COUNT(job_id)" 737 + " FROM jobs, harvestdefinitions" 738 + " WHERE harvestdefinitions.harvest_id = ? AND jobs.harvest_id = harvestdefinitions.harvest_id" 739 + " GROUP BY name, harvest_num, status ORDER BY harvest_num DESC"); 740 s.setLong(1, harvestID); 741 res = s.executeQuery(); 742 while (res.next()) { 743 int runNr = res.getInt(2); 744 HarvestRunInfo info = runInfos.get(runNr); 745 if (info == null) { 746 String name = res.getString(1); 747 info = new HarvestRunInfo(harvestID, name, runNr); 748 // Put into hash for easy access when updating. 749 runInfos.put(runNr, info); 750 // Add to return list in order given by DB 751 infoList.add(info); 752 } 753 JobStatus status = JobStatus.fromOrdinal(res.getInt(3)); 754 // For started states, check start date 755 if (status != JobStatus.NEW && status != JobStatus.SUBMITTED && status != JobStatus.RESUBMITTED) { 756 Date startDate = DBUtils.getDateMaybeNull(res, 4); 757 if (info.getStartDate() == null || (startDate != null && startDate.before(info.getStartDate()))) { 758 info.setStartDate(startDate); 759 } 760 } 761 // For finished jobs, check end date 762 if (status == JobStatus.DONE || status == JobStatus.FAILED) { 763 Date endDate = DBUtils.getDateMaybeNull(res, 5); 764 if (info.getEndDate() == null || (endDate != null && endDate.after(info.getEndDate()))) { 765 info.setEndDate(endDate); 766 } 767 } 768 int count = res.getInt(6); 769 info.setStatusCount(status, count); 770 } 771 s.close(); 772 s = c.prepareStatement("SELECT jobs.harvest_num, SUM(historyinfo.bytecount), " 773 + "SUM(historyinfo.objectcount)," + "COUNT(jobs.status)" + " FROM jobs, historyinfo " 774 + " WHERE jobs.harvest_id = ? AND historyinfo.job_id = jobs.job_id" + " GROUP BY jobs.harvest_num" 775 + " ORDER BY jobs.harvest_num"); 776 s.setLong(1, harvestID); 777 res = s.executeQuery(); 778 779 while (res.next()) { 780 final int harvestNum = res.getInt(1); 781 HarvestRunInfo info = runInfos.get(harvestNum); 782 if (info != null) { 783 info.setBytesHarvested(res.getLong(2)); 784 info.setDocsHarvested(res.getLong(3)); 785 } else { 786 log.debug("Harvestnum {} for harvestID {} is skipped. Must have arrived between selects", 787 harvestNum, harvestID); 788 } 789 } 790 791 // Make sure that jobs that aren't really done don't have end date. 792 for (HarvestRunInfo info : infoList) { 793 if (info.getJobCount(JobStatus.STARTED) != 0 || info.getJobCount(JobStatus.NEW) != 0 794 || info.getJobCount(JobStatus.SUBMITTED) != 0) { 795 info.setEndDate(null); 796 } 797 } 798 return infoList; 799 } catch (SQLException e) { 800 String message = "SQL error asking for harvest run info on " + harvestID + " in database" + "\n" 801 + ExceptionUtils.getSQLExceptionCause(e); 802 log.warn(message, e); 803 throw new IOFailure(message, e); 804 } finally { 805 DBUtils.closeStatementIfOpen(s); 806 HarvestDBConnection.release(c); 807 } 808 } 809 810 /** 811 * Get all domain,configuration pairs for a harvest definition in sparse version for GUI purposes. 812 * 813 * @param harvestDefinitionID The ID of the harvest definition. 814 * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions. 815 * @throws ArgumentNotValid on null argument. 816 */ 817 @Override 818 public List<SparseDomainConfiguration> getSparseDomainConfigurations(Long harvestDefinitionID) { 819 ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID"); 820 Connection c = HarvestDBConnection.get(); 821 try { 822 return getSparseDomainConfigurations(c, harvestDefinitionID); 823 } finally { 824 HarvestDBConnection.release(c); 825 } 826 } 827 828 /** 829 * Get all domain,configuration pairs for a harvest definition in sparse version. 830 * 831 * @param c a connection to the harvest database 832 * @param harvestDefinitionID The ID of the harvest definition. 833 * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions. 834 */ 835 private List<SparseDomainConfiguration> getSparseDomainConfigurations(Connection c, Long harvestDefinitionID) { 836 try (PreparedStatement s = c.prepareStatement( 837 "SELECT domains.name, configurations.name " + "FROM domains, configurations," + " harvest_configs " 838 + "WHERE harvest_id = ? AND configurations.config_id = harvest_configs.config_id" 839 + " AND configurations.domain_id = domains.domain_id");) { 840 s.setLong(1, harvestDefinitionID); 841 ResultSet res = s.executeQuery(); 842 List<SparseDomainConfiguration> resultList = new ArrayList<SparseDomainConfiguration>(); 843 while (res.next()) { 844 SparseDomainConfiguration sdc = new SparseDomainConfiguration(res.getString(1), res.getString(2)); 845 resultList.add(sdc); 846 } 847 848 Collections.sort(resultList, new SparseDomainConfigurationComparator()); 849 return resultList; 850 } catch (SQLException e) { 851 throw new IOFailure("SQL error getting sparse domains" + "\n" + ExceptionUtils.getSQLExceptionCause(e), e); 852 } 853 } 854 855 /** 856 * Get all sparse versions of partial harvests for GUI purposes ordered by name. 857 * 858 * @return An iterable (possibly empty) of SparsePartialHarvests 859 */ 860 public Iterable<SparsePartialHarvest> getSparsePartialHarvestDefinitions(boolean excludeInactive) { 861 String query = "SELECT harvestdefinitions.harvest_id," + " harvestdefinitions.name," 862 + " harvestdefinitions.comments," + " harvestdefinitions.numevents," 863 + " harvestdefinitions.submitted," + " harvestdefinitions.isactive," 864 + " harvestdefinitions.edition," + " schedules.name," + " partialharvests.nextdate, " 865 + " harvestdefinitions.audience, " + " harvestdefinitions.channel_id " 866 + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.harvest_id " 867 + " = partialharvests.harvest_id" + " AND (harvestdefinitions.isactive " + " = ?" 868 // This linie is duplicated to allow to select both active 869 // and inactive HD's. 870 + " OR harvestdefinitions" + ".isactive " + " = ?)" + " AND schedules.schedule_id " 871 + " = partialharvests.schedule_id " + "ORDER BY harvestdefinitions.name"; 872 try (Connection c = HarvestDBConnection.get(); 873 PreparedStatement s = DBUtils.prepareStatement(c, query, true, excludeInactive);) { 874 ResultSet res = s.executeQuery(); 875 List<SparsePartialHarvest> harvests = new ArrayList<SparsePartialHarvest>(); 876 while (res.next()) { 877 SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), res.getString(2), res.getString(3), 878 res.getInt(4), new Date(res.getTimestamp(5).getTime()), res.getBoolean(6), res.getLong(7), 879 res.getString(8), DBUtils.getDateMaybeNull(res, 9), res.getString(10), 880 DBUtils.getLongMaybeNull(res, 11)); 881 harvests.add(sph); 882 } 883 return harvests; 884 } catch (SQLException e) { 885 throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e); 886 } 887 } 888 889 /** 890 * Get a sparse version of a partial harvest for GUI purposes. 891 * 892 * @param harvestName Name of harvest definition. 893 * @return Sparse version of partial harvest or null for none. 894 * @throws ArgumentNotValid on null or empty name. 895 */ 896 @Override 897 public SparsePartialHarvest getSparsePartialHarvest(String harvestName) { 898 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 899 try (Connection c = HarvestDBConnection.get(); 900 PreparedStatement s = c.prepareStatement("SELECT harvestdefinitions.harvest_id," 901 + " harvestdefinitions.comments," + " harvestdefinitions.numevents," 902 + " harvestdefinitions.submitted," + " harvestdefinitions.isactive," 903 + " harvestdefinitions.edition," + " schedules.name," 904 + " partialharvests.nextdate, " + " harvestdefinitions.audience, " 905 + " harvestdefinitions.channel_id " 906 + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.name = ?" 907 + " AND harvestdefinitions.harvest_id " + "= partialharvests.harvest_id" 908 + " AND schedules.schedule_id " + "= partialharvests.schedule_id");) { 909 s.setString(1, harvestName); 910 ResultSet res = s.executeQuery(); 911 if (res.next()) { 912 SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), harvestName, res.getString(2), 913 res.getInt(3), new Date(res.getTimestamp(4).getTime()), res.getBoolean(5), res.getLong(6), 914 res.getString(7), DBUtils.getDateMaybeNull(res, 8), res.getString(9), 915 DBUtils.getLongMaybeNull(res, 10)); 916 sph.setExtendedFieldValues(getExtendedFieldValues(sph.getOid())); 917 return sph; 918 } else { 919 return null; 920 } 921 } catch (SQLException e) { 922 throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e); 923 } 924 } 925 926 /** 927 * Get all sparse versions of full harvests for GUI purposes. 928 * 929 * @return An iterable (possibly empty) of SparseFullHarvests 930 */ 931 public Iterable<SparseFullHarvest> getAllSparseFullHarvestDefinitions() { 932 try (Connection c = HarvestDBConnection.get(); 933 PreparedStatement s = c.prepareStatement("SELECT harvestdefinitions.harvest_id," 934 + " harvestdefinitions.name," + " harvestdefinitions.comments," 935 + " harvestdefinitions.numevents," + " harvestdefinitions.isactive," 936 + " harvestdefinitions.edition," + " fullharvests.maxobjects," 937 + " fullharvests.maxbytes," + " fullharvests.maxjobrunningtime," 938 + " fullharvests.previoushd, " + " harvestdefinitions.channel_id " 939 + "FROM harvestdefinitions, fullharvests" + " WHERE harvestdefinitions.harvest_id " 940 + " = fullharvests.harvest_id" + " ORDER BY harvestdefinitions.name");) { 941 ResultSet res = s.executeQuery(); 942 List<SparseFullHarvest> harvests = new ArrayList<SparseFullHarvest>(); 943 while (res.next()) { 944 SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), res.getString(2), res.getString(3), 945 res.getInt(4), res.getBoolean(5), res.getLong(6), res.getLong(7), res.getLong(8), 946 res.getLong(9), DBUtils.getLongMaybeNull(res, 10), DBUtils.getLongMaybeNull(res, 11)); 947 // EAV 948 long oid = sfh.getOid(); 949 List<AttributeAndType> attributesAndTypes = EAV.getInstance() 950 .getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int) oid); 951 sfh.setAttributesAndTypes(attributesAndTypes); 952 harvests.add(sfh); 953 } 954 return harvests; 955 } catch (SQLException e) { 956 throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e); 957 } 958 } 959 960 /** 961 * Get the name of a harvest given its ID. 962 * 963 * @param harvestDefinitionID The ID of a harvest 964 * @return The name of the given harvest. 965 * @throws ArgumentNotValid on null argument 966 * @throws UnknownID if no harvest has the given ID. 967 * @throws IOFailure on any other error talking to the database 968 */ 969 @Override 970 public String getHarvestName(Long harvestDefinitionID) { 971 ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID"); 972 try (Connection c = HarvestDBConnection.get(); 973 PreparedStatement s = c.prepareStatement("SELECT name FROM harvestdefinitions WHERE harvest_id = ?");) { 974 s.setLong(1, harvestDefinitionID); 975 ResultSet res = s.executeQuery(); 976 String name = null; 977 while (res.next()) { 978 if (name != null) { 979 throw new IOFailure("Found more than one name for harvest definition " + harvestDefinitionID + ": '" 980 + name + "' and '" + res.getString(1) + "'"); 981 } 982 name = res.getString(1); 983 } 984 if (name == null) { 985 throw new UnknownID("No name found for harvest definition " + harvestDefinitionID); 986 } 987 return name; 988 } catch (SQLException e) { 989 throw new IOFailure("An error occurred finding the name for " + "harvest definition " + harvestDefinitionID 990 + "\n" + ExceptionUtils.getSQLExceptionCause(e), e); 991 } 992 } 993 994 /** 995 * Get whether a given harvest is a snapshot or selective harvest. 996 * 997 * @param harvestDefinitionID ID of a harvest 998 * @return True if the given harvest is a snapshot harvest, false otherwise. 999 * @throws ArgumentNotValid on null argument 1000 * @throws UnknownID if no harvest has the given ID. 1001 */ 1002 @Override 1003 public boolean isSnapshot(Long harvestDefinitionID) { 1004 ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID"); 1005 try (Connection connection = HarvestDBConnection.get();) { 1006 boolean isSnapshot = DBUtils.selectAny(connection, 1007 "SELECT harvest_id FROM fullharvests WHERE harvest_id = ?", harvestDefinitionID); 1008 if (isSnapshot) { 1009 return true; 1010 } 1011 boolean isSelective = DBUtils.selectAny(connection, 1012 "SELECT harvest_id FROM partialharvests " + "WHERE harvest_id = ?", harvestDefinitionID); 1013 if (isSelective) { 1014 return false; 1015 } 1016 throw new UnknownID("Failed to find harvest definition with id " + harvestDefinitionID); 1017 } catch (SQLException e) { 1018 throw new RuntimeException("Unable to close DB connection"); 1019 } 1020 } 1021 1022 /** 1023 * Get a sparse version of a full harvest for GUI purposes. 1024 * 1025 * @param harvestName Name of harvest definition. 1026 * @return Sparse version of full harvest or null for none. 1027 * @throws ArgumentNotValid on null or empty name. 1028 * @throws UnknownID if no harvest has the given ID. 1029 * @throws IOFailure on any other error talking to the database 1030 */ 1031 @Override 1032 public SparseFullHarvest getSparseFullHarvest(String harvestName) { 1033 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 1034 try (Connection c = HarvestDBConnection.get(); 1035 PreparedStatement s = c.prepareStatement( 1036 "SELECT harvestdefinitions.harvest_id," + " harvestdefinitions.comments," 1037 + " harvestdefinitions.numevents," + " harvestdefinitions.isactive," 1038 + " harvestdefinitions.edition," + " fullharvests.maxobjects," 1039 + " fullharvests.maxbytes," + " fullharvests.maxjobrunningtime," 1040 + " fullharvests.previoushd, " + " harvestdefinitions.channel_id " 1041 + "FROM harvestdefinitions, fullharvests" + " WHERE harvestdefinitions.name = ?" 1042 + " AND harvestdefinitions.harvest_id " + " = fullharvests.harvest_id");) { 1043 s.setString(1, harvestName); 1044 ResultSet res = s.executeQuery(); 1045 if (res.next()) { 1046 SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), harvestName, res.getString(2), 1047 res.getInt(3), res.getBoolean(4), res.getLong(5), res.getLong(6), res.getLong(7), 1048 res.getLong(8), DBUtils.getLongMaybeNull(res, 9), DBUtils.getLongMaybeNull(res, 10)); 1049 // EAV 1050 long oid = sfh.getOid(); 1051 List<AttributeAndType> attributesAndTypes = EAV.getInstance() 1052 .getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int) oid); 1053 sfh.setAttributesAndTypes(attributesAndTypes); 1054 sfh.setExtendedFieldValues(getExtendedFieldValues(sfh.getOid())); 1055 return sfh; 1056 } else { 1057 return null; 1058 } 1059 } catch (SQLException e) { 1060 throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e); 1061 } 1062 } 1063 1064 /** 1065 * Get a sorted list of all domain names of a HarvestDefinition. 1066 * 1067 * @param harvestName of HarvestDefinition 1068 * @return List of all domains of the HarvestDefinition. 1069 */ 1070 @Override 1071 public List<String> getListOfDomainsOfHarvestDefinition(String harvestName) { 1072 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 1073 try (Connection c = HarvestDBConnection.get(); 1074 PreparedStatement s = c.prepareStatement( 1075 // Note: the DISTINCT below is put in deliberately to fix 1076 // bug 1878: Seeds for domain is shown twice on page 1077 // History/Harveststatus-seeds.jsp 1078 "SELECT DISTINCT domains.name" + " FROM domains," + " configurations," 1079 + " harvest_configs," + " harvestdefinitions" 1080 + " WHERE configurations.domain_id = domains.domain_id" 1081 + " AND harvest_configs.config_id = " + "configurations.config_id" 1082 + " AND harvest_configs.harvest_id = " + "harvestdefinitions.harvest_id" 1083 + " AND harvestdefinitions.name = ?" + " ORDER BY domains.name");) { 1084 s.setString(1, harvestName); 1085 ResultSet res = s.executeQuery(); 1086 List<String> domains = new ArrayList<String>(); 1087 1088 while (res.next()) { 1089 domains.add(res.getString(1)); 1090 } 1091 return domains; 1092 } catch (SQLException e) { 1093 throw new IOFailure("SQL error getting seeds of a domain of a harvest definition" + "\n" 1094 + ExceptionUtils.getSQLExceptionCause(e), e); 1095 } 1096 } 1097 1098 /** 1099 * Get a list of all seeds of a Domain in a HarvestDefinition. 1100 * 1101 * @param harvestName of HarvestDefinition 1102 * @param domainName of Domain 1103 * @param needSort boolean if sort results 1104 * @return List of all seeds of the Domain in the HarvestDefinition. 1105 */ 1106 @Override 1107 public List<String> getListOfSeedsOfDomainOfHarvestDefinition(String harvestName, String domainName, 1108 boolean needSort) { 1109 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 1110 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 1111 try (Connection c = HarvestDBConnection.get(); 1112 PreparedStatement s = c.prepareStatement("SELECT seedlists.seeds" + " FROM configurations," 1113 + " harvest_configs," + " harvestdefinitions," + " seedlists," 1114 + " config_seedlists," + " domains" + " WHERE config_seedlists.seedlist_id " 1115 + "= seedlists.seedlist_id" + " AND configurations.config_id " + "= config_seedlists.config_id" 1116 + " AND configurations.config_id " + "= harvest_configs.config_id" 1117 + " AND harvest_configs.harvest_id " + "= harvestdefinitions.harvest_id" 1118 + " AND configurations.domain_id = domains.domain_id" + " AND domains.name = ?" 1119 + " AND harvestdefinitions.name = ?");) { 1120 s.setString(1, domainName); 1121 s.setString(2, harvestName); 1122 ResultSet res = s.executeQuery(); 1123 List<String> seeds = new ArrayList<String>(); 1124 1125 while (res.next()) { 1126 String seedsOfDomain = res.getString(1); 1127 1128 StringTokenizer st = new StringTokenizer(seedsOfDomain, "\n"); 1129 1130 while (st.hasMoreTokens()) { 1131 String seed = st.nextToken(); 1132 1133 boolean isDuplicate = false; 1134 for (String entry : seeds) { 1135 if (entry.equals(seed)) { 1136 isDuplicate = true; 1137 break; 1138 } 1139 } 1140 if (!isDuplicate) { // duplicates will not be added 1141 seeds.add(seed); 1142 } 1143 } 1144 } 1145 1146 if (needSort) 1147 Collections.sort(seeds, Collator.getInstance()); 1148 1149 return seeds; 1150 } catch (SQLException e) { 1151 throw new IOFailure("SQL error getting seeds of a domain\n" + ExceptionUtils.getSQLExceptionCause(e), e); 1152 } 1153 } 1154 1155 @Override 1156 public Set<Long> getJobIdsForSnapshotDeduplicationIndex(Long harvestId) { 1157 ArgumentNotValid.checkNotNull(harvestId, "Long harvestId"); 1158 Set<Long> jobIds = new HashSet<Long>(); 1159 if (!isSnapshot(harvestId)) { 1160 throw new NotImplementedException("This functionality only works for snapshot harvests"); 1161 } 1162 List<Long> harvestDefinitions = getPreviousFullHarvests(harvestId); 1163 try (Connection c = HarvestDBConnection.get();) { 1164 List<Long> jobs = new ArrayList<Long>(); 1165 if (!harvestDefinitions.isEmpty()) { 1166 // Select all jobs from a given list of harvest definitions 1167 jobs.addAll(DBUtils.selectLongList(c, "SELECT jobs.job_id FROM jobs WHERE jobs.harvest_id IN (" 1168 + StringUtils.conjoin(",", harvestDefinitions) + ")")); 1169 } 1170 jobIds.addAll(jobs); 1171 } catch (SQLException e) { 1172 log.error("Unable to close DB connection", e); 1173 } 1174 1175 return jobIds; 1176 } 1177 1178 /** 1179 * Get list of harvests previous to this one. 1180 * 1181 * @param thisHarvest The id of this harvestdefinition 1182 * @return a list of IDs belonging to harvests previous to this one. 1183 */ 1184 private List<Long> getPreviousFullHarvests(Long thisHarvest) { 1185 List<Long> results = new ArrayList<Long>(); 1186 try (Connection c = HarvestDBConnection.get();) { 1187 // Follow the chain of originating IDs back 1188 for (Long originatingHarvest = thisHarvest; originatingHarvest != null; 1189 // Compute next originatingHarvest 1190 originatingHarvest = DBUtils.selectFirstLongValueIfAny(c, 1191 "SELECT previoushd FROM fullharvests" + " WHERE fullharvests.harvest_id=?", 1192 originatingHarvest)) { 1193 if (!originatingHarvest.equals(thisHarvest)) { 1194 results.add(originatingHarvest); 1195 } 1196 } 1197 1198 // Find the first harvest in the chain (but last in the list). 1199 Long firstHarvest = thisHarvest; 1200 if (!results.isEmpty()) { 1201 firstHarvest = results.get(results.size() - 1); 1202 } 1203 1204 // Find the last harvest in the chain before 1205 Long olderHarvest = DBUtils.selectFirstLongValueIfAny(c, 1206 "SELECT fullharvests.harvest_id" + " FROM fullharvests, harvestdefinitions," 1207 + " harvestdefinitions AS currenthd" + " WHERE currenthd.harvest_id=?" 1208 + " AND fullharvests.harvest_id " + "= harvestdefinitions.harvest_id" 1209 + " AND harvestdefinitions.submitted " + "< currenthd.submitted" 1210 + " ORDER BY harvestdefinitions.submitted " + HarvestStatusQuery.SORT_ORDER.DESC.name(), 1211 firstHarvest); 1212 // Follow the chain of originating IDs back 1213 for (Long originatingHarvest = olderHarvest; originatingHarvest != null; originatingHarvest = DBUtils 1214 .selectFirstLongValueIfAny(c, 1215 "SELECT previoushd FROM fullharvests" + " WHERE fullharvests.harvest_id=?", 1216 originatingHarvest)) { 1217 results.add(originatingHarvest); 1218 } 1219 } catch (SQLException e) { 1220 log.warn("Exception thrown while updating fullharvests.isindexready field: {}", 1221 ExceptionUtils.getSQLExceptionCause(e), e); 1222 } 1223 return results; 1224 } 1225 1226 @Override 1227 public void setIndexIsReady(Long harvestId, boolean newValue) { 1228 if (!isSnapshot(harvestId)) { 1229 throw new NotImplementedException("Not implemented for non snapshot harvests"); 1230 } else { 1231 try (Connection c = HarvestDBConnection.get(); 1232 PreparedStatement s = c 1233 .prepareStatement("UPDATE fullharvests SET isindexready=? WHERE harvest_id=?");) { 1234 s.setBoolean(1, newValue); 1235 s.setLong(2, harvestId); 1236 int rows = s.executeUpdate(); 1237 log.debug(rows + " entries of table fullharvests updated"); 1238 } catch (SQLException e) { 1239 log.warn("Exception thrown while updating fullharvests.isindexready field: {}", 1240 ExceptionUtils.getSQLExceptionCause(e), e); 1241 } 1242 } 1243 } 1244 1245 /* 1246 * Removes the entry in harvest_configs, that binds a certain domainconfiguration to this PartialHarvest. TODO maybe 1247 * update the edition as well. 1248 */ 1249 @Override 1250 public void removeDomainConfiguration(Long harvestId, SparseDomainConfiguration key) { 1251 ArgumentNotValid.checkNotNull(key, "DomainConfigurationKey key"); 1252 if (harvestId == null) { 1253 // Don't need to do anything, if PartialHarvest is not 1254 // yet stored in database 1255 log.warn("No removal of domainConfiguration, " + "as harvestId is null"); 1256 return; 1257 } 1258 Connection connection = HarvestDBConnection.get(); 1259 PreparedStatement s = null; 1260 try { 1261 s = connection.prepareStatement( 1262 "DELETE FROM harvest_configs WHERE harvest_id = ? " + "AND config_id = (SELECT config_id " 1263 + " FROM configurations, domains " + "WHERE domains.name = ? AND configurations.name = ?" 1264 + " AND domains.domain_id = configurations.domain_id)"); 1265 s.setLong(1, harvestId); 1266 s.setString(2, key.getDomainName()); 1267 s.setString(3, key.getConfigurationName()); 1268 s.executeUpdate(); 1269 } catch (SQLException e) { 1270 log.warn("Exception thrown while removing domainconfiguration: {}", ExceptionUtils.getSQLExceptionCause(e), 1271 e); 1272 } finally { 1273 DBUtils.closeStatementIfOpen(s); 1274 DBUtils.rollbackIfNeeded(connection, 1275 "removing DomainConfiguration from harvest w/id " + harvestId + " failed", harvestId); 1276 HarvestDBConnection.release(connection); 1277 } 1278 } 1279 1280 @Override 1281 public void updateNextdate(long harvestId, Date nextdate) { 1282 ArgumentNotValid.checkNotNull(harvestId, "Long harvest ID"); 1283 ArgumentNotValid.checkNotNull(nextdate, "Date nextdate"); 1284 if (harvestId < 0) { 1285 // Don't need to do anything, if PartialHarvest is not 1286 // yet stored in database 1287 return; 1288 } 1289 Connection connection = HarvestDBConnection.get(); 1290 PreparedStatement s = null; 1291 try { 1292 connection.setAutoCommit(false); 1293 s = connection.prepareStatement("UPDATE partialharvests SET nextdate = ? " + "WHERE harvest_id = ?"); 1294 DBUtils.setDateMaybeNull(s, 1, nextdate); 1295 s.setLong(2, harvestId); 1296 s.executeUpdate(); 1297 connection.commit(); 1298 } catch (SQLException e) { 1299 log.warn("Exception thrown while updating nextdate: {}", ExceptionUtils.getSQLExceptionCause(e), e); 1300 } finally { 1301 DBUtils.closeStatementIfOpen(s); 1302 DBUtils.rollbackIfNeeded(connection, "Updating nextdate from", harvestId); 1303 HarvestDBConnection.release(connection); 1304 } 1305 } 1306 1307 @Override 1308 public void addDomainConfiguration(PartialHarvest ph, SparseDomainConfiguration dcKey) { 1309 ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph"); 1310 ArgumentNotValid.checkNotNull(dcKey, "DomainConfigurationKey dcKey"); 1311 1312 try (Connection connection = HarvestDBConnection.get(); 1313 PreparedStatement s = connection.prepareStatement("INSERT INTO harvest_configs " 1314 + "( harvest_id, config_id ) " + "SELECT ?, config_id FROM configurations, domains " 1315 + "WHERE domains.name = ? AND configurations.name = ?" 1316 + " AND domains.domain_id = configurations.domain_id"); 1317 1318 ) { 1319 s.setLong(1, ph.getOid()); 1320 s.setString(2, dcKey.getDomainName()); 1321 s.setString(3, dcKey.getConfigurationName()); 1322 s.executeUpdate(); 1323 s.close(); 1324 } catch (SQLException e) { 1325 log.warn("Exception thrown while adding domainConfiguration: {}", ExceptionUtils.getSQLExceptionCause(e), 1326 e); 1327 } 1328 } 1329 1330 @Override 1331 public void resetDomainConfigurations(PartialHarvest ph, List<DomainConfiguration> dcList) { 1332 ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph"); 1333 ArgumentNotValid.checkNotNull(dcList, "List<DomainConfiguration> dcList"); 1334 1335 try (Connection connection = HarvestDBConnection.get()) { 1336 createHarvestConfigsEntries(connection, ph, ph.getOid()); 1337 } catch (SQLException e) { 1338 log.warn("Exception thrown while resetting domainConfigurations: {}", 1339 ExceptionUtils.getSQLExceptionCause(e), e); 1340 } 1341 } 1342 1343 @Override 1344 public void mapToHarvestChannel(long harvestDefinitionId, HarvestChannel channel) { 1345 ArgumentNotValid.checkNotNull(channel, "HarvestChannel channel"); 1346 1347 try (Connection connection = HarvestDBConnection.get(); 1348 PreparedStatement s = connection 1349 .prepareStatement("UPDATE harvestdefinitions SET channel_id=? WHERE harvest_id=?");) { 1350 s.setLong(1, channel.getId()); 1351 s.setLong(2, harvestDefinitionId); 1352 if (s.executeUpdate() != 1) { 1353 throw new IOFailure("Could not map harvest channel " + channel.getId() + " to harvest definition " 1354 + harvestDefinitionId); 1355 } 1356 s.close(); 1357 } catch (SQLException e) { 1358 log.warn("Exception thrown while mapping to harvest channel: {}", ExceptionUtils.getSQLExceptionCause(e), 1359 e); 1360 } 1361 } 1362 1363 /** 1364 * Saves all extended Field values for a HarvestDefinition in the Database. 1365 * 1366 * @param c Connection to Database 1367 * @param h HarvestDefinition where loaded extended Field Values will be set 1368 * @throws SQLException If database errors occur. 1369 */ 1370 private void saveExtendedFieldValues(Connection c, HarvestDefinition h) throws SQLException { 1371 List<ExtendedFieldValue> list = h.getExtendedFieldValues(); 1372 for (int i = 0; i < list.size(); i++) { 1373 ExtendedFieldValue efv = list.get(i); 1374 efv.setInstanceID(h.getOid()); 1375 1376 ExtendedFieldValueDBDAO dao = (ExtendedFieldValueDBDAO) ExtendedFieldValueDAO.getInstance(); 1377 if (efv.getExtendedFieldValueID() != null) { 1378 dao.update(c, efv, false); 1379 } else { 1380 dao.create(c, efv, false); 1381 } 1382 } 1383 } 1384 1385 /** 1386 * Reads all extended Field values from the database for a HarvestDefinition. 1387 * 1388 * @param h HarvestDefinition where loaded extended Field Values will be set 1389 * @throws SQLException If database errors occur. 1390 */ 1391 private void readExtendedFieldValues(HarvestDefinition h) throws SQLException { 1392 h.setExtendedFieldValues(getExtendedFieldValues(h.getOid())); 1393 } 1394 1395 /** 1396 * Reads all extended Field values from the database for a HarvestDefinitionOid. 1397 * 1398 * @param aOid HarvestDefinition where loaded extended Field Values will be set 1399 * @return a list of ExtendedFieldValues belonging to the given harvest oid 1400 * @throws SQLException If database errors occur. 1401 */ 1402 private List<ExtendedFieldValue> getExtendedFieldValues(Long aOid) throws SQLException { 1403 List<ExtendedFieldValue> extendedFieldValues = new ArrayList<ExtendedFieldValue>(); 1404 1405 ExtendedFieldDAO dao = ExtendedFieldDAO.getInstance(); 1406 List<ExtendedField> list = dao.getAll(ExtendedFieldTypes.HARVESTDEFINITION); 1407 1408 for (int i = 0; i < list.size(); i++) { 1409 ExtendedField ef = list.get(i); 1410 1411 ExtendedFieldValueDAO dao2 = ExtendedFieldValueDAO.getInstance(); 1412 ExtendedFieldValue efv = dao2.read(ef.getExtendedFieldID(), aOid); 1413 1414 if (efv == null) { 1415 efv = new ExtendedFieldValue(); 1416 efv.setExtendedFieldID(ef.getExtendedFieldID()); 1417 efv.setInstanceID(aOid); 1418 efv.setContent( 1419 new ExtendedFieldDefaultValue(ef.getDefaultValue(), ef.getFormattingPattern(), ef.getDatatype()) 1420 .getDBValue()); 1421 } 1422 1423 extendedFieldValues.add(efv); 1424 } 1425 1426 return extendedFieldValues; 1427 } 1428}