001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.harvester.datamodel; 025 026import java.sql.Connection; 027import java.sql.PreparedStatement; 028import java.sql.ResultSet; 029import java.sql.SQLException; 030import java.sql.Timestamp; 031import java.sql.Types; 032import java.text.Collator; 033import java.util.ArrayList; 034import java.util.Collections; 035import java.util.Comparator; 036import java.util.Date; 037import java.util.HashMap; 038import java.util.HashSet; 039import java.util.Iterator; 040import java.util.LinkedList; 041import java.util.List; 042import java.util.Map; 043import java.util.Set; 044import java.util.StringTokenizer; 045 046import org.slf4j.Logger; 047import org.slf4j.LoggerFactory; 048 049import dk.netarkivet.common.exceptions.ArgumentNotValid; 050import dk.netarkivet.common.exceptions.IOFailure; 051import dk.netarkivet.common.exceptions.IllegalState; 052import dk.netarkivet.common.exceptions.NotImplementedException; 053import dk.netarkivet.common.exceptions.PermissionDenied; 054import dk.netarkivet.common.exceptions.UnknownID; 055import dk.netarkivet.common.utils.DBUtils; 056import dk.netarkivet.common.utils.ExceptionUtils; 057import dk.netarkivet.common.utils.FilterIterator; 058import dk.netarkivet.common.utils.StringUtils; 059import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory; 060import dk.netarkivet.harvester.datamodel.eav.EAV; 061import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType; 062import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedField; 063import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDAO; 064import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDefaultValue; 065import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes; 066import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValue; 067import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDAO; 068import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDBDAO; 069import dk.netarkivet.harvester.webinterface.HarvestStatusQuery; 070 071/** 072 * A database-oriented implementation of the HarvestDefinitionDAO. 073 * <p> 074 * The statements to create the tables are located in: 075 * <ul> 076 * <li><em>Derby:</em> scripts/sql/createfullhddb.sql</li> 077 * <li><em>MySQL:</em> scripts/sql/createfullhddb.mysql</li> 078 * <li><em>PostgreSQL:</em> scripts/postgresql/netarchivesuite_init.sql</li> 079 * </ul> 080 */ 081public class HarvestDefinitionDBDAO extends HarvestDefinitionDAO { 082 083 /** The logger. */ 084 private static final Logger log = LoggerFactory.getLogger(HarvestDefinitionDBDAO.class); 085 086 /** 087 * Comparator used for sorting the UI list of {@link SparseDomainConfiguration}s. Sorts first by domain name 088 * alphabetical order, next by configuration name. 089 */ 090 private static class SparseDomainConfigurationComparator implements Comparator<SparseDomainConfiguration> { 091 092 @Override 093 public int compare(SparseDomainConfiguration sdc1, SparseDomainConfiguration sdc2) { 094 int domComp = sdc1.getDomainName().compareTo(sdc2.getDomainName()); 095 if (0 == domComp) { 096 return sdc1.getConfigurationName().compareTo(sdc2.getConfigurationName()); 097 } 098 return domComp; 099 } 100 } 101 102 /** Create a new HarvestDefinitionDAO using database. */ 103 HarvestDefinitionDBDAO() { 104 Connection connection = HarvestDBConnection.get(); 105 try { 106 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.FULLHARVESTS); 107 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTDEFINITIONS); 108 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.PARTIALHARVESTS); 109 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTCONFIGS); 110 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDTYPE); 111 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELD); 112 HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDVALUE); 113 } finally { 114 HarvestDBConnection.release(connection); 115 } 116 } 117 118 /** 119 * Create a harvest definition in Database. The harvest definition object should not have its ID set unless we are 120 * in the middle of migrating. 121 * 122 * @param harvestDefinition A new harvest definition to store in the database. 123 * @return The harvestId for the just created harvest definition. 124 * @see HarvestDefinitionDAO#create(HarvestDefinition) 125 */ 126 @Override 127 public synchronized Long create(HarvestDefinition harvestDefinition) { 128 Long id = harvestDefinition.getOid(); 129 try (Connection connection = HarvestDBConnection.get();) { 130 Date submissiondate = new Date(); 131 final int edition = 1; 132 try { 133 if (id == null) { 134 id = generateNextID(connection); 135 } 136 137 connection.setAutoCommit(false); 138 try ( PreparedStatement s = connection.prepareStatement("INSERT INTO harvestdefinitions " 139 + "( harvest_id, name, comments, numevents, submitted, isactive, edition, audience ) " 140 + "VALUES ( ?, ?, ?, ?, ?, ?, ?,? )");) { 141 s.setLong(1, id); 142 DBUtils.setName(s, 2, harvestDefinition, Constants.MAX_NAME_SIZE); 143 DBUtils.setComments(s, 3, harvestDefinition, Constants.MAX_COMMENT_SIZE); 144 s.setLong(4, harvestDefinition.getNumEvents()); 145 // Don't set on object, as we may yet rollback 146 s.setTimestamp(5, new Timestamp(submissiondate.getTime())); 147 s.setBoolean(6, harvestDefinition.getActive()); 148 s.setLong(7, edition); 149 s.setString(8, harvestDefinition.getAudience()); 150 s.executeUpdate(); 151 } 152 if (harvestDefinition instanceof FullHarvest) { 153 FullHarvest fh = (FullHarvest) harvestDefinition; 154 try ( PreparedStatement s = connection.prepareStatement( 155 "INSERT INTO fullharvests " 156 + "( harvest_id, maxobjects, maxbytes, maxjobrunningtime, previoushd, isindexready)" 157 + "VALUES ( ?, ?, ?, ?, ?, ? )"); ) { 158 s.setLong(1, id); 159 s.setLong(2, fh.getMaxCountObjects()); 160 s.setLong(3, fh.getMaxBytes()); 161 s.setLong(4, fh.getMaxJobRunningTime()); 162 if (fh.getPreviousHarvestDefinition() != null) { 163 s.setLong(5, fh.getPreviousHarvestDefinition().getOid()); 164 } else { 165 s.setNull(5, Types.BIGINT); 166 } 167 s.setBoolean(6, fh.getIndexReady()); 168 s.executeUpdate(); 169 } 170 } else if (harvestDefinition instanceof PartialHarvest) { 171 PartialHarvest ph = (PartialHarvest) harvestDefinition; 172 // Get schedule id 173 long scheduleId = DBUtils.selectLongValue(connection, 174 "SELECT schedule_id FROM schedules WHERE name = ?", ph.getSchedule().getName()); 175 try ( PreparedStatement s = connection.prepareStatement("INSERT INTO partialharvests ( harvest_id, schedule_id, nextdate ) " 176 + "VALUES ( ?, ?, ? )"); ) { 177 s.setLong(1, id); 178 s.setLong(2, scheduleId); 179 DBUtils.setDateMaybeNull(s, 3, ph.getNextDate()); 180 s.executeUpdate(); 181 createHarvestConfigsEntries(connection, ph, id); 182 } 183 } else { 184 String message = "Harvest definition " + harvestDefinition + " is of unknown class " 185 + harvestDefinition.getClass(); 186 log.warn(message); 187 throw new ArgumentNotValid(message); 188 } 189 connection.commit(); 190 191 // Now that we have committed, set new data on object. 192 harvestDefinition.setSubmissionDate(submissiondate); 193 harvestDefinition.setEdition(edition); 194 harvestDefinition.setOid(id); 195 196 // saving after receiving id 197 saveExtendedFieldValues(connection, harvestDefinition); 198 199 } catch (SQLException e) { 200 String message = "SQL error creating harvest definition " + harvestDefinition + " in database" + "\n" 201 + ExceptionUtils.getSQLExceptionCause(e); 202 log.warn(message, e); 203 throw new IOFailure(message, e); 204 } finally { 205 DBUtils.rollbackIfNeeded(connection, "creating", harvestDefinition); 206 } 207 } catch (SQLException e) { 208 log.error("Unable to close db resources", e); 209 } 210 211 return id; 212 } 213 214 /** 215 * Create the entries in the harvest_configs table that connect PartialHarvests and their configurations. 216 * 217 * @param c harvest definition DB connection 218 * @param ph The harvest to insert entries for. 219 * @param id The id of the harvest -- this may not yet be set on ph 220 * @throws SQLException If a database error occurs during the create process. 221 */ 222 private void createHarvestConfigsEntries(Connection c, PartialHarvest ph, long id) throws SQLException { 223 try (PreparedStatement s = c.prepareStatement("DELETE FROM harvest_configs WHERE harvest_id = ?");) { 224 s.setLong(1, id); 225 s.executeUpdate(); 226 } 227 try (PreparedStatement s = c.prepareStatement("INSERT INTO harvest_configs " + "( harvest_id, config_id ) " 228 + "SELECT ?, config_id FROM configurations, domains " 229 + "WHERE domains.name = ? AND configurations.name = ?" 230 + " AND domains.domain_id = configurations.domain_id"); 231 ) { 232 Iterator<DomainConfiguration> dcs = ph.getDomainConfigurations(); 233 while (dcs.hasNext()) { 234 DomainConfiguration dc = dcs.next(); 235 s.setLong(1, id); 236 s.setString(2, dc.getDomainName()); 237 s.setString(3, dc.getName()); 238 s.executeUpdate(); 239 } 240 } 241 } 242 243 /** 244 * Generates the next id of a harvest definition. this implementation retrieves the maximum value of harvest_id in 245 * the DB, and returns this value + 1. 246 * 247 * @param c An open connection to the harvestDatabase 248 * @return The next available ID 249 */ 250 private synchronized Long generateNextID(Connection c) { 251 Long maxVal = DBUtils.selectLongValue(c, "SELECT max(harvest_id) FROM harvestdefinitions"); 252 if (maxVal == null) { 253 maxVal = 0L; 254 } 255 return maxVal + 1L; 256 } 257 258 /** 259 * Read the stored harvest definition for the given ID. 260 * 261 * @param harvestDefinitionID An ID number for a harvest definition 262 * @return A harvest definition that has been read from persistent storage. 263 * @throws UnknownID if no entry with that ID exists in the database 264 * @throws IOFailure If DB-failure occurs? 265 * @see HarvestDefinitionDAO#read(Long) 266 */ 267 @Override 268 public synchronized HarvestDefinition read(Long harvestDefinitionID) throws UnknownID, IOFailure { 269 Connection c = HarvestDBConnection.get(); 270 try { 271 return read(c, harvestDefinitionID); 272 } finally { 273 HarvestDBConnection.release(c); 274 } 275 } 276 277 /** 278 * Read the stored harvest definition for the given ID. 279 * 280 * @param c The used database connection 281 * @param harvestDefinitionID ID number for a harvest definition 282 * @return A harvest definition that has been read from persistent storage. 283 * @throws UnknownID if no entry with that ID exists in the database 284 * @throws IOFailure If DB-failure occurs? 285 * @see HarvestDefinitionDAO#read(Long) 286 */ 287 private HarvestDefinition read(Connection c, Long harvestDefinitionID) throws UnknownID, IOFailure { 288 if (!exists(c, harvestDefinitionID)) { 289 String message = "Unknown harvest definition " + harvestDefinitionID; 290 log.debug(message); 291 throw new UnknownID(message); 292 } 293 log.debug("Reading harvestdefinition w/ id {}", harvestDefinitionID); 294 PreparedStatement s = null; 295 try { 296 s = c.prepareStatement("SELECT name, comments, numevents, submitted, " 297 + "previoushd, maxobjects, maxbytes, " 298 + "maxjobrunningtime, isindexready, isactive, edition, audience " 299 + "FROM harvestdefinitions, fullharvests " + "WHERE harvestdefinitions.harvest_id = ?" 300 + " AND harvestdefinitions.harvest_id " + " = fullharvests.harvest_id"); 301 s.setLong(1, harvestDefinitionID); 302 ResultSet res = s.executeQuery(); 303 if (res.next()) { 304 // Found full harvest 305 log.debug("fullharvest found w/id " + harvestDefinitionID); 306 final String name = res.getString(1); 307 final String comments = res.getString(2); 308 final int numEvents = res.getInt(3); 309 final Date submissionDate = new Date(res.getTimestamp(4).getTime()); 310 final long maxObjects = res.getLong(6); 311 final long maxBytes = res.getLong(7); 312 final long maxJobRunningtime = res.getLong(8); 313 final boolean isIndexReady = res.getBoolean(9); 314 FullHarvest fh; 315 final long prevhd = res.getLong(5); 316 if (!res.wasNull()) { 317 fh = new FullHarvest(name, comments, prevhd, maxObjects, maxBytes, maxJobRunningtime, isIndexReady, 318 DAOProviderFactory.getHarvestDefinitionDAOProvider(), 319 DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(), 320 DAOProviderFactory.getDomainDAOProvider()); 321 } else { 322 fh = new FullHarvest(name, comments, null, maxObjects, maxBytes, maxJobRunningtime, isIndexReady, 323 DAOProviderFactory.getHarvestDefinitionDAOProvider(), 324 DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(), 325 DAOProviderFactory.getDomainDAOProvider()); 326 } 327 fh.setSubmissionDate(submissionDate); 328 fh.setNumEvents(numEvents); 329 fh.setActive(res.getBoolean(10)); 330 fh.setOid(harvestDefinitionID); 331 fh.setEdition(res.getLong(11)); 332 fh.setAudience(res.getString(12)); 333 334 readExtendedFieldValues(fh); 335 336 // We found a FullHarvest object, just return it. 337 log.debug("Returned FullHarvest object w/ id {}", harvestDefinitionID); 338 return fh; 339 } 340 s.close(); 341 // No full harvest with that ID, try selective harvest 342 s = c.prepareStatement("SELECT harvestdefinitions.name," + " harvestdefinitions.comments," 343 + " harvestdefinitions.numevents," + " harvestdefinitions.submitted," 344 + " harvestdefinitions.isactive," + " harvestdefinitions.edition," 345 + " harvestdefinitions.audience," + " schedules.name," 346 + " partialharvests.nextdate, " + " harvestdefinitions.channel_id " 347 + "FROM harvestdefinitions, partialharvests, schedules" 348 + " WHERE harvestdefinitions.harvest_id = ?" + " AND harvestdefinitions.harvest_id " 349 + "= partialharvests.harvest_id" + " AND schedules.schedule_id " 350 + "= partialharvests.schedule_id"); 351 s.setLong(1, harvestDefinitionID); 352 res = s.executeQuery(); 353 boolean foundPartialHarvest = res.next(); 354 if (foundPartialHarvest) { 355 log.debug("Partialharvest found w/ id " + harvestDefinitionID); 356 // Have to get configs before creating object, so storing data 357 // here. 358 final String name = res.getString(1); 359 final String comments = res.getString(2); 360 final int numEvents = res.getInt(3); 361 final Date submissionDate = new Date(res.getTimestamp(4).getTime()); 362 final boolean active = res.getBoolean(5); 363 final long edition = res.getLong(6); 364 final String audience = res.getString(7); 365 final String scheduleName = res.getString(8); 366 final Date nextDate = DBUtils.getDateMaybeNull(res, 9); 367 final Long channelId = DBUtils.getLongMaybeNull(res, 10); 368 s.close(); 369 // Found partial harvest -- have to find configurations. 370 // To avoid holding on to the readlock while getting domains, 371 // we grab the strings first, then look up domains and configs. 372 final DomainDAO domainDao = DomainDAO.getInstance(); 373 List<SparseDomainConfiguration> configs = new ArrayList<SparseDomainConfiguration>(); 374 s = c.prepareStatement("SELECT domains.name, configurations.name " 375 + "FROM domains, configurations, harvest_configs " + "WHERE harvest_id = ?" 376 + " AND configurations.config_id " + "= harvest_configs.config_id" 377 + " AND configurations.domain_id = domains.domain_id"); 378 s.setLong(1, harvestDefinitionID); 379 res = s.executeQuery(); 380 while (res.next()) { 381 configs.add(new SparseDomainConfiguration(res.getString(1), res.getString(2))); 382 } 383 s.close(); 384 List<DomainConfiguration> configurations = new ArrayList<DomainConfiguration>(); 385 for (SparseDomainConfiguration domainConfig : configs) { 386 configurations.add(domainDao.getDomainConfiguration(domainConfig.getDomainName(), 387 domainConfig.getConfigurationName())); 388 } 389 390 Schedule schedule = ScheduleDAO.getInstance().read(scheduleName); 391 392 PartialHarvest ph = new PartialHarvest(configurations, schedule, name, comments, audience); 393 394 ph.setNumEvents(numEvents); 395 ph.setSubmissionDate(submissionDate); 396 ph.setActive(active); 397 ph.setEdition(edition); 398 ph.setNextDate(nextDate); 399 ph.setOid(harvestDefinitionID); 400 if (channelId != null) { 401 ph.setChannelId(channelId); 402 } 403 404 readExtendedFieldValues(ph); 405 406 return ph; 407 } else { 408 throw new IllegalState("No entries in fullharvests or partialharvests found for id " 409 + harvestDefinitionID); 410 } 411 } catch (SQLException e) { 412 throw new IOFailure("SQL Error while reading harvest definition " + harvestDefinitionID + "\n" 413 + ExceptionUtils.getSQLExceptionCause(e), e); 414 } 415 } 416 417 /** 418 * Update an existing harvest definition with new info. 419 * 420 * @param hd An updated harvest definition 421 * @see HarvestDefinitionDAO#update(HarvestDefinition) 422 */ 423 public synchronized void update(HarvestDefinition hd) { 424 ArgumentNotValid.checkNotNull(hd, "HarvestDefinition hd"); 425 if (hd.getOid() == null || !exists(hd.getOid())) { 426 final String message = "Cannot update non-existing " + "harvestdefinition '" + hd.getName() + "'"; 427 log.debug(message); 428 throw new PermissionDenied(message); 429 } 430 HarvestDefinition preHD = null; 431 if (hd instanceof FullHarvest) { 432 preHD = ((FullHarvest) hd).getPreviousHarvestDefinition(); 433 } 434 435 Connection c = HarvestDBConnection.get(); 436 PreparedStatement s = null; 437 try { 438 c.setAutoCommit(false); 439 s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, " 440 + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? " 441 + "WHERE harvest_id = ? AND edition = ?"); 442 DBUtils.setName(s, 1, hd, Constants.MAX_NAME_SIZE); 443 DBUtils.setComments(s, 2, hd, Constants.MAX_COMMENT_SIZE); 444 s.setInt(3, hd.getNumEvents()); 445 s.setTimestamp(4, new Timestamp(hd.getSubmissionDate().getTime())); 446 s.setBoolean(5, hd.getActive()); 447 long nextEdition = hd.getEdition() + 1; 448 s.setLong(6, nextEdition); 449 s.setString(7, hd.getAudience()); 450 s.setLong(8, hd.getOid()); 451 s.setLong(9, hd.getEdition()); 452 453 int rows = s.executeUpdate(); 454 // Since the HD exists, no rows indicates bad edition 455 if (rows == 0) { 456 String message = "Somebody else must have updated " + hd + " since edition " + hd.getEdition() 457 + ", not updating"; 458 log.debug(message); 459 throw new PermissionDenied(message); 460 } 461 s.close(); 462 if (hd instanceof FullHarvest) { 463 FullHarvest fh = (FullHarvest) hd; 464 s = c.prepareStatement("UPDATE fullharvests SET previoushd = ?, " + "maxobjects = ?, " 465 + "maxbytes = ?, " + "maxjobrunningtime = ?, isindexready = ? " + "WHERE harvest_id = ?"); 466 if (preHD != null) { 467 s.setLong(1, preHD.getOid()); 468 } else { 469 s.setNull(1, Types.BIGINT); 470 } 471 s.setLong(2, fh.getMaxCountObjects()); 472 s.setLong(3, fh.getMaxBytes()); 473 s.setLong(4, fh.getMaxJobRunningTime()); 474 s.setBoolean(5, fh.getIndexReady()); 475 s.setLong(6, fh.getOid()); 476 477 rows = s.executeUpdate(); 478 log.debug("{} fullharvests records updated", rows); 479 } else if (hd instanceof PartialHarvest) { 480 PartialHarvest ph = (PartialHarvest) hd; 481 s = c.prepareStatement("UPDATE partialharvests SET " + "schedule_id = " 482 + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? " 483 + "WHERE harvest_id = ?"); 484 s.setString(1, ph.getSchedule().getName()); 485 DBUtils.setDateMaybeNull(s, 2, ph.getNextDate()); 486 s.setLong(3, ph.getOid()); 487 rows = s.executeUpdate(); 488 log.debug("{} partialharvests records updated", rows); 489 s.close(); 490 // FIXME The updates to harvest_configs table should be done 491 // in method removeDomainConfiguration(), and not here. 492 // The following deletes ALL harvest_configs entries for 493 // this PartialHarvest, and creates the entries for the 494 // PartialHarvest again!! 495 createHarvestConfigsEntries(c, ph, ph.getOid()); 496 } else { 497 String message = "Harvest definition " + hd + " has unknown class " + hd.getClass(); 498 log.warn(message); 499 throw new ArgumentNotValid(message); 500 } 501 saveExtendedFieldValues(c, hd); 502 503 c.commit(); 504 hd.setEdition(nextEdition); 505 } catch (SQLException e) { 506 throw new IOFailure("SQL error while updating harvest definition " + hd + "\n" 507 + ExceptionUtils.getSQLExceptionCause(e), e); 508 } finally { 509 DBUtils.closeStatementIfOpen(s); 510 DBUtils.rollbackIfNeeded(c, "updating", hd); 511 HarvestDBConnection.release(c); 512 } 513 } 514 515 /** 516 * Activates or deactivates a partial harvest definition. This method is actually to be used not to have to read 517 * from the DB big harvest definitions and optimize the activation / deactivation, it is sort of a lightweight 518 * version of update. 519 * 520 * @param harvestDefinition the harvest definition object. 521 */ 522 @Override 523 public synchronized void flipActive(SparsePartialHarvest harvestDefinition) { 524 ArgumentNotValid.checkNotNull(harvestDefinition, "HarvestDefinition harvestDefinition"); 525 526 Connection c = HarvestDBConnection.get(); 527 PreparedStatement s = null; 528 try { 529 if (harvestDefinition.getOid() == null || !exists(c, harvestDefinition.getOid())) { 530 final String message = "Cannot update non-existing " + "harvestdefinition '" 531 + harvestDefinition.getName() + "'"; 532 log.debug(message); 533 throw new PermissionDenied(message); 534 } 535 536 c.setAutoCommit(false); 537 s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, " 538 + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? " 539 + "WHERE harvest_id = ? AND edition = ?"); 540 DBUtils.setName(s, 1, harvestDefinition, Constants.MAX_NAME_SIZE); 541 DBUtils.setComments(s, 2, harvestDefinition, Constants.MAX_COMMENT_SIZE); 542 s.setInt(3, harvestDefinition.getNumEvents()); 543 s.setTimestamp(4, new Timestamp(harvestDefinition.getSubmissionDate().getTime())); 544 s.setBoolean(5, !harvestDefinition.isActive()); 545 long nextEdition = harvestDefinition.getEdition() + 1; 546 s.setLong(6, nextEdition); 547 s.setString(7, harvestDefinition.getAudience()); 548 s.setLong(8, harvestDefinition.getOid()); 549 s.setLong(9, harvestDefinition.getEdition()); 550 int rows = s.executeUpdate(); 551 // Since the HD exists, no rows indicates bad edition 552 if (rows == 0) { 553 String message = "Somebody else must have updated " + harvestDefinition + " since edition " 554 + harvestDefinition.getEdition() + ", not updating"; 555 log.debug(message); 556 throw new PermissionDenied(message); 557 } 558 s.close(); 559 560 // Now pull more strings 561 s = c.prepareStatement("UPDATE partialharvests SET schedule_id = " 562 + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? " 563 + "WHERE harvest_id = ?"); 564 s.setString(1, harvestDefinition.getScheduleName()); 565 DBUtils.setDateMaybeNull(s, 2, harvestDefinition.getNextDate()); 566 s.setLong(3, harvestDefinition.getOid()); 567 rows = s.executeUpdate(); 568 log.debug("{} partialharvests records updated", rows); 569 s.close(); 570 c.commit(); 571 } catch (SQLException e) { 572 throw new IOFailure("SQL error while updating harvest definition " + harvestDefinition + "\n" 573 + ExceptionUtils.getSQLExceptionCause(e), e); 574 } finally { 575 DBUtils.rollbackIfNeeded(c, "updating", harvestDefinition); 576 HarvestDBConnection.release(c); 577 } 578 } 579 580 @Override 581 public synchronized boolean exists(String name) { 582 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 583 Connection c = HarvestDBConnection.get(); 584 try { 585 return 1 == DBUtils.selectIntValue(c, "SELECT COUNT(harvest_id) " 586 + "FROM harvestdefinitions WHERE name = ?", name); 587 } finally { 588 HarvestDBConnection.release(c); 589 } 590 } 591 592 @Override 593 public synchronized boolean exists(Long oid) { 594 ArgumentNotValid.checkNotNull(oid, "Long oid"); 595 Connection c = HarvestDBConnection.get(); 596 try { 597 return exists(c, oid); 598 } finally { 599 HarvestDBConnection.release(c); 600 } 601 } 602 603 /** 604 * Check if a harvestdefinition exists with the given id. 605 * 606 * @param c An open connection to the harvestDatabase 607 * @param oid A potential identifier for a harvestdefinition 608 * @return true If a harvestdefinition exists with the given id. 609 * @see HarvestDefinitionDAO#exists(Long) 610 */ 611 private boolean exists(Connection c, Long oid) { 612 return 1 == DBUtils.selectIntValue(c, "SELECT COUNT(harvest_id) " 613 + "FROM harvestdefinitions WHERE harvest_id = ?", oid); 614 } 615 616 /** 617 * Get a list of all existing harvest definitions ordered by name. 618 * 619 * @return An iterator that give the existing harvest definitions in turn 620 */ 621 @Override 622 public synchronized Iterator<HarvestDefinition> getAllHarvestDefinitions() { 623 Connection c = HarvestDBConnection.get(); 624 try { 625 List<Long> hds = DBUtils.selectLongList(c, "SELECT harvest_id FROM harvestdefinitions ORDER BY name"); 626 log.debug("Getting an iterator for all stored harvestdefinitions."); 627 628 List<HarvestDefinition> orderedList = new LinkedList<HarvestDefinition>(); 629 for (Long id : hds) { 630 orderedList.add(read(c, id)); 631 } 632 return orderedList.iterator(); 633 } finally { 634 HarvestDBConnection.release(c); 635 } 636 } 637 638 /** 639 * Gets default configurations for all domains that are not aliases. 640 * <p> 641 * This method currently gives an iterator that reads in all domains, although only on demand, that is: when calling 642 * "hasNext". 643 * 644 * @return Iterator containing the default DomainConfiguration for all domains that are not aliases 645 */ 646 @Override 647 public synchronized Iterator<DomainConfiguration> getSnapShotConfigurations() { 648 return new FilterIterator<Domain, DomainConfiguration>(DomainDAO.getInstance() 649 .getAllDomainsInSnapshotHarvestOrder()) { 650 public DomainConfiguration filter(Domain domain) { 651 if (domain.getAliasInfo() == null || domain.getAliasInfo().isExpired()) { 652 return domain.getDefaultConfiguration(); 653 } else { 654 return null; 655 } 656 } 657 }; 658 } 659 660 /** 661 * Returns a list of IDs of harvest definitions that are ready to be scheduled. 662 * 663 * @param now The current date 664 * @return List of ready harvest definitions. No check is performed for whether these are already in the middle of 665 * being scheduled. 666 */ 667 @Override 668 public Iterable<Long> getReadyHarvestDefinitions(Date now) { 669 ArgumentNotValid.checkNotNull(now, "Date now"); 670 Connection connection = HarvestDBConnection.get(); 671 try { 672 List<Long> ids = DBUtils.selectLongList(connection, "SELECT fullharvests.harvest_id" 673 + " FROM fullharvests, harvestdefinitions" 674 + " WHERE harvestdefinitions.harvest_id = fullharvests.harvest_id" + " AND isactive = ? " 675 + " AND numevents < 1 AND isindexready = ?", true, true); 676 ids.addAll(DBUtils.selectLongList(connection, "SELECT partialharvests.harvest_id" 677 + " FROM partialharvests, harvestdefinitions" 678 + " WHERE harvestdefinitions.harvest_id = partialharvests.harvest_id" 679 + " AND isactive = ? AND nextdate IS NOT NULL AND nextdate < ?", true, now)); 680 Set<Long> distinctIds = new HashSet<>(); 681 distinctIds.addAll(ids); 682 if (distinctIds.size() != ids.size()) { 683 log.warn("Query returned multiple identical ids {}. These have been sanitized.", ids); 684 return distinctIds; 685 } else { 686 return ids; 687 } 688 } finally { 689 HarvestDBConnection.release(connection); 690 } 691 } 692 693 /** 694 * Get the harvest definition that has the given name, if any. 695 * 696 * @param name The name of a harvest definition. 697 * @return The HarvestDefinition object with that name, or null if none has that name. 698 */ 699 @Override 700 public synchronized HarvestDefinition getHarvestDefinition(String name) { 701 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 702 log.debug("Reading harvestdefinition w/ name '{}'", name); 703 Connection c = HarvestDBConnection.get(); 704 PreparedStatement s = null; 705 try { 706 s = c.prepareStatement("SELECT harvest_id FROM harvestdefinitions WHERE name = ?"); 707 s.setString(1, name); 708 ResultSet res = s.executeQuery(); 709 if (res.next()) { 710 long harvestDefinitionID = res.getLong(1); 711 s.close(); 712 return read(c, harvestDefinitionID); 713 } 714 return null; 715 } catch (SQLException e) { 716 throw new IOFailure("SQL error while getting HD by name" + "\n" + ExceptionUtils.getSQLExceptionCause(e), e); 717 } finally { 718 DBUtils.closeStatementIfOpen(s); 719 HarvestDBConnection.release(c); 720 } 721 } 722 723 @Override 724 public List<HarvestRunInfo> getHarvestRunInfo(long harvestID) { 725 Connection c = HarvestDBConnection.get(); 726 PreparedStatement s = null; 727 try { 728 ResultSet res = null; 729 Map<Integer, HarvestRunInfo> runInfos = new HashMap<Integer, HarvestRunInfo>(); 730 List<HarvestRunInfo> infoList = new ArrayList<HarvestRunInfo>(); 731 732 // Select dates and counts for all different statues 733 // for each run 734 s = c.prepareStatement("SELECT name, harvest_num, status, MIN(startdate), MAX(enddate), COUNT(job_id)" 735 + " FROM jobs, harvestdefinitions" 736 + " WHERE harvestdefinitions.harvest_id = ? AND jobs.harvest_id = harvestdefinitions.harvest_id" 737 + " GROUP BY name, harvest_num, status ORDER BY harvest_num DESC"); 738 s.setLong(1, harvestID); 739 res = s.executeQuery(); 740 while (res.next()) { 741 int runNr = res.getInt(2); 742 HarvestRunInfo info = runInfos.get(runNr); 743 if (info == null) { 744 String name = res.getString(1); 745 info = new HarvestRunInfo(harvestID, name, runNr); 746 // Put into hash for easy access when updating. 747 runInfos.put(runNr, info); 748 // Add to return list in order given by DB 749 infoList.add(info); 750 } 751 JobStatus status = JobStatus.fromOrdinal(res.getInt(3)); 752 // For started states, check start date 753 if (status != JobStatus.NEW && status != JobStatus.SUBMITTED && status != JobStatus.RESUBMITTED) { 754 Date startDate = DBUtils.getDateMaybeNull(res, 4); 755 if (info.getStartDate() == null || (startDate != null && startDate.before(info.getStartDate()))) { 756 info.setStartDate(startDate); 757 } 758 } 759 // For finished jobs, check end date 760 if (status == JobStatus.DONE || status == JobStatus.FAILED) { 761 Date endDate = DBUtils.getDateMaybeNull(res, 5); 762 if (info.getEndDate() == null || (endDate != null && endDate.after(info.getEndDate()))) { 763 info.setEndDate(endDate); 764 } 765 } 766 int count = res.getInt(6); 767 info.setStatusCount(status, count); 768 } 769 s.close(); 770 s = c.prepareStatement("SELECT jobs.harvest_num, SUM(historyinfo.bytecount), " 771 + "SUM(historyinfo.objectcount)," + "COUNT(jobs.status)" + " FROM jobs, historyinfo " 772 + " WHERE jobs.harvest_id = ? AND historyinfo.job_id = jobs.job_id" + " GROUP BY jobs.harvest_num" 773 + " ORDER BY jobs.harvest_num"); 774 s.setLong(1, harvestID); 775 res = s.executeQuery(); 776 777 while (res.next()) { 778 final int harvestNum = res.getInt(1); 779 HarvestRunInfo info = runInfos.get(harvestNum); 780 if (info != null) { 781 info.setBytesHarvested(res.getLong(2)); 782 info.setDocsHarvested(res.getLong(3)); 783 } else { 784 log.debug("Harvestnum {} for harvestID {} is skipped. Must have arrived between selects", 785 harvestNum, harvestID); 786 } 787 } 788 789 // Make sure that jobs that aren't really done don't have end date. 790 for (HarvestRunInfo info : infoList) { 791 if (info.getJobCount(JobStatus.STARTED) != 0 || info.getJobCount(JobStatus.NEW) != 0 792 || info.getJobCount(JobStatus.SUBMITTED) != 0) { 793 info.setEndDate(null); 794 } 795 } 796 return infoList; 797 } catch (SQLException e) { 798 String message = "SQL error asking for harvest run info on " + harvestID + " in database" + "\n" 799 + ExceptionUtils.getSQLExceptionCause(e); 800 log.warn(message, e); 801 throw new IOFailure(message, e); 802 } finally { 803 DBUtils.closeStatementIfOpen(s); 804 HarvestDBConnection.release(c); 805 } 806 } 807 808 /** 809 * Get all domain,configuration pairs for a harvest definition in sparse version for GUI purposes. 810 * 811 * @param harvestDefinitionID The ID of the harvest definition. 812 * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions. 813 * @throws ArgumentNotValid on null argument. 814 */ 815 @Override 816 public List<SparseDomainConfiguration> getSparseDomainConfigurations(Long harvestDefinitionID) { 817 ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID"); 818 Connection c = HarvestDBConnection.get(); 819 try { 820 return getSparseDomainConfigurations(c, harvestDefinitionID); 821 } finally { 822 HarvestDBConnection.release(c); 823 } 824 } 825 826 /** 827 * Get all domain,configuration pairs for a harvest definition in sparse version. 828 * 829 * @param c a connection to the harvest database 830 * @param harvestDefinitionID The ID of the harvest definition. 831 * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions. 832 */ 833 private List<SparseDomainConfiguration> getSparseDomainConfigurations(Connection c, Long harvestDefinitionID) { 834 try (PreparedStatement s = c 835 .prepareStatement("SELECT domains.name, configurations.name " + "FROM domains, configurations," 836 + " harvest_configs " 837 + "WHERE harvest_id = ? AND configurations.config_id = harvest_configs.config_id" 838 + " AND configurations.domain_id = domains.domain_id"); 839 ) { 840 s.setLong(1, harvestDefinitionID); 841 ResultSet res = s.executeQuery(); 842 List<SparseDomainConfiguration> resultList = new ArrayList<SparseDomainConfiguration>(); 843 while (res.next()) { 844 SparseDomainConfiguration sdc = new SparseDomainConfiguration(res.getString(1), res.getString(2)); 845 resultList.add(sdc); 846 } 847 848 Collections.sort(resultList, new SparseDomainConfigurationComparator()); 849 return resultList; 850 } catch (SQLException e) { 851 throw new IOFailure("SQL error getting sparse domains" + "\n" + ExceptionUtils.getSQLExceptionCause(e), e); 852 } 853 } 854 855 /** 856 * Get all sparse versions of partial harvests for GUI purposes ordered by name. 857 * 858 * @return An iterable (possibly empty) of SparsePartialHarvests 859 */ 860 public Iterable<SparsePartialHarvest> getSparsePartialHarvestDefinitions(boolean excludeInactive) { 861 String query = "SELECT harvestdefinitions.harvest_id," + " harvestdefinitions.name," 862 + " harvestdefinitions.comments," + " harvestdefinitions.numevents," 863 + " harvestdefinitions.submitted," + " harvestdefinitions.isactive," 864 + " harvestdefinitions.edition," + " schedules.name," + " partialharvests.nextdate, " 865 + " harvestdefinitions.audience, " + " harvestdefinitions.channel_id " 866 + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.harvest_id " 867 + " = partialharvests.harvest_id" + " AND (harvestdefinitions.isactive " 868 + " = ?" 869 // This linie is duplicated to allow to select both active 870 // and inactive HD's. 871 + " OR harvestdefinitions" + ".isactive " + " = ?)" + " AND schedules.schedule_id " 872 + " = partialharvests.schedule_id " + "ORDER BY harvestdefinitions.name"; 873 try ( 874 Connection c = HarvestDBConnection.get(); 875 PreparedStatement s = DBUtils.prepareStatement(c, query, true, excludeInactive); 876 ) { 877 ResultSet res = s.executeQuery(); 878 List<SparsePartialHarvest> harvests = new ArrayList<SparsePartialHarvest>(); 879 while (res.next()) { 880 SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), res.getString(2), res.getString(3), 881 res.getInt(4), new Date(res.getTimestamp(5).getTime()), res.getBoolean(6), res.getLong(7), 882 res.getString(8), DBUtils.getDateMaybeNull(res, 9), res.getString(10), 883 DBUtils.getLongMaybeNull(res, 11)); 884 harvests.add(sph); 885 } 886 return harvests; 887 } catch (SQLException e) { 888 throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e); 889 } 890 } 891 892 /** 893 * Get a sparse version of a partial harvest for GUI purposes. 894 * 895 * @param harvestName Name of harvest definition. 896 * @return Sparse version of partial harvest or null for none. 897 * @throws ArgumentNotValid on null or empty name. 898 */ 899 @Override 900 public SparsePartialHarvest getSparsePartialHarvest(String harvestName) { 901 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 902 try ( 903 Connection c = HarvestDBConnection.get(); 904 PreparedStatement s = c 905 .prepareStatement("SELECT harvestdefinitions.harvest_id," + " harvestdefinitions.comments," 906 + " harvestdefinitions.numevents," + " harvestdefinitions.submitted," 907 + " harvestdefinitions.isactive," + " harvestdefinitions.edition," 908 + " schedules.name," + " partialharvests.nextdate, " 909 + " harvestdefinitions.audience, " + " harvestdefinitions.channel_id " 910 + "FROM harvestdefinitions, partialharvests, schedules" 911 + " WHERE harvestdefinitions.name = ?" 912 + " AND harvestdefinitions.harvest_id " + "= partialharvests.harvest_id" 913 + " AND schedules.schedule_id " + "= partialharvests.schedule_id"); 914 ) { 915 s.setString(1, harvestName); 916 ResultSet res = s.executeQuery(); 917 if (res.next()) { 918 SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), harvestName, res.getString(2), 919 res.getInt(3), new Date(res.getTimestamp(4).getTime()), res.getBoolean(5), res.getLong(6), 920 res.getString(7), DBUtils.getDateMaybeNull(res, 8), res.getString(9), DBUtils.getLongMaybeNull( 921 res, 10)); 922 sph.setExtendedFieldValues(getExtendedFieldValues(sph.getOid())); 923 return sph; 924 } else { 925 return null; 926 } 927 } catch (SQLException e) { 928 throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e); 929 } 930 } 931 932 /** 933 * Get all sparse versions of full harvests for GUI purposes. 934 * 935 * @return An iterable (possibly empty) of SparseFullHarvests 936 */ 937 public Iterable<SparseFullHarvest> getAllSparseFullHarvestDefinitions() { 938 try ( 939 Connection c = HarvestDBConnection.get(); 940 PreparedStatement s = c 941 .prepareStatement("SELECT harvestdefinitions.harvest_id," + " harvestdefinitions.name," 942 + " harvestdefinitions.comments," + " harvestdefinitions.numevents," 943 + " harvestdefinitions.isactive," + " harvestdefinitions.edition," 944 + " fullharvests.maxobjects," + " fullharvests.maxbytes," 945 + " fullharvests.maxjobrunningtime," + " fullharvests.previoushd, " 946 + " harvestdefinitions.channel_id " + "FROM harvestdefinitions, fullharvests" 947 + " WHERE harvestdefinitions.harvest_id " + " = fullharvests.harvest_id" 948 + " ORDER BY harvestdefinitions.name"); 949 ) { 950 ResultSet res = s.executeQuery(); 951 List<SparseFullHarvest> harvests = new ArrayList<SparseFullHarvest>(); 952 while (res.next()) { 953 SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), res.getString(2), res.getString(3), 954 res.getInt(4), res.getBoolean(5), res.getLong(6), res.getLong(7), res.getLong(8), 955 res.getLong(9), DBUtils.getLongMaybeNull(res, 10), DBUtils.getLongMaybeNull(res, 11)); 956 // EAV 957 long oid = sfh.getOid(); 958 List<AttributeAndType> attributesAndTypes = EAV.getInstance().getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int)oid); 959 sfh.setAttributesAndTypes(attributesAndTypes); 960 harvests.add(sfh); 961 } 962 return harvests; 963 } catch (SQLException e) { 964 throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e); 965 } 966 } 967 968 /** 969 * Get the name of a harvest given its ID. 970 * 971 * @param harvestDefinitionID The ID of a harvest 972 * @return The name of the given harvest. 973 * @throws ArgumentNotValid on null argument 974 * @throws UnknownID if no harvest has the given ID. 975 * @throws IOFailure on any other error talking to the database 976 */ 977 @Override 978 public String getHarvestName(Long harvestDefinitionID) { 979 ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID"); 980 try ( 981 Connection c = HarvestDBConnection.get(); 982 PreparedStatement s = c.prepareStatement("SELECT name FROM harvestdefinitions WHERE harvest_id = ?"); 983 ) { 984 s.setLong(1, harvestDefinitionID); 985 ResultSet res = s.executeQuery(); 986 String name = null; 987 while (res.next()) { 988 if (name != null) { 989 throw new IOFailure("Found more than one name for harvest definition " + harvestDefinitionID 990 + ": '" + name + "' and '" + res.getString(1) + "'"); 991 } 992 name = res.getString(1); 993 } 994 if (name == null) { 995 throw new UnknownID("No name found for harvest definition " + harvestDefinitionID); 996 } 997 return name; 998 } catch (SQLException e) { 999 throw new IOFailure("An error occurred finding the name for " + "harvest definition " + harvestDefinitionID 1000 + "\n" + ExceptionUtils.getSQLExceptionCause(e), e); 1001 } 1002 } 1003 1004 /** 1005 * Get whether a given harvest is a snapshot or selective harvest. 1006 * 1007 * @param harvestDefinitionID ID of a harvest 1008 * @return True if the given harvest is a snapshot harvest, false otherwise. 1009 * @throws ArgumentNotValid on null argument 1010 * @throws UnknownID if no harvest has the given ID. 1011 */ 1012 @Override 1013 public boolean isSnapshot(Long harvestDefinitionID) { 1014 ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID"); 1015 try (Connection connection = HarvestDBConnection.get();) { 1016 boolean isSnapshot = DBUtils.selectAny(connection, 1017 "SELECT harvest_id FROM fullharvests WHERE harvest_id = ?", harvestDefinitionID); 1018 if (isSnapshot) { 1019 return true; 1020 } 1021 boolean isSelective = DBUtils.selectAny(connection, "SELECT harvest_id FROM partialharvests " 1022 + "WHERE harvest_id = ?", harvestDefinitionID); 1023 if (isSelective) { 1024 return false; 1025 } 1026 throw new UnknownID("Failed to find harvest definition with id " + harvestDefinitionID); 1027 } catch (SQLException e) { 1028 throw new RuntimeException("Unable to close DB connection"); 1029 } 1030 } 1031 1032 /** 1033 * Get a sparse version of a full harvest for GUI purposes. 1034 * 1035 * @param harvestName Name of harvest definition. 1036 * @return Sparse version of full harvest or null for none. 1037 * @throws ArgumentNotValid on null or empty name. 1038 * @throws UnknownID if no harvest has the given ID. 1039 * @throws IOFailure on any other error talking to the database 1040 */ 1041 @Override 1042 public SparseFullHarvest getSparseFullHarvest(String harvestName) { 1043 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 1044 try (Connection c = HarvestDBConnection.get(); 1045 PreparedStatement s = c 1046 .prepareStatement("SELECT harvestdefinitions.harvest_id," + " harvestdefinitions.comments," 1047 + " harvestdefinitions.numevents," + " harvestdefinitions.isactive," 1048 + " harvestdefinitions.edition," + " fullharvests.maxobjects," 1049 + " fullharvests.maxbytes," + " fullharvests.maxjobrunningtime," 1050 + " fullharvests.previoushd, " + " harvestdefinitions.channel_id " 1051 + "FROM harvestdefinitions, fullharvests" + " WHERE harvestdefinitions.name = ?" 1052 + " AND harvestdefinitions.harvest_id " + " = fullharvests.harvest_id"); 1053 ) { 1054 s.setString(1, harvestName); 1055 ResultSet res = s.executeQuery(); 1056 if (res.next()) { 1057 SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), harvestName, res.getString(2), 1058 res.getInt(3), res.getBoolean(4), res.getLong(5), res.getLong(6), res.getLong(7), 1059 res.getLong(8), DBUtils.getLongMaybeNull(res, 9), DBUtils.getLongMaybeNull(res, 10)); 1060 // EAV 1061 long oid = sfh.getOid(); 1062 List<AttributeAndType> attributesAndTypes = EAV.getInstance().getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int)oid); 1063 sfh.setAttributesAndTypes(attributesAndTypes); 1064 sfh.setExtendedFieldValues(getExtendedFieldValues(sfh.getOid())); 1065 return sfh; 1066 } else { 1067 return null; 1068 } 1069 } catch (SQLException e) { 1070 throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e); 1071 } 1072 } 1073 1074 /** 1075 * Get a sorted list of all domain names of a HarvestDefinition. 1076 * 1077 * @param harvestName of HarvestDefinition 1078 * @return List of all domains of the HarvestDefinition. 1079 */ 1080 @Override 1081 public List<String> getListOfDomainsOfHarvestDefinition(String harvestName) { 1082 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 1083 try (Connection c = HarvestDBConnection.get(); 1084 PreparedStatement s = c.prepareStatement( 1085 // Note: the DISTINCT below is put in deliberately to fix 1086 // bug 1878: Seeds for domain is shown twice on page 1087 // History/Harveststatus-seeds.jsp 1088 "SELECT DISTINCT domains.name" + " FROM domains," + " configurations," 1089 + " harvest_configs," + " harvestdefinitions" 1090 + " WHERE configurations.domain_id = domains.domain_id" 1091 + " AND harvest_configs.config_id = " 1092 + "configurations.config_id" + " AND harvest_configs.harvest_id = " 1093 + "harvestdefinitions.harvest_id" + " AND harvestdefinitions.name = ?" 1094 + " ORDER BY domains.name"); 1095 ) { 1096 s.setString(1, harvestName); 1097 ResultSet res = s.executeQuery(); 1098 List<String> domains = new ArrayList<String>(); 1099 1100 while (res.next()) { 1101 domains.add(res.getString(1)); 1102 } 1103 return domains; 1104 } catch (SQLException e) { 1105 throw new IOFailure("SQL error getting seeds of a domain of a harvest definition" + "\n" 1106 + ExceptionUtils.getSQLExceptionCause(e), e); 1107 } 1108 } 1109 1110 /** 1111 * Get a sorted list of all seeds of a Domain in a HarvestDefinition. 1112 * 1113 * @param harvestName of HarvestDefinition 1114 * @param domainName of Domain 1115 * @return List of all seeds of the Domain in the HarvestDefinition. 1116 */ 1117 @Override 1118 public List<String> getListOfSeedsOfDomainOfHarvestDefinition(String harvestName, String domainName) { 1119 ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName"); 1120 ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName"); 1121 try (Connection c = HarvestDBConnection.get(); 1122 PreparedStatement s = c.prepareStatement( 1123 "SELECT seedlists.seeds" + " FROM configurations," + " harvest_configs," 1124 + " harvestdefinitions," + " seedlists," + " config_seedlists," 1125 + " domains" + " WHERE config_seedlists.seedlist_id " + "= seedlists.seedlist_id" 1126 + " AND configurations.config_id " + "= config_seedlists.config_id" 1127 + " AND configurations.config_id " + "= harvest_configs.config_id" 1128 + " AND harvest_configs.harvest_id " + "= harvestdefinitions.harvest_id" 1129 + " AND configurations.domain_id = domains.domain_id" + " AND domains.name = ?" 1130 + " AND harvestdefinitions.name = ?"); 1131 ) { 1132 s.setString(1, domainName); 1133 s.setString(2, harvestName); 1134 ResultSet res = s.executeQuery(); 1135 List<String> seeds = new ArrayList<String>(); 1136 1137 while (res.next()) { 1138 String seedsOfDomain = res.getString(1); 1139 1140 StringTokenizer st = new StringTokenizer(seedsOfDomain, "\n"); 1141 1142 while (st.hasMoreTokens()) { 1143 String seed = st.nextToken(); 1144 1145 boolean isDuplicate = false; 1146 for (String entry : seeds) { 1147 if (entry.equals(seed)) { 1148 isDuplicate = true; 1149 break; 1150 } 1151 } 1152 if (!isDuplicate) { // duplicates will not be added 1153 seeds.add(seed); 1154 } 1155 } 1156 } 1157 1158 Collections.sort(seeds, Collator.getInstance()); 1159 1160 return seeds; 1161 } catch (SQLException e) { 1162 throw new IOFailure("SQL error getting seeds of a domain\n" + ExceptionUtils.getSQLExceptionCause(e), e); 1163 } 1164 } 1165 1166 @Override 1167 public Set<Long> getJobIdsForSnapshotDeduplicationIndex(Long harvestId) { 1168 ArgumentNotValid.checkNotNull(harvestId, "Long harvestId"); 1169 Set<Long> jobIds = new HashSet<Long>(); 1170 if (!isSnapshot(harvestId)) { 1171 throw new NotImplementedException("This functionality only works for snapshot harvests"); 1172 } 1173 List<Long> harvestDefinitions = getPreviousFullHarvests(harvestId); 1174 try (Connection c = HarvestDBConnection.get();) { 1175 List<Long> jobs = new ArrayList<Long>(); 1176 if (!harvestDefinitions.isEmpty()) { 1177 // Select all jobs from a given list of harvest definitions 1178 jobs.addAll(DBUtils.selectLongList(c, "SELECT jobs.job_id FROM jobs WHERE jobs.harvest_id IN (" 1179 + StringUtils.conjoin(",", harvestDefinitions) + ")")); 1180 } 1181 jobIds.addAll(jobs); 1182 } catch (SQLException e) { 1183 log.error("Unable to close DB connection", e); 1184 } 1185 1186 return jobIds; 1187 } 1188 1189 /** 1190 * Get list of harvests previous to this one. 1191 * 1192 * @param thisHarvest The id of this harvestdefinition 1193 * @return a list of IDs belonging to harvests previous to this one. 1194 */ 1195 private List<Long> getPreviousFullHarvests(Long thisHarvest) { 1196 List<Long> results = new ArrayList<Long>(); 1197 try (Connection c = HarvestDBConnection.get();) { 1198 // Follow the chain of originating IDs back 1199 for (Long originatingHarvest = thisHarvest; originatingHarvest != null; 1200 // Compute next originatingHarvest 1201 originatingHarvest = DBUtils.selectFirstLongValueIfAny(c, "SELECT previoushd FROM fullharvests" 1202 + " WHERE fullharvests.harvest_id=?", originatingHarvest)) { 1203 if (!originatingHarvest.equals(thisHarvest)) { 1204 results.add(originatingHarvest); 1205 } 1206 } 1207 1208 // Find the first harvest in the chain (but last in the list). 1209 Long firstHarvest = thisHarvest; 1210 if (!results.isEmpty()) { 1211 firstHarvest = results.get(results.size() - 1); 1212 } 1213 1214 // Find the last harvest in the chain before 1215 Long olderHarvest = DBUtils.selectFirstLongValueIfAny(c, "SELECT fullharvests.harvest_id" 1216 + " FROM fullharvests, harvestdefinitions," + " harvestdefinitions AS currenthd" 1217 + " WHERE currenthd.harvest_id=?" + " AND fullharvests.harvest_id " 1218 + "= harvestdefinitions.harvest_id" 1219 + " AND harvestdefinitions.submitted " + "< currenthd.submitted" 1220 + " ORDER BY harvestdefinitions.submitted " + HarvestStatusQuery.SORT_ORDER.DESC.name(), 1221 firstHarvest); 1222 // Follow the chain of originating IDs back 1223 for (Long originatingHarvest = olderHarvest; originatingHarvest != null; originatingHarvest = DBUtils 1224 .selectFirstLongValueIfAny(c, "SELECT previoushd FROM fullharvests" 1225 + " WHERE fullharvests.harvest_id=?", originatingHarvest)) { 1226 results.add(originatingHarvest); 1227 } 1228 } catch (SQLException e) { 1229 log.warn("Exception thrown while updating fullharvests.isindexready field: {}", 1230 ExceptionUtils.getSQLExceptionCause(e), e); 1231 } 1232 return results; 1233 } 1234 1235 @Override 1236 public void setIndexIsReady(Long harvestId, boolean newValue) { 1237 if (!isSnapshot(harvestId)) { 1238 throw new NotImplementedException("Not implemented for non snapshot harvests"); 1239 } else { 1240 try ( 1241 Connection c = HarvestDBConnection.get(); 1242 PreparedStatement s = c.prepareStatement( 1243 "UPDATE fullharvests SET isindexready=? WHERE harvest_id=?"); 1244 ) { 1245 s.setBoolean(1, newValue); 1246 s.setLong(2, harvestId); 1247 int rows = s.executeUpdate(); 1248 log.debug(rows + " entries of table fullharvests updated"); 1249 } catch (SQLException e) { 1250 log.warn("Exception thrown while updating fullharvests.isindexready field: {}", 1251 ExceptionUtils.getSQLExceptionCause(e), e); 1252 } 1253 } 1254 } 1255 1256 /* 1257 * Removes the entry in harvest_configs, that binds a certain domainconfiguration to this PartialHarvest. TODO maybe 1258 * update the edition as well. 1259 */ 1260 @Override 1261 public void removeDomainConfiguration(Long harvestId, SparseDomainConfiguration key) { 1262 ArgumentNotValid.checkNotNull(key, "DomainConfigurationKey key"); 1263 if (harvestId == null) { 1264 // Don't need to do anything, if PartialHarvest is not 1265 // yet stored in database 1266 log.warn("No removal of domainConfiguration, " + "as harvestId is null"); 1267 return; 1268 } 1269 Connection connection = HarvestDBConnection.get(); 1270 PreparedStatement s = null; 1271 try { 1272 s = connection.prepareStatement("DELETE FROM harvest_configs WHERE harvest_id = ? " 1273 + "AND config_id = (SELECT config_id " + " FROM configurations, domains " 1274 + "WHERE domains.name = ? AND configurations.name = ?" 1275 + " AND domains.domain_id = configurations.domain_id)"); 1276 s.setLong(1, harvestId); 1277 s.setString(2, key.getDomainName()); 1278 s.setString(3, key.getConfigurationName()); 1279 s.executeUpdate(); 1280 } catch (SQLException e) { 1281 log.warn("Exception thrown while removing domainconfiguration: {}", ExceptionUtils.getSQLExceptionCause(e), 1282 e); 1283 } finally { 1284 DBUtils.closeStatementIfOpen(s); 1285 DBUtils.rollbackIfNeeded(connection, "removing DomainConfiguration from harvest w/id " + harvestId 1286 + " failed", harvestId); 1287 HarvestDBConnection.release(connection); 1288 } 1289 } 1290 1291 @Override 1292 public void updateNextdate(long harvestId, Date nextdate) { 1293 ArgumentNotValid.checkNotNull(harvestId, "Long harvest ID"); 1294 ArgumentNotValid.checkNotNull(nextdate, "Date nextdate"); 1295 if (harvestId < 0) { 1296 // Don't need to do anything, if PartialHarvest is not 1297 // yet stored in database 1298 return; 1299 } 1300 Connection connection = HarvestDBConnection.get(); 1301 PreparedStatement s = null; 1302 try { 1303 connection.setAutoCommit(false); 1304 s = connection.prepareStatement("UPDATE partialharvests SET nextdate = ? " + "WHERE harvest_id = ?"); 1305 DBUtils.setDateMaybeNull(s, 1, nextdate); 1306 s.setLong(2, harvestId); 1307 s.executeUpdate(); 1308 connection.commit(); 1309 } catch (SQLException e) { 1310 log.warn("Exception thrown while updating nextdate: {}", ExceptionUtils.getSQLExceptionCause(e), e); 1311 } finally { 1312 DBUtils.closeStatementIfOpen(s); 1313 DBUtils.rollbackIfNeeded(connection, "Updating nextdate from", harvestId); 1314 HarvestDBConnection.release(connection); 1315 } 1316 } 1317 1318 @Override 1319 public void addDomainConfiguration(PartialHarvest ph, SparseDomainConfiguration dcKey) { 1320 ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph"); 1321 ArgumentNotValid.checkNotNull(dcKey, "DomainConfigurationKey dcKey"); 1322 1323 try ( 1324 Connection connection = HarvestDBConnection.get(); 1325 PreparedStatement s = connection 1326 .prepareStatement("INSERT INTO harvest_configs " + "( harvest_id, config_id ) " 1327 + "SELECT ?, config_id FROM configurations, domains " 1328 + "WHERE domains.name = ? AND configurations.name = ?" 1329 + " AND domains.domain_id = configurations.domain_id"); 1330 1331 ) { 1332 s.setLong(1, ph.getOid()); 1333 s.setString(2, dcKey.getDomainName()); 1334 s.setString(3, dcKey.getConfigurationName()); 1335 s.executeUpdate(); 1336 s.close(); 1337 } catch (SQLException e) { 1338 log.warn("Exception thrown while adding domainConfiguration: {}", ExceptionUtils.getSQLExceptionCause(e), e); 1339 } 1340 } 1341 1342 @Override 1343 public void resetDomainConfigurations(PartialHarvest ph, List<DomainConfiguration> dcList) { 1344 ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph"); 1345 ArgumentNotValid.checkNotNull(dcList, "List<DomainConfiguration> dcList"); 1346 1347 try (Connection connection = HarvestDBConnection.get()) { 1348 createHarvestConfigsEntries(connection, ph, ph.getOid()); 1349 } catch (SQLException e) { 1350 log.warn("Exception thrown while resetting domainConfigurations: {}", 1351 ExceptionUtils.getSQLExceptionCause(e), e); 1352 } 1353 } 1354 1355 @Override 1356 public void mapToHarvestChannel(long harvestDefinitionId, HarvestChannel channel) { 1357 ArgumentNotValid.checkNotNull(channel, "HarvestChannel channel"); 1358 1359 try ( 1360 Connection connection = HarvestDBConnection.get(); 1361 PreparedStatement s = connection.prepareStatement( 1362 "UPDATE harvestdefinitions SET channel_id=? WHERE harvest_id=?"); 1363 ) { 1364 s.setLong(1, channel.getId()); 1365 s.setLong(2, harvestDefinitionId); 1366 if (s.executeUpdate() != 1) { 1367 throw new IOFailure("Could not map harvest channel " + channel.getId() + " to harvest definition " 1368 + harvestDefinitionId); 1369 } 1370 s.close(); 1371 } catch (SQLException e) { 1372 log.warn("Exception thrown while mapping to harvest channel: {}", ExceptionUtils.getSQLExceptionCause(e), e); 1373 } 1374 } 1375 1376 /** 1377 * Saves all extended Field values for a HarvestDefinition in the Database. 1378 * 1379 * @param c Connection to Database 1380 * @param h HarvestDefinition where loaded extended Field Values will be set 1381 * @throws SQLException If database errors occur. 1382 */ 1383 private void saveExtendedFieldValues(Connection c, HarvestDefinition h) throws SQLException { 1384 List<ExtendedFieldValue> list = h.getExtendedFieldValues(); 1385 for (int i = 0; i < list.size(); i++) { 1386 ExtendedFieldValue efv = list.get(i); 1387 efv.setInstanceID(h.getOid()); 1388 1389 ExtendedFieldValueDBDAO dao = (ExtendedFieldValueDBDAO) ExtendedFieldValueDAO.getInstance(); 1390 if (efv.getExtendedFieldValueID() != null) { 1391 dao.update(c, efv, false); 1392 } else { 1393 dao.create(c, efv, false); 1394 } 1395 } 1396 } 1397 1398 /** 1399 * Reads all extended Field values from the database for a HarvestDefinition. 1400 * 1401 * @param h HarvestDefinition where loaded extended Field Values will be set 1402 * @throws SQLException If database errors occur. 1403 */ 1404 private void readExtendedFieldValues(HarvestDefinition h) throws SQLException { 1405 h.setExtendedFieldValues(getExtendedFieldValues(h.getOid())); 1406 } 1407 1408 /** 1409 * Reads all extended Field values from the database for a HarvestDefinitionOid. 1410 * 1411 * @param aOid HarvestDefinition where loaded extended Field Values will be set 1412 * @return a list of ExtendedFieldValues belonging to the given harvest oid 1413 * @throws SQLException If database errors occur. 1414 */ 1415 private List<ExtendedFieldValue> getExtendedFieldValues(Long aOid) throws SQLException { 1416 List<ExtendedFieldValue> extendedFieldValues = new ArrayList<ExtendedFieldValue>(); 1417 1418 ExtendedFieldDAO dao = ExtendedFieldDAO.getInstance(); 1419 List<ExtendedField> list = dao.getAll(ExtendedFieldTypes.HARVESTDEFINITION); 1420 1421 for (int i = 0; i < list.size(); i++) { 1422 ExtendedField ef = list.get(i); 1423 1424 ExtendedFieldValueDAO dao2 = ExtendedFieldValueDAO.getInstance(); 1425 ExtendedFieldValue efv = dao2.read(ef.getExtendedFieldID(), aOid); 1426 1427 if (efv == null) { 1428 efv = new ExtendedFieldValue(); 1429 efv.setExtendedFieldID(ef.getExtendedFieldID()); 1430 efv.setInstanceID(aOid); 1431 efv.setContent(new ExtendedFieldDefaultValue(ef.getDefaultValue(), ef.getFormattingPattern(), ef 1432 .getDatatype()).getDBValue()); 1433 } 1434 1435 extendedFieldValues.add(efv); 1436 } 1437 1438 return extendedFieldValues; 1439 } 1440}