001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.sql.Connection;
027import java.sql.PreparedStatement;
028import java.sql.ResultSet;
029import java.sql.SQLException;
030import java.sql.Timestamp;
031import java.sql.Types;
032import java.text.Collator;
033import java.util.ArrayList;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.Date;
037import java.util.HashMap;
038import java.util.HashSet;
039import java.util.Iterator;
040import java.util.LinkedList;
041import java.util.List;
042import java.util.Map;
043import java.util.Set;
044import java.util.StringTokenizer;
045
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049import dk.netarkivet.common.exceptions.ArgumentNotValid;
050import dk.netarkivet.common.exceptions.IOFailure;
051import dk.netarkivet.common.exceptions.IllegalState;
052import dk.netarkivet.common.exceptions.NotImplementedException;
053import dk.netarkivet.common.exceptions.PermissionDenied;
054import dk.netarkivet.common.exceptions.UnknownID;
055import dk.netarkivet.common.utils.DBUtils;
056import dk.netarkivet.common.utils.ExceptionUtils;
057import dk.netarkivet.common.utils.FilterIterator;
058import dk.netarkivet.common.utils.StringUtils;
059import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory;
060import dk.netarkivet.harvester.datamodel.eav.EAV;
061import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType;
062import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedField;
063import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDAO;
064import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDefaultValue;
065import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes;
066import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValue;
067import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDAO;
068import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDBDAO;
069import dk.netarkivet.harvester.webinterface.HarvestStatusQuery;
070
071/**
072 * A database-oriented implementation of the HarvestDefinitionDAO.
073 * <p>
074 * The statements to create the tables are located in:
075 * <ul>
076 * <li><em>Derby:</em> scripts/sql/createfullhddb.sql</li>
077 * <li><em>MySQL:</em> scripts/sql/createfullhddb.mysql</li>
078 * <li><em>PostgreSQL:</em> scripts/postgresql/netarchivesuite_init.sql</li>
079 * </ul>
080 */
081public class HarvestDefinitionDBDAO extends HarvestDefinitionDAO {
082
083    /** The logger. */
084    private static final Logger log = LoggerFactory.getLogger(HarvestDefinitionDBDAO.class);
085
086    /**
087     * Comparator used for sorting the UI list of {@link SparseDomainConfiguration}s. Sorts first by domain name
088     * alphabetical order, next by configuration name.
089     */
090    private static class SparseDomainConfigurationComparator implements Comparator<SparseDomainConfiguration> {
091
092        @Override
093        public int compare(SparseDomainConfiguration sdc1, SparseDomainConfiguration sdc2) {
094            int domComp = sdc1.getDomainName().compareTo(sdc2.getDomainName());
095            if (0 == domComp) {
096                return sdc1.getConfigurationName().compareTo(sdc2.getConfigurationName());
097            }
098            return domComp;
099        }
100    }
101
102    /** Create a new HarvestDefinitionDAO using database. */
103    HarvestDefinitionDBDAO() {
104        Connection connection = HarvestDBConnection.get();
105        try {
106            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.FULLHARVESTS);
107            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTDEFINITIONS);
108            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.PARTIALHARVESTS);
109            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTCONFIGS);
110            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDTYPE);
111            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELD);
112            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDVALUE);
113        } finally {
114            HarvestDBConnection.release(connection);
115        }
116    }
117
118    /**
119     * Create a harvest definition in Database. The harvest definition object should not have its ID set unless we are
120     * in the middle of migrating.
121     *
122     * @param harvestDefinition A new harvest definition to store in the database.
123     * @return The harvestId for the just created harvest definition.
124     * @see HarvestDefinitionDAO#create(HarvestDefinition)
125     */
126    @Override
127    public synchronized Long create(HarvestDefinition harvestDefinition) {
128        Long id = harvestDefinition.getOid();
129        try (Connection connection = HarvestDBConnection.get();) {
130            Date submissiondate = new Date();
131            final int edition = 1;
132            try {
133                if (id == null) {
134                    id = generateNextID(connection);
135                }
136
137                connection.setAutoCommit(false);
138                try ( PreparedStatement s = connection.prepareStatement("INSERT INTO harvestdefinitions "
139                                + "( harvest_id, name, comments, numevents, submitted,  isactive, edition, audience ) "
140                                + "VALUES ( ?, ?, ?, ?, ?, ?, ?,? )");) {
141                    s.setLong(1, id);
142                    DBUtils.setName(s, 2, harvestDefinition, Constants.MAX_NAME_SIZE);
143                    DBUtils.setComments(s, 3, harvestDefinition, Constants.MAX_COMMENT_SIZE);
144                    s.setLong(4, harvestDefinition.getNumEvents());
145                    // Don't set on object, as we may yet rollback
146                    s.setTimestamp(5, new Timestamp(submissiondate.getTime()));
147                    s.setBoolean(6, harvestDefinition.getActive());
148                    s.setLong(7, edition);
149                    s.setString(8, harvestDefinition.getAudience());
150                    s.executeUpdate();
151                }
152                if (harvestDefinition instanceof FullHarvest) {
153                    FullHarvest fh = (FullHarvest) harvestDefinition;
154                    try ( PreparedStatement s = connection.prepareStatement(
155                            "INSERT INTO fullharvests "
156                            + "( harvest_id, maxobjects, maxbytes, maxjobrunningtime, previoushd, isindexready)"
157                            + "VALUES ( ?, ?, ?, ?, ?, ? )"); ) {
158                    s.setLong(1, id);
159                    s.setLong(2, fh.getMaxCountObjects());
160                    s.setLong(3, fh.getMaxBytes());
161                    s.setLong(4, fh.getMaxJobRunningTime());
162                    if (fh.getPreviousHarvestDefinition() != null) {
163                        s.setLong(5, fh.getPreviousHarvestDefinition().getOid());
164                    } else {
165                        s.setNull(5, Types.BIGINT);
166                    }
167                    s.setBoolean(6, fh.getIndexReady());
168                    s.executeUpdate();
169                }
170                } else if (harvestDefinition instanceof PartialHarvest) {
171                    PartialHarvest ph = (PartialHarvest) harvestDefinition;
172                    // Get schedule id
173                    long scheduleId = DBUtils.selectLongValue(connection,
174                            "SELECT schedule_id FROM schedules WHERE name = ?", ph.getSchedule().getName());
175                    try ( PreparedStatement s = connection.prepareStatement("INSERT INTO partialharvests ( harvest_id, schedule_id, nextdate ) "
176                            + "VALUES ( ?, ?, ? )"); ) {
177                        s.setLong(1, id);
178                        s.setLong(2, scheduleId);
179                        DBUtils.setDateMaybeNull(s, 3, ph.getNextDate());
180                        s.executeUpdate();
181                        createHarvestConfigsEntries(connection, ph, id);
182                    }
183                } else {
184                    String message = "Harvest definition " + harvestDefinition + " is of unknown class "
185                            + harvestDefinition.getClass();
186                    log.warn(message);
187                    throw new ArgumentNotValid(message);
188                }
189                connection.commit();
190
191                // Now that we have committed, set new data on object.
192                harvestDefinition.setSubmissionDate(submissiondate);
193                harvestDefinition.setEdition(edition);
194                harvestDefinition.setOid(id);
195
196                // saving after receiving id
197                saveExtendedFieldValues(connection, harvestDefinition);
198
199            } catch (SQLException e) {
200                String message = "SQL error creating harvest definition " + harvestDefinition + " in database" + "\n"
201                        + ExceptionUtils.getSQLExceptionCause(e);
202                log.warn(message, e);
203                throw new IOFailure(message, e);
204            } finally {
205                DBUtils.rollbackIfNeeded(connection, "creating", harvestDefinition);
206            }
207        } catch (SQLException e) {
208            log.error("Unable to close db resources", e);
209        }
210
211        return id;
212    }
213
214    /**
215     * Create the entries in the harvest_configs table that connect PartialHarvests and their configurations.
216     *
217     * @param c harvest definition DB connection
218     * @param ph The harvest to insert entries for.
219     * @param id The id of the harvest -- this may not yet be set on ph
220     * @throws SQLException If a database error occurs during the create process.
221     */
222    private void createHarvestConfigsEntries(Connection c, PartialHarvest ph, long id) throws SQLException {
223        try (PreparedStatement s = c.prepareStatement("DELETE FROM harvest_configs WHERE harvest_id = ?");) {
224            s.setLong(1, id);
225            s.executeUpdate();
226        }
227        try (PreparedStatement s = c.prepareStatement("INSERT INTO harvest_configs " + "( harvest_id, config_id ) "
228                + "SELECT ?, config_id FROM configurations, domains "
229                + "WHERE domains.name = ? AND configurations.name = ?"
230                + "  AND domains.domain_id = configurations.domain_id");
231        ) {
232            Iterator<DomainConfiguration> dcs = ph.getDomainConfigurations();
233            while (dcs.hasNext()) {
234                DomainConfiguration dc = dcs.next();
235                s.setLong(1, id);
236                s.setString(2, dc.getDomainName());
237                s.setString(3, dc.getName());
238                s.executeUpdate();
239            }
240        }
241    }
242
243    /**
244     * Generates the next id of a harvest definition. this implementation retrieves the maximum value of harvest_id in
245     * the DB, and returns this value + 1.
246     *
247     * @param c An open connection to the harvestDatabase
248     * @return The next available ID
249     */
250    private synchronized Long generateNextID(Connection c) {
251        Long maxVal = DBUtils.selectLongValue(c, "SELECT max(harvest_id) FROM harvestdefinitions");
252        if (maxVal == null) {
253            maxVal = 0L;
254        }
255        return maxVal + 1L;
256    }
257
258    /**
259     * Read the stored harvest definition for the given ID.
260     *
261     * @param harvestDefinitionID An ID number for a harvest definition
262     * @return A harvest definition that has been read from persistent storage.
263     * @throws UnknownID if no entry with that ID exists in the database
264     * @throws IOFailure If DB-failure occurs?
265     * @see HarvestDefinitionDAO#read(Long)
266     */
267    @Override
268    public synchronized HarvestDefinition read(Long harvestDefinitionID) throws UnknownID, IOFailure {
269        Connection c = HarvestDBConnection.get();
270        try {
271            return read(c, harvestDefinitionID);
272        } finally {
273            HarvestDBConnection.release(c);
274        }
275    }
276
277    /**
278     * Read the stored harvest definition for the given ID.
279     *
280     * @param c The used database connection
281     * @param harvestDefinitionID ID number for a harvest definition
282     * @return A harvest definition that has been read from persistent storage.
283     * @throws UnknownID if no entry with that ID exists in the database
284     * @throws IOFailure If DB-failure occurs?
285     * @see HarvestDefinitionDAO#read(Long)
286     */
287    private HarvestDefinition read(Connection c, Long harvestDefinitionID) throws UnknownID, IOFailure {
288        if (!exists(c, harvestDefinitionID)) {
289            String message = "Unknown harvest definition " + harvestDefinitionID;
290            log.debug(message);
291            throw new UnknownID(message);
292        }
293        log.debug("Reading harvestdefinition w/ id {}", harvestDefinitionID);
294        PreparedStatement s = null;
295        try {
296            s = c.prepareStatement("SELECT name, comments, numevents, submitted, "
297                    + "previoushd, maxobjects, maxbytes, "
298                    + "maxjobrunningtime, isindexready, isactive, edition, audience "
299                    + "FROM harvestdefinitions, fullharvests " + "WHERE harvestdefinitions.harvest_id = ?"
300                    + "  AND harvestdefinitions.harvest_id " + " = fullharvests.harvest_id");
301            s.setLong(1, harvestDefinitionID);
302            ResultSet res = s.executeQuery();
303            if (res.next()) {
304                // Found full harvest
305                log.debug("fullharvest found w/id " + harvestDefinitionID);
306                final String name = res.getString(1);
307                final String comments = res.getString(2);
308                final int numEvents = res.getInt(3);
309                final Date submissionDate = new Date(res.getTimestamp(4).getTime());
310                final long maxObjects = res.getLong(6);
311                final long maxBytes = res.getLong(7);
312                final long maxJobRunningtime = res.getLong(8);
313                final boolean isIndexReady = res.getBoolean(9);
314                FullHarvest fh;
315                final long prevhd = res.getLong(5);
316                if (!res.wasNull()) {
317                    fh = new FullHarvest(name, comments, prevhd, maxObjects, maxBytes, maxJobRunningtime, isIndexReady,
318                            DAOProviderFactory.getHarvestDefinitionDAOProvider(),
319                            DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(),
320                            DAOProviderFactory.getDomainDAOProvider());
321                } else {
322                    fh = new FullHarvest(name, comments, null, maxObjects, maxBytes, maxJobRunningtime, isIndexReady,
323                            DAOProviderFactory.getHarvestDefinitionDAOProvider(),
324                            DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(),
325                            DAOProviderFactory.getDomainDAOProvider());
326                }
327                fh.setSubmissionDate(submissionDate);
328                fh.setNumEvents(numEvents);
329                fh.setActive(res.getBoolean(10));
330                fh.setOid(harvestDefinitionID);
331                fh.setEdition(res.getLong(11));
332                fh.setAudience(res.getString(12));
333
334                readExtendedFieldValues(fh);
335
336                // We found a FullHarvest object, just return it.
337                log.debug("Returned FullHarvest object w/ id {}", harvestDefinitionID);
338                return fh;
339            }
340            s.close();
341            // No full harvest with that ID, try selective harvest
342            s = c.prepareStatement("SELECT harvestdefinitions.name," + "       harvestdefinitions.comments,"
343                    + "       harvestdefinitions.numevents," + "       harvestdefinitions.submitted,"
344                    + "       harvestdefinitions.isactive," + "       harvestdefinitions.edition,"
345                    + "       harvestdefinitions.audience," + "       schedules.name,"
346                    + "       partialharvests.nextdate, " + "       harvestdefinitions.channel_id "
347                    + "FROM harvestdefinitions, partialharvests, schedules"
348                    + " WHERE harvestdefinitions.harvest_id = ?" + "   AND harvestdefinitions.harvest_id "
349                    + "= partialharvests.harvest_id" + "   AND schedules.schedule_id "
350                    + "= partialharvests.schedule_id");
351            s.setLong(1, harvestDefinitionID);
352            res = s.executeQuery();
353            boolean foundPartialHarvest = res.next();
354            if (foundPartialHarvest) {
355                log.debug("Partialharvest found w/ id " + harvestDefinitionID);
356                // Have to get configs before creating object, so storing data
357                // here.
358                final String name = res.getString(1);
359                final String comments = res.getString(2);
360                final int numEvents = res.getInt(3);
361                final Date submissionDate = new Date(res.getTimestamp(4).getTime());
362                final boolean active = res.getBoolean(5);
363                final long edition = res.getLong(6);
364                final String audience = res.getString(7);
365                final String scheduleName = res.getString(8);
366                final Date nextDate = DBUtils.getDateMaybeNull(res, 9);
367                final Long channelId = DBUtils.getLongMaybeNull(res, 10);
368                s.close();
369                // Found partial harvest -- have to find configurations.
370                // To avoid holding on to the readlock while getting domains,
371                // we grab the strings first, then look up domains and configs.
372                final DomainDAO domainDao = DomainDAO.getInstance();
373                List<SparseDomainConfiguration> configs = new ArrayList<SparseDomainConfiguration>();
374                s = c.prepareStatement("SELECT domains.name, configurations.name "
375                        + "FROM domains, configurations, harvest_configs " + "WHERE harvest_id = ?"
376                        + "  AND configurations.config_id " + "= harvest_configs.config_id"
377                        + "  AND configurations.domain_id = domains.domain_id");
378                s.setLong(1, harvestDefinitionID);
379                res = s.executeQuery();
380                while (res.next()) {
381                    configs.add(new SparseDomainConfiguration(res.getString(1), res.getString(2)));
382                }
383                s.close();
384                List<DomainConfiguration> configurations = new ArrayList<DomainConfiguration>();
385                for (SparseDomainConfiguration domainConfig : configs) {
386                    configurations.add(domainDao.getDomainConfiguration(domainConfig.getDomainName(),
387                            domainConfig.getConfigurationName()));
388                }
389
390                Schedule schedule = ScheduleDAO.getInstance().read(scheduleName);
391
392                PartialHarvest ph = new PartialHarvest(configurations, schedule, name, comments, audience);
393
394                ph.setNumEvents(numEvents);
395                ph.setSubmissionDate(submissionDate);
396                ph.setActive(active);
397                ph.setEdition(edition);
398                ph.setNextDate(nextDate);
399                ph.setOid(harvestDefinitionID);
400                if (channelId != null) {
401                    ph.setChannelId(channelId);
402                }
403
404                readExtendedFieldValues(ph);
405
406                return ph;
407            } else {
408                throw new IllegalState("No entries in fullharvests or partialharvests found for id "
409                        + harvestDefinitionID);
410            }
411        } catch (SQLException e) {
412            throw new IOFailure("SQL Error while reading harvest definition " + harvestDefinitionID + "\n"
413                    + ExceptionUtils.getSQLExceptionCause(e), e);
414        }
415    }
416
417    /**
418     * Update an existing harvest definition with new info.
419     *
420     * @param hd An updated harvest definition
421     * @see HarvestDefinitionDAO#update(HarvestDefinition)
422     */
423    public synchronized void update(HarvestDefinition hd) {
424        ArgumentNotValid.checkNotNull(hd, "HarvestDefinition hd");
425        if (hd.getOid() == null || !exists(hd.getOid())) {
426            final String message = "Cannot update non-existing " + "harvestdefinition '" + hd.getName() + "'";
427            log.debug(message);
428            throw new PermissionDenied(message);
429        }
430        HarvestDefinition preHD = null;
431        if (hd instanceof FullHarvest) {
432            preHD = ((FullHarvest) hd).getPreviousHarvestDefinition();
433        }
434
435        Connection c = HarvestDBConnection.get();
436        PreparedStatement s = null;
437        try {
438            c.setAutoCommit(false);
439            s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, "
440                    + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? "
441                    + "WHERE harvest_id = ? AND edition = ?");
442            DBUtils.setName(s, 1, hd, Constants.MAX_NAME_SIZE);
443            DBUtils.setComments(s, 2, hd, Constants.MAX_COMMENT_SIZE);
444            s.setInt(3, hd.getNumEvents());
445            s.setTimestamp(4, new Timestamp(hd.getSubmissionDate().getTime()));
446            s.setBoolean(5, hd.getActive());
447            long nextEdition = hd.getEdition() + 1;
448            s.setLong(6, nextEdition);
449            s.setString(7, hd.getAudience());
450            s.setLong(8, hd.getOid());
451            s.setLong(9, hd.getEdition());
452
453            int rows = s.executeUpdate();
454            // Since the HD exists, no rows indicates bad edition
455            if (rows == 0) {
456                String message = "Somebody else must have updated " + hd + " since edition " + hd.getEdition()
457                        + ", not updating";
458                log.debug(message);
459                throw new PermissionDenied(message);
460            }
461            s.close();
462            if (hd instanceof FullHarvest) {
463                FullHarvest fh = (FullHarvest) hd;
464                s = c.prepareStatement("UPDATE fullharvests SET previoushd = ?, " + "maxobjects = ?, "
465                        + "maxbytes = ?, " + "maxjobrunningtime = ?, isindexready = ? " + "WHERE harvest_id = ?");
466                if (preHD != null) {
467                    s.setLong(1, preHD.getOid());
468                } else {
469                    s.setNull(1, Types.BIGINT);
470                }
471                s.setLong(2, fh.getMaxCountObjects());
472                s.setLong(3, fh.getMaxBytes());
473                s.setLong(4, fh.getMaxJobRunningTime());
474                s.setBoolean(5, fh.getIndexReady());
475                s.setLong(6, fh.getOid());
476
477                rows = s.executeUpdate();
478                log.debug("{} fullharvests records updated", rows);
479            } else if (hd instanceof PartialHarvest) {
480                PartialHarvest ph = (PartialHarvest) hd;
481                s = c.prepareStatement("UPDATE partialharvests SET " + "schedule_id = "
482                        + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? "
483                        + "WHERE harvest_id = ?");
484                s.setString(1, ph.getSchedule().getName());
485                DBUtils.setDateMaybeNull(s, 2, ph.getNextDate());
486                s.setLong(3, ph.getOid());
487                rows = s.executeUpdate();
488                log.debug("{} partialharvests records updated", rows);
489                s.close();
490                // FIXME The updates to harvest_configs table should be done
491                // in method removeDomainConfiguration(), and not here.
492                // The following deletes ALL harvest_configs entries for
493                // this PartialHarvest, and creates the entries for the
494                // PartialHarvest again!!
495                createHarvestConfigsEntries(c, ph, ph.getOid());
496            } else {
497                String message = "Harvest definition " + hd + " has unknown class " + hd.getClass();
498                log.warn(message);
499                throw new ArgumentNotValid(message);
500            }
501            saveExtendedFieldValues(c, hd);
502
503            c.commit();
504            hd.setEdition(nextEdition);
505        } catch (SQLException e) {
506            throw new IOFailure("SQL error while updating harvest definition " + hd + "\n"
507                    + ExceptionUtils.getSQLExceptionCause(e), e);
508        } finally {
509            DBUtils.closeStatementIfOpen(s);
510            DBUtils.rollbackIfNeeded(c, "updating", hd);
511            HarvestDBConnection.release(c);
512        }
513    }
514
515    /**
516     * Activates or deactivates a partial harvest definition. This method is actually to be used not to have to read
517     * from the DB big harvest definitions and optimize the activation / deactivation, it is sort of a lightweight
518     * version of update.
519     *
520     * @param harvestDefinition the harvest definition object.
521     */
522    @Override
523    public synchronized void flipActive(SparsePartialHarvest harvestDefinition) {
524        ArgumentNotValid.checkNotNull(harvestDefinition, "HarvestDefinition harvestDefinition");
525
526        Connection c = HarvestDBConnection.get();
527        PreparedStatement s = null;
528        try {
529            if (harvestDefinition.getOid() == null || !exists(c, harvestDefinition.getOid())) {
530                final String message = "Cannot update non-existing " + "harvestdefinition '"
531                        + harvestDefinition.getName() + "'";
532                log.debug(message);
533                throw new PermissionDenied(message);
534            }
535
536            c.setAutoCommit(false);
537            s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, "
538                    + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? "
539                    + "WHERE harvest_id = ? AND edition = ?");
540            DBUtils.setName(s, 1, harvestDefinition, Constants.MAX_NAME_SIZE);
541            DBUtils.setComments(s, 2, harvestDefinition, Constants.MAX_COMMENT_SIZE);
542            s.setInt(3, harvestDefinition.getNumEvents());
543            s.setTimestamp(4, new Timestamp(harvestDefinition.getSubmissionDate().getTime()));
544            s.setBoolean(5, !harvestDefinition.isActive());
545            long nextEdition = harvestDefinition.getEdition() + 1;
546            s.setLong(6, nextEdition);
547            s.setString(7, harvestDefinition.getAudience());
548            s.setLong(8, harvestDefinition.getOid());
549            s.setLong(9, harvestDefinition.getEdition());
550            int rows = s.executeUpdate();
551            // Since the HD exists, no rows indicates bad edition
552            if (rows == 0) {
553                String message = "Somebody else must have updated " + harvestDefinition + " since edition "
554                        + harvestDefinition.getEdition() + ", not updating";
555                log.debug(message);
556                throw new PermissionDenied(message);
557            }
558            s.close();
559
560            // Now pull more strings
561            s = c.prepareStatement("UPDATE partialharvests SET schedule_id = "
562                    + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? "
563                    + "WHERE harvest_id = ?");
564            s.setString(1, harvestDefinition.getScheduleName());
565            DBUtils.setDateMaybeNull(s, 2, harvestDefinition.getNextDate());
566            s.setLong(3, harvestDefinition.getOid());
567            rows = s.executeUpdate();
568            log.debug("{} partialharvests records updated", rows);
569            s.close();
570            c.commit();
571        } catch (SQLException e) {
572            throw new IOFailure("SQL error while updating harvest definition " + harvestDefinition + "\n"
573                    + ExceptionUtils.getSQLExceptionCause(e), e);
574        } finally {
575            DBUtils.rollbackIfNeeded(c, "updating", harvestDefinition);
576            HarvestDBConnection.release(c);
577        }
578    }
579
580    @Override
581    public synchronized boolean exists(String name) {
582        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
583        Connection c = HarvestDBConnection.get();
584        try {
585            return 1 == DBUtils.selectIntValue(c, "SELECT COUNT(harvest_id) "
586                    + "FROM harvestdefinitions WHERE name = ?", name);
587        } finally {
588            HarvestDBConnection.release(c);
589        }
590    }
591
592    @Override
593    public synchronized boolean exists(Long oid) {
594        ArgumentNotValid.checkNotNull(oid, "Long oid");
595        Connection c = HarvestDBConnection.get();
596        try {
597            return exists(c, oid);
598        } finally {
599            HarvestDBConnection.release(c);
600        }
601    }
602
603    /**
604     * Check if a harvestdefinition exists with the given id.
605     *
606     * @param c An open connection to the harvestDatabase
607     * @param oid A potential identifier for a harvestdefinition
608     * @return true If a harvestdefinition exists with the given id.
609     * @see HarvestDefinitionDAO#exists(Long)
610     */
611    private boolean exists(Connection c, Long oid) {
612        return 1 == DBUtils.selectIntValue(c, "SELECT COUNT(harvest_id) "
613                + "FROM harvestdefinitions WHERE harvest_id = ?", oid);
614    }
615
616    /**
617     * Get a list of all existing harvest definitions ordered by name.
618     *
619     * @return An iterator that give the existing harvest definitions in turn
620     */
621    @Override
622    public synchronized Iterator<HarvestDefinition> getAllHarvestDefinitions() {
623        Connection c = HarvestDBConnection.get();
624        try {
625            List<Long> hds = DBUtils.selectLongList(c, "SELECT harvest_id FROM harvestdefinitions ORDER BY name");
626            log.debug("Getting an iterator for all stored harvestdefinitions.");
627
628            List<HarvestDefinition> orderedList = new LinkedList<HarvestDefinition>();
629            for (Long id : hds) {
630                orderedList.add(read(c, id));
631            }
632            return orderedList.iterator();
633        } finally {
634            HarvestDBConnection.release(c);
635        }
636    }
637
638    /**
639     * Gets default configurations for all domains that are not aliases.
640     * <p>
641     * This method currently gives an iterator that reads in all domains, although only on demand, that is: when calling
642     * "hasNext".
643     *
644     * @return Iterator containing the default DomainConfiguration for all domains that are not aliases
645     */
646    @Override
647    public synchronized Iterator<DomainConfiguration> getSnapShotConfigurations() {
648        return new FilterIterator<Domain, DomainConfiguration>(DomainDAO.getInstance()
649                .getAllDomainsInSnapshotHarvestOrder()) {
650            public DomainConfiguration filter(Domain domain) {
651                if (domain.getAliasInfo() == null || domain.getAliasInfo().isExpired()) {
652                    return domain.getDefaultConfiguration();
653                } else {
654                    return null;
655                }
656            }
657        };
658    }
659
660    /**
661     * Returns a list of IDs of harvest definitions that are ready to be scheduled.
662     *
663     * @param now The current date
664     * @return List of ready harvest definitions. No check is performed for whether these are already in the middle of
665     * being scheduled.
666     */
667    @Override
668    public Iterable<Long> getReadyHarvestDefinitions(Date now) {
669        ArgumentNotValid.checkNotNull(now, "Date now");
670        Connection connection = HarvestDBConnection.get();
671        try {
672            List<Long> ids = DBUtils.selectLongList(connection, "SELECT fullharvests.harvest_id"
673                    + " FROM fullharvests, harvestdefinitions"
674                    + " WHERE harvestdefinitions.harvest_id = fullharvests.harvest_id" + " AND isactive = ? "
675                    + " AND numevents < 1  AND isindexready = ?", true, true);
676            ids.addAll(DBUtils.selectLongList(connection, "SELECT partialharvests.harvest_id"
677                    + " FROM partialharvests, harvestdefinitions"
678                    + " WHERE harvestdefinitions.harvest_id = partialharvests.harvest_id"
679                    + " AND isactive = ? AND nextdate IS NOT NULL AND nextdate < ?", true, now));
680            Set<Long> distinctIds = new HashSet<>();
681            distinctIds.addAll(ids);
682            if (distinctIds.size() != ids.size()) {
683                log.warn("Query returned multiple identical ids {}. These have been sanitized.", ids);
684                return distinctIds;
685            } else {
686                return ids;
687            }
688        } finally {
689            HarvestDBConnection.release(connection);
690        }
691    }
692
693    /**
694     * Get the harvest definition that has the given name, if any.
695     *
696     * @param name The name of a harvest definition.
697     * @return The HarvestDefinition object with that name, or null if none has that name.
698     */
699    @Override
700    public synchronized HarvestDefinition getHarvestDefinition(String name) {
701        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
702        log.debug("Reading harvestdefinition w/ name '{}'", name);
703        Connection c = HarvestDBConnection.get();
704        PreparedStatement s = null;
705        try {
706            s = c.prepareStatement("SELECT harvest_id FROM harvestdefinitions WHERE name = ?");
707            s.setString(1, name);
708            ResultSet res = s.executeQuery();
709            if (res.next()) {
710                long harvestDefinitionID = res.getLong(1);
711                s.close();
712                return read(c, harvestDefinitionID);
713            }
714            return null;
715        } catch (SQLException e) {
716            throw new IOFailure("SQL error while getting HD by name" + "\n" + ExceptionUtils.getSQLExceptionCause(e), e);
717        } finally {
718            DBUtils.closeStatementIfOpen(s);
719            HarvestDBConnection.release(c);
720        }
721    }
722
723    @Override
724    public List<HarvestRunInfo> getHarvestRunInfo(long harvestID) {
725        Connection c = HarvestDBConnection.get();
726        PreparedStatement s = null;
727        try {
728            ResultSet res = null;
729            Map<Integer, HarvestRunInfo> runInfos = new HashMap<Integer, HarvestRunInfo>();
730            List<HarvestRunInfo> infoList = new ArrayList<HarvestRunInfo>();
731
732            // Select dates and counts for all different statues
733            // for each run
734            s = c.prepareStatement("SELECT name, harvest_num, status, MIN(startdate), MAX(enddate), COUNT(job_id)"
735                    + "  FROM jobs, harvestdefinitions"
736                    + " WHERE harvestdefinitions.harvest_id = ?   AND jobs.harvest_id = harvestdefinitions.harvest_id"
737                    + " GROUP BY name, harvest_num, status ORDER BY harvest_num DESC");
738            s.setLong(1, harvestID);
739            res = s.executeQuery();
740            while (res.next()) {
741                int runNr = res.getInt(2);
742                HarvestRunInfo info = runInfos.get(runNr);
743                if (info == null) {
744                    String name = res.getString(1);
745                    info = new HarvestRunInfo(harvestID, name, runNr);
746                    // Put into hash for easy access when updating.
747                    runInfos.put(runNr, info);
748                    // Add to return list in order given by DB
749                    infoList.add(info);
750                }
751                JobStatus status = JobStatus.fromOrdinal(res.getInt(3));
752                // For started states, check start date
753                if (status != JobStatus.NEW && status != JobStatus.SUBMITTED && status != JobStatus.RESUBMITTED) {
754                    Date startDate = DBUtils.getDateMaybeNull(res, 4);
755                    if (info.getStartDate() == null || (startDate != null && startDate.before(info.getStartDate()))) {
756                        info.setStartDate(startDate);
757                    }
758                }
759                // For finished jobs, check end date
760                if (status == JobStatus.DONE || status == JobStatus.FAILED) {
761                    Date endDate = DBUtils.getDateMaybeNull(res, 5);
762                    if (info.getEndDate() == null || (endDate != null && endDate.after(info.getEndDate()))) {
763                        info.setEndDate(endDate);
764                    }
765                }
766                int count = res.getInt(6);
767                info.setStatusCount(status, count);
768            }
769            s.close();
770            s = c.prepareStatement("SELECT jobs.harvest_num, SUM(historyinfo.bytecount), "
771                    + "SUM(historyinfo.objectcount)," + "COUNT(jobs.status)" + " FROM jobs, historyinfo "
772                    + " WHERE jobs.harvest_id = ? AND historyinfo.job_id = jobs.job_id" + " GROUP BY jobs.harvest_num"
773                    + " ORDER BY jobs.harvest_num");
774            s.setLong(1, harvestID);
775            res = s.executeQuery();
776
777            while (res.next()) {
778                final int harvestNum = res.getInt(1);
779                HarvestRunInfo info = runInfos.get(harvestNum);
780                if (info != null) {
781                    info.setBytesHarvested(res.getLong(2));
782                    info.setDocsHarvested(res.getLong(3));
783                } else {
784                    log.debug("Harvestnum {} for harvestID {} is skipped. Must have arrived between selects",
785                            harvestNum, harvestID);
786                }
787            }
788
789            // Make sure that jobs that aren't really done don't have end date.
790            for (HarvestRunInfo info : infoList) {
791                if (info.getJobCount(JobStatus.STARTED) != 0 || info.getJobCount(JobStatus.NEW) != 0
792                        || info.getJobCount(JobStatus.SUBMITTED) != 0) {
793                    info.setEndDate(null);
794                }
795            }
796            return infoList;
797        } catch (SQLException e) {
798            String message = "SQL error asking for harvest run info on " + harvestID + " in database" + "\n"
799                    + ExceptionUtils.getSQLExceptionCause(e);
800            log.warn(message, e);
801            throw new IOFailure(message, e);
802        } finally {
803            DBUtils.closeStatementIfOpen(s);
804            HarvestDBConnection.release(c);
805        }
806    }
807
808    /**
809     * Get all domain,configuration pairs for a harvest definition in sparse version for GUI purposes.
810     *
811     * @param harvestDefinitionID The ID of the harvest definition.
812     * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions.
813     * @throws ArgumentNotValid on null argument.
814     */
815    @Override
816    public List<SparseDomainConfiguration> getSparseDomainConfigurations(Long harvestDefinitionID) {
817        ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID");
818        Connection c = HarvestDBConnection.get();
819        try {
820            return getSparseDomainConfigurations(c, harvestDefinitionID);
821        } finally {
822            HarvestDBConnection.release(c);
823        }
824    }
825
826    /**
827     * Get all domain,configuration pairs for a harvest definition in sparse version.
828     *
829     * @param c a connection to the harvest database
830     * @param harvestDefinitionID The ID of the harvest definition.
831     * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions.
832     */
833    private List<SparseDomainConfiguration> getSparseDomainConfigurations(Connection c, Long harvestDefinitionID) {
834        try (PreparedStatement s = c
835                .prepareStatement("SELECT domains.name, configurations.name " + "FROM domains, configurations,"
836                        + " harvest_configs "
837                        + "WHERE harvest_id = ?  AND configurations.config_id = harvest_configs.config_id"
838                        + " AND configurations.domain_id = domains.domain_id");
839        ) {
840            s.setLong(1, harvestDefinitionID);
841            ResultSet res = s.executeQuery();
842            List<SparseDomainConfiguration> resultList = new ArrayList<SparseDomainConfiguration>();
843            while (res.next()) {
844                SparseDomainConfiguration sdc = new SparseDomainConfiguration(res.getString(1), res.getString(2));
845                resultList.add(sdc);
846            }
847
848            Collections.sort(resultList, new SparseDomainConfigurationComparator());
849            return resultList;
850        } catch (SQLException e) {
851            throw new IOFailure("SQL error getting sparse domains" + "\n" + ExceptionUtils.getSQLExceptionCause(e), e);
852        }
853    }
854
855    /**
856     * Get all sparse versions of partial harvests for GUI purposes ordered by name.
857     *
858     * @return An iterable (possibly empty) of SparsePartialHarvests
859     */
860    public Iterable<SparsePartialHarvest> getSparsePartialHarvestDefinitions(boolean excludeInactive) {
861        String query = "SELECT harvestdefinitions.harvest_id," + "       harvestdefinitions.name,"
862                + "       harvestdefinitions.comments," + "       harvestdefinitions.numevents,"
863                + "       harvestdefinitions.submitted," + "       harvestdefinitions.isactive,"
864                + "       harvestdefinitions.edition," + "       schedules.name," + "       partialharvests.nextdate, "
865                + "       harvestdefinitions.audience, " + "       harvestdefinitions.channel_id "
866                + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.harvest_id "
867                + "       = partialharvests.harvest_id" + " AND (harvestdefinitions.isactive "
868                + " = ?"
869                // This linie is duplicated to allow to select both active
870                // and inactive HD's.
871                + " OR harvestdefinitions" + ".isactive " + " = ?)" + "   AND schedules.schedule_id "
872                + "       = partialharvests.schedule_id " + "ORDER BY harvestdefinitions.name";
873        try (
874                Connection c = HarvestDBConnection.get();
875                PreparedStatement s = DBUtils.prepareStatement(c, query, true, excludeInactive);
876        ) {
877            ResultSet res = s.executeQuery();
878            List<SparsePartialHarvest> harvests = new ArrayList<SparsePartialHarvest>();
879            while (res.next()) {
880                SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), res.getString(2), res.getString(3),
881                        res.getInt(4), new Date(res.getTimestamp(5).getTime()), res.getBoolean(6), res.getLong(7),
882                        res.getString(8), DBUtils.getDateMaybeNull(res, 9), res.getString(10),
883                        DBUtils.getLongMaybeNull(res, 11));
884                harvests.add(sph);
885            }
886            return harvests;
887        } catch (SQLException e) {
888            throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e);
889        }
890    }
891
892    /**
893     * Get a sparse version of a partial harvest for GUI purposes.
894     *
895     * @param harvestName Name of harvest definition.
896     * @return Sparse version of partial harvest or null for none.
897     * @throws ArgumentNotValid on null or empty name.
898     */
899    @Override
900    public SparsePartialHarvest getSparsePartialHarvest(String harvestName) {
901        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
902        try (
903                Connection c = HarvestDBConnection.get();
904                PreparedStatement s = c
905                        .prepareStatement("SELECT harvestdefinitions.harvest_id," + "       harvestdefinitions.comments,"
906                                + "       harvestdefinitions.numevents," + "       harvestdefinitions.submitted,"
907                                + "       harvestdefinitions.isactive," + "       harvestdefinitions.edition,"
908                                + "       schedules.name," + "       partialharvests.nextdate, "
909                                + "       harvestdefinitions.audience, " + "       harvestdefinitions.channel_id "
910                                + "FROM harvestdefinitions, partialharvests, schedules"
911                                + " WHERE harvestdefinitions.name = ?"
912                                + "   AND harvestdefinitions.harvest_id " + "= partialharvests.harvest_id"
913                                + "   AND schedules.schedule_id " + "= partialharvests.schedule_id");
914        ) {
915            s.setString(1, harvestName);
916            ResultSet res = s.executeQuery();
917            if (res.next()) {
918                SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), harvestName, res.getString(2),
919                        res.getInt(3), new Date(res.getTimestamp(4).getTime()), res.getBoolean(5), res.getLong(6),
920                        res.getString(7), DBUtils.getDateMaybeNull(res, 8), res.getString(9), DBUtils.getLongMaybeNull(
921                        res, 10));
922                sph.setExtendedFieldValues(getExtendedFieldValues(sph.getOid()));
923                return sph;
924            } else {
925                return null;
926            }
927        } catch (SQLException e) {
928            throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e);
929        }
930    }
931
932    /**
933     * Get all sparse versions of full harvests for GUI purposes.
934     *
935     * @return An iterable (possibly empty) of SparseFullHarvests
936     */
937    public Iterable<SparseFullHarvest> getAllSparseFullHarvestDefinitions() {
938        try (
939                Connection c = HarvestDBConnection.get();
940                PreparedStatement s = c
941                        .prepareStatement("SELECT harvestdefinitions.harvest_id," + "       harvestdefinitions.name,"
942                                + "       harvestdefinitions.comments," + "       harvestdefinitions.numevents,"
943                                + "       harvestdefinitions.isactive," + "       harvestdefinitions.edition,"
944                                + "       fullharvests.maxobjects," + "       fullharvests.maxbytes,"
945                                + "       fullharvests.maxjobrunningtime," + "       fullharvests.previoushd, "
946                                + "       harvestdefinitions.channel_id " + "FROM harvestdefinitions, fullharvests"
947                                + " WHERE harvestdefinitions.harvest_id " + "       = fullharvests.harvest_id"
948                                + " ORDER BY harvestdefinitions.name");
949        ) {
950            ResultSet res = s.executeQuery();
951            List<SparseFullHarvest> harvests = new ArrayList<SparseFullHarvest>();
952            while (res.next()) {
953                SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), res.getString(2), res.getString(3),
954                        res.getInt(4), res.getBoolean(5), res.getLong(6), res.getLong(7), res.getLong(8),
955                        res.getLong(9), DBUtils.getLongMaybeNull(res, 10), DBUtils.getLongMaybeNull(res, 11));
956                // EAV
957                long oid = sfh.getOid();
958                List<AttributeAndType> attributesAndTypes = EAV.getInstance().getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int)oid);
959                sfh.setAttributesAndTypes(attributesAndTypes);
960                harvests.add(sfh);
961            }
962            return harvests;
963        } catch (SQLException e) {
964            throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e);
965        }
966    }
967
968    /**
969     * Get the name of a harvest given its ID.
970     *
971     * @param harvestDefinitionID The ID of a harvest
972     * @return The name of the given harvest.
973     * @throws ArgumentNotValid on null argument
974     * @throws UnknownID if no harvest has the given ID.
975     * @throws IOFailure on any other error talking to the database
976     */
977    @Override
978    public String getHarvestName(Long harvestDefinitionID) {
979        ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID");
980        try (
981                Connection c = HarvestDBConnection.get();
982                PreparedStatement s = c.prepareStatement("SELECT name FROM harvestdefinitions WHERE harvest_id = ?");
983        ) {
984            s.setLong(1, harvestDefinitionID);
985            ResultSet res = s.executeQuery();
986            String name = null;
987            while (res.next()) {
988                if (name != null) {
989                    throw new IOFailure("Found more than one name for harvest definition " + harvestDefinitionID
990                            + ": '" + name + "' and '" + res.getString(1) + "'");
991                }
992                name = res.getString(1);
993            }
994            if (name == null) {
995                throw new UnknownID("No name found for harvest definition " + harvestDefinitionID);
996            }
997            return name;
998        } catch (SQLException e) {
999            throw new IOFailure("An error occurred finding the name for " + "harvest definition " + harvestDefinitionID
1000                    + "\n" + ExceptionUtils.getSQLExceptionCause(e), e);
1001        }
1002    }
1003
1004    /**
1005     * Get whether a given harvest is a snapshot or selective harvest.
1006     *
1007     * @param harvestDefinitionID ID of a harvest
1008     * @return True if the given harvest is a snapshot harvest, false otherwise.
1009     * @throws ArgumentNotValid on null argument
1010     * @throws UnknownID if no harvest has the given ID.
1011     */
1012    @Override
1013    public boolean isSnapshot(Long harvestDefinitionID) {
1014        ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID");
1015        try (Connection connection = HarvestDBConnection.get();) {
1016            boolean isSnapshot = DBUtils.selectAny(connection,
1017                    "SELECT harvest_id FROM fullharvests WHERE harvest_id = ?", harvestDefinitionID);
1018            if (isSnapshot) {
1019                return true;
1020            }
1021            boolean isSelective = DBUtils.selectAny(connection, "SELECT harvest_id FROM partialharvests "
1022                    + "WHERE harvest_id = ?", harvestDefinitionID);
1023            if (isSelective) {
1024                return false;
1025            }
1026            throw new UnknownID("Failed to find harvest definition with id " + harvestDefinitionID);
1027        } catch (SQLException e) {
1028            throw new RuntimeException("Unable to close DB connection");
1029        }
1030    }
1031
1032    /**
1033     * Get a sparse version of a full harvest for GUI purposes.
1034     *
1035     * @param harvestName Name of harvest definition.
1036     * @return Sparse version of full harvest or null for none.
1037     * @throws ArgumentNotValid on null or empty name.
1038     * @throws UnknownID if no harvest has the given ID.
1039     * @throws IOFailure on any other error talking to the database
1040     */
1041    @Override
1042    public SparseFullHarvest getSparseFullHarvest(String harvestName) {
1043        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
1044        try (Connection c = HarvestDBConnection.get();
1045                PreparedStatement s = c
1046                        .prepareStatement("SELECT harvestdefinitions.harvest_id," + "       harvestdefinitions.comments,"
1047                                + "       harvestdefinitions.numevents," + "       harvestdefinitions.isactive,"
1048                                + "       harvestdefinitions.edition," + "       fullharvests.maxobjects,"
1049                                + "       fullharvests.maxbytes," + "       fullharvests.maxjobrunningtime,"
1050                                + "       fullharvests.previoushd, " + "       harvestdefinitions.channel_id "
1051                                + "FROM harvestdefinitions, fullharvests" + " WHERE harvestdefinitions.name = ?"
1052                                + "   AND harvestdefinitions.harvest_id " + "       = fullharvests.harvest_id");
1053        ) {
1054            s.setString(1, harvestName);
1055            ResultSet res = s.executeQuery();
1056            if (res.next()) {
1057                SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), harvestName, res.getString(2),
1058                        res.getInt(3), res.getBoolean(4), res.getLong(5), res.getLong(6), res.getLong(7),
1059                        res.getLong(8), DBUtils.getLongMaybeNull(res, 9), DBUtils.getLongMaybeNull(res, 10));
1060                // EAV
1061                long oid = sfh.getOid();
1062                List<AttributeAndType> attributesAndTypes = EAV.getInstance().getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int)oid);
1063                sfh.setAttributesAndTypes(attributesAndTypes);
1064                sfh.setExtendedFieldValues(getExtendedFieldValues(sfh.getOid()));
1065                return sfh;
1066            } else {
1067                return null;
1068            }
1069        } catch (SQLException e) {
1070            throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e);
1071        }
1072    }
1073
1074    /**
1075     * Get a sorted list of all domain names of a HarvestDefinition.
1076     *
1077     * @param harvestName of HarvestDefinition
1078     * @return List of all domains of the HarvestDefinition.
1079     */
1080    @Override
1081    public List<String> getListOfDomainsOfHarvestDefinition(String harvestName) {
1082        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
1083        try (Connection c = HarvestDBConnection.get();
1084                PreparedStatement s = c.prepareStatement(
1085                        // Note: the DISTINCT below is put in deliberately to fix
1086                        // bug 1878: Seeds for domain is shown twice on page
1087                        // History/Harveststatus-seeds.jsp
1088                        "SELECT DISTINCT domains.name" + " FROM     domains," + "          configurations,"
1089                                + "          harvest_configs," + "          harvestdefinitions"
1090                                + " WHERE    configurations.domain_id = domains.domain_id"
1091                                + " AND harvest_configs.config_id = "
1092                                + "configurations.config_id" + " AND harvest_configs.harvest_id = "
1093                                + "harvestdefinitions.harvest_id" + " AND harvestdefinitions.name = ?"
1094                                + " ORDER BY domains.name");
1095        ) {
1096            s.setString(1, harvestName);
1097            ResultSet res = s.executeQuery();
1098            List<String> domains = new ArrayList<String>();
1099
1100            while (res.next()) {
1101                domains.add(res.getString(1));
1102            }
1103            return domains;
1104        } catch (SQLException e) {
1105            throw new IOFailure("SQL error getting seeds of a domain of a harvest definition" + "\n"
1106                    + ExceptionUtils.getSQLExceptionCause(e), e);
1107        }
1108    }
1109
1110    /**
1111     * Get a sorted list of all seeds of a Domain in a HarvestDefinition.
1112     *
1113     * @param harvestName of HarvestDefinition
1114     * @param domainName of Domain
1115     * @return List of all seeds of the Domain in the HarvestDefinition.
1116     */
1117    @Override
1118    public List<String> getListOfSeedsOfDomainOfHarvestDefinition(String harvestName, String domainName) {
1119        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
1120        ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
1121        try (Connection c = HarvestDBConnection.get();
1122                PreparedStatement s = c.prepareStatement(
1123                        "SELECT seedlists.seeds" + " FROM   configurations," + "        harvest_configs,"
1124                                + "        harvestdefinitions," + "        seedlists," + "        config_seedlists,"
1125                                + "        domains" + " WHERE  config_seedlists.seedlist_id " + "= seedlists.seedlist_id"
1126                                + " AND configurations.config_id " + "= config_seedlists.config_id"
1127                                + " AND configurations.config_id " + "= harvest_configs.config_id"
1128                                + " AND harvest_configs.harvest_id " + "= harvestdefinitions.harvest_id"
1129                                + " AND configurations.domain_id = domains.domain_id" + " AND domains.name = ?"
1130                                + " AND harvestdefinitions.name = ?");
1131        ) {
1132            s.setString(1, domainName);
1133            s.setString(2, harvestName);
1134            ResultSet res = s.executeQuery();
1135            List<String> seeds = new ArrayList<String>();
1136
1137            while (res.next()) {
1138                String seedsOfDomain = res.getString(1);
1139
1140                StringTokenizer st = new StringTokenizer(seedsOfDomain, "\n");
1141
1142                while (st.hasMoreTokens()) {
1143                    String seed = st.nextToken();
1144
1145                    boolean isDuplicate = false;
1146                    for (String entry : seeds) {
1147                        if (entry.equals(seed)) {
1148                            isDuplicate = true;
1149                            break;
1150                        }
1151                    }
1152                    if (!isDuplicate) { // duplicates will not be added
1153                        seeds.add(seed);
1154                    }
1155                }
1156            }
1157
1158            Collections.sort(seeds, Collator.getInstance());
1159
1160            return seeds;
1161        } catch (SQLException e) {
1162            throw new IOFailure("SQL error getting seeds of a domain\n" + ExceptionUtils.getSQLExceptionCause(e), e);
1163        }
1164    }
1165
1166    @Override
1167    public Set<Long> getJobIdsForSnapshotDeduplicationIndex(Long harvestId) {
1168        ArgumentNotValid.checkNotNull(harvestId, "Long harvestId");
1169        Set<Long> jobIds = new HashSet<Long>();
1170        if (!isSnapshot(harvestId)) {
1171            throw new NotImplementedException("This functionality only works for snapshot harvests");
1172        }
1173        List<Long> harvestDefinitions = getPreviousFullHarvests(harvestId);
1174        try (Connection c = HarvestDBConnection.get();) {
1175            List<Long> jobs = new ArrayList<Long>();
1176            if (!harvestDefinitions.isEmpty()) {
1177                // Select all jobs from a given list of harvest definitions
1178                jobs.addAll(DBUtils.selectLongList(c, "SELECT jobs.job_id FROM jobs WHERE jobs.harvest_id IN ("
1179                        + StringUtils.conjoin(",", harvestDefinitions) + ")"));
1180            }
1181            jobIds.addAll(jobs);
1182        } catch (SQLException e) {
1183            log.error("Unable to close DB connection", e);
1184        }
1185
1186        return jobIds;
1187    }
1188
1189    /**
1190     * Get list of harvests previous to this one.
1191     *
1192     * @param thisHarvest The id of this harvestdefinition
1193     * @return a list of IDs belonging to harvests previous to this one.
1194     */
1195    private List<Long> getPreviousFullHarvests(Long thisHarvest) {
1196        List<Long> results = new ArrayList<Long>();
1197        try (Connection c = HarvestDBConnection.get();) {
1198            // Follow the chain of originating IDs back
1199            for (Long originatingHarvest = thisHarvest; originatingHarvest != null;
1200                // Compute next originatingHarvest
1201                 originatingHarvest = DBUtils.selectFirstLongValueIfAny(c, "SELECT previoushd FROM fullharvests"
1202                         + " WHERE fullharvests.harvest_id=?", originatingHarvest)) {
1203                if (!originatingHarvest.equals(thisHarvest)) {
1204                    results.add(originatingHarvest);
1205                }
1206            }
1207
1208            // Find the first harvest in the chain (but last in the list).
1209            Long firstHarvest = thisHarvest;
1210            if (!results.isEmpty()) {
1211                firstHarvest = results.get(results.size() - 1);
1212            }
1213
1214            // Find the last harvest in the chain before
1215            Long olderHarvest = DBUtils.selectFirstLongValueIfAny(c, "SELECT fullharvests.harvest_id"
1216                            + " FROM fullharvests, harvestdefinitions," + "  harvestdefinitions AS currenthd"
1217                            + " WHERE currenthd.harvest_id=?" + " AND fullharvests.harvest_id "
1218                            + "= harvestdefinitions.harvest_id"
1219                            + " AND harvestdefinitions.submitted " + "< currenthd.submitted"
1220                            + " ORDER BY harvestdefinitions.submitted " + HarvestStatusQuery.SORT_ORDER.DESC.name(),
1221                    firstHarvest);
1222            // Follow the chain of originating IDs back
1223            for (Long originatingHarvest = olderHarvest; originatingHarvest != null; originatingHarvest = DBUtils
1224                    .selectFirstLongValueIfAny(c, "SELECT previoushd FROM fullharvests"
1225                            + " WHERE fullharvests.harvest_id=?", originatingHarvest)) {
1226                results.add(originatingHarvest);
1227            }
1228        } catch (SQLException e) {
1229            log.warn("Exception thrown while updating fullharvests.isindexready field: {}",
1230                    ExceptionUtils.getSQLExceptionCause(e), e);
1231        }
1232        return results;
1233    }
1234
1235    @Override
1236    public void setIndexIsReady(Long harvestId, boolean newValue) {
1237        if (!isSnapshot(harvestId)) {
1238            throw new NotImplementedException("Not implemented for non snapshot harvests");
1239        } else {
1240            try (
1241                    Connection c = HarvestDBConnection.get();
1242                    PreparedStatement s = c.prepareStatement(
1243                            "UPDATE fullharvests SET isindexready=? WHERE harvest_id=?");
1244            ) {
1245                s.setBoolean(1, newValue);
1246                s.setLong(2, harvestId);
1247                int rows = s.executeUpdate();
1248                log.debug(rows + " entries of table fullharvests updated");
1249            } catch (SQLException e) {
1250                log.warn("Exception thrown while updating fullharvests.isindexready field: {}",
1251                        ExceptionUtils.getSQLExceptionCause(e), e);
1252            }
1253        }
1254    }
1255
1256    /*
1257     * Removes the entry in harvest_configs, that binds a certain domainconfiguration to this PartialHarvest. TODO maybe
1258     * update the edition as well.
1259     */
1260    @Override
1261    public void removeDomainConfiguration(Long harvestId, SparseDomainConfiguration key) {
1262        ArgumentNotValid.checkNotNull(key, "DomainConfigurationKey key");
1263        if (harvestId == null) {
1264            // Don't need to do anything, if PartialHarvest is not
1265            // yet stored in database
1266            log.warn("No removal of domainConfiguration, " + "as harvestId is null");
1267            return;
1268        }
1269        Connection connection = HarvestDBConnection.get();
1270        PreparedStatement s = null;
1271        try {
1272            s = connection.prepareStatement("DELETE FROM harvest_configs WHERE harvest_id = ? "
1273                    + "AND config_id = (SELECT config_id " + " FROM configurations, domains "
1274                    + "WHERE domains.name = ? AND configurations.name = ?"
1275                    + "  AND domains.domain_id = configurations.domain_id)");
1276            s.setLong(1, harvestId);
1277            s.setString(2, key.getDomainName());
1278            s.setString(3, key.getConfigurationName());
1279            s.executeUpdate();
1280        } catch (SQLException e) {
1281            log.warn("Exception thrown while removing domainconfiguration: {}", ExceptionUtils.getSQLExceptionCause(e),
1282                    e);
1283        } finally {
1284            DBUtils.closeStatementIfOpen(s);
1285            DBUtils.rollbackIfNeeded(connection, "removing DomainConfiguration from harvest w/id " + harvestId
1286                    + " failed", harvestId);
1287            HarvestDBConnection.release(connection);
1288        }
1289    }
1290
1291    @Override
1292    public void updateNextdate(long harvestId, Date nextdate) {
1293        ArgumentNotValid.checkNotNull(harvestId, "Long harvest ID");
1294        ArgumentNotValid.checkNotNull(nextdate, "Date nextdate");
1295        if (harvestId < 0) {
1296            // Don't need to do anything, if PartialHarvest is not
1297            // yet stored in database
1298            return;
1299        }
1300        Connection connection = HarvestDBConnection.get();
1301        PreparedStatement s = null;
1302        try {
1303            connection.setAutoCommit(false);
1304            s = connection.prepareStatement("UPDATE partialharvests SET nextdate = ? " + "WHERE harvest_id = ?");
1305            DBUtils.setDateMaybeNull(s, 1, nextdate);
1306            s.setLong(2, harvestId);
1307            s.executeUpdate();
1308            connection.commit();
1309        } catch (SQLException e) {
1310            log.warn("Exception thrown while updating nextdate: {}", ExceptionUtils.getSQLExceptionCause(e), e);
1311        } finally {
1312            DBUtils.closeStatementIfOpen(s);
1313            DBUtils.rollbackIfNeeded(connection, "Updating nextdate from", harvestId);
1314            HarvestDBConnection.release(connection);
1315        }
1316    }
1317
1318    @Override
1319    public void addDomainConfiguration(PartialHarvest ph, SparseDomainConfiguration dcKey) {
1320        ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph");
1321        ArgumentNotValid.checkNotNull(dcKey, "DomainConfigurationKey dcKey");
1322
1323        try (
1324                Connection connection = HarvestDBConnection.get();
1325                PreparedStatement s = connection
1326                        .prepareStatement("INSERT INTO harvest_configs " + "( harvest_id, config_id ) "
1327                                + "SELECT ?, config_id FROM configurations, domains "
1328                                + "WHERE domains.name = ? AND configurations.name = ?"
1329                                + "  AND domains.domain_id = configurations.domain_id");
1330
1331        ) {
1332            s.setLong(1, ph.getOid());
1333            s.setString(2, dcKey.getDomainName());
1334            s.setString(3, dcKey.getConfigurationName());
1335            s.executeUpdate();
1336            s.close();
1337        } catch (SQLException e) {
1338            log.warn("Exception thrown while adding domainConfiguration: {}", ExceptionUtils.getSQLExceptionCause(e), e);
1339        }
1340    }
1341
1342    @Override
1343    public void resetDomainConfigurations(PartialHarvest ph, List<DomainConfiguration> dcList) {
1344        ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph");
1345        ArgumentNotValid.checkNotNull(dcList, "List<DomainConfiguration> dcList");
1346
1347        try (Connection connection = HarvestDBConnection.get()) {
1348            createHarvestConfigsEntries(connection, ph, ph.getOid());
1349        } catch (SQLException e) {
1350            log.warn("Exception thrown while resetting domainConfigurations: {}",
1351                    ExceptionUtils.getSQLExceptionCause(e), e);
1352        }
1353    }
1354
1355    @Override
1356    public void mapToHarvestChannel(long harvestDefinitionId, HarvestChannel channel) {
1357        ArgumentNotValid.checkNotNull(channel, "HarvestChannel channel");
1358
1359        try (
1360                Connection connection = HarvestDBConnection.get();
1361                PreparedStatement s = connection.prepareStatement(
1362                        "UPDATE harvestdefinitions SET channel_id=? WHERE harvest_id=?");
1363        ) {
1364            s.setLong(1, channel.getId());
1365            s.setLong(2, harvestDefinitionId);
1366            if (s.executeUpdate() != 1) {
1367                throw new IOFailure("Could not map harvest channel " + channel.getId() + " to harvest definition "
1368                        + harvestDefinitionId);
1369            }
1370            s.close();
1371        } catch (SQLException e) {
1372            log.warn("Exception thrown while mapping to harvest channel: {}", ExceptionUtils.getSQLExceptionCause(e), e);
1373        }
1374    }
1375
1376    /**
1377     * Saves all extended Field values for a HarvestDefinition in the Database.
1378     *
1379     * @param c Connection to Database
1380     * @param h HarvestDefinition where loaded extended Field Values will be set
1381     * @throws SQLException If database errors occur.
1382     */
1383    private void saveExtendedFieldValues(Connection c, HarvestDefinition h) throws SQLException {
1384        List<ExtendedFieldValue> list = h.getExtendedFieldValues();
1385        for (int i = 0; i < list.size(); i++) {
1386            ExtendedFieldValue efv = list.get(i);
1387            efv.setInstanceID(h.getOid());
1388
1389            ExtendedFieldValueDBDAO dao = (ExtendedFieldValueDBDAO) ExtendedFieldValueDAO.getInstance();
1390            if (efv.getExtendedFieldValueID() != null) {
1391                dao.update(c, efv, false);
1392            } else {
1393                dao.create(c, efv, false);
1394            }
1395        }
1396    }
1397
1398    /**
1399     * Reads all extended Field values from the database for a HarvestDefinition.
1400     *
1401     * @param h HarvestDefinition where loaded extended Field Values will be set
1402     * @throws SQLException If database errors occur.
1403     */
1404    private void readExtendedFieldValues(HarvestDefinition h) throws SQLException {
1405        h.setExtendedFieldValues(getExtendedFieldValues(h.getOid()));
1406    }
1407
1408    /**
1409     * Reads all extended Field values from the database for a HarvestDefinitionOid.
1410     *
1411     * @param aOid HarvestDefinition where loaded extended Field Values will be set
1412     * @return a list of ExtendedFieldValues belonging to the given harvest oid
1413     * @throws SQLException If database errors occur.
1414     */
1415    private List<ExtendedFieldValue> getExtendedFieldValues(Long aOid) throws SQLException {
1416        List<ExtendedFieldValue> extendedFieldValues = new ArrayList<ExtendedFieldValue>();
1417
1418        ExtendedFieldDAO dao = ExtendedFieldDAO.getInstance();
1419        List<ExtendedField> list = dao.getAll(ExtendedFieldTypes.HARVESTDEFINITION);
1420
1421        for (int i = 0; i < list.size(); i++) {
1422            ExtendedField ef = list.get(i);
1423
1424            ExtendedFieldValueDAO dao2 = ExtendedFieldValueDAO.getInstance();
1425            ExtendedFieldValue efv = dao2.read(ef.getExtendedFieldID(), aOid);
1426
1427            if (efv == null) {
1428                efv = new ExtendedFieldValue();
1429                efv.setExtendedFieldID(ef.getExtendedFieldID());
1430                efv.setInstanceID(aOid);
1431                efv.setContent(new ExtendedFieldDefaultValue(ef.getDefaultValue(), ef.getFormattingPattern(), ef
1432                        .getDatatype()).getDBValue());
1433            }
1434
1435            extendedFieldValues.add(efv);
1436        }
1437
1438        return extendedFieldValues;
1439    }
1440}