001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.sql.Connection;
027import java.sql.PreparedStatement;
028import java.sql.ResultSet;
029import java.sql.SQLException;
030import java.sql.Timestamp;
031import java.sql.Types;
032import java.text.Collator;
033import java.util.ArrayList;
034import java.util.Collections;
035import java.util.Comparator;
036import java.util.Date;
037import java.util.HashMap;
038import java.util.HashSet;
039import java.util.Iterator;
040import java.util.LinkedList;
041import java.util.List;
042import java.util.Map;
043import java.util.Set;
044import java.util.StringTokenizer;
045
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049import dk.netarkivet.common.exceptions.ArgumentNotValid;
050import dk.netarkivet.common.exceptions.IOFailure;
051import dk.netarkivet.common.exceptions.IllegalState;
052import dk.netarkivet.common.exceptions.NotImplementedException;
053import dk.netarkivet.common.exceptions.PermissionDenied;
054import dk.netarkivet.common.exceptions.UnknownID;
055import dk.netarkivet.common.utils.DBUtils;
056import dk.netarkivet.common.utils.ExceptionUtils;
057import dk.netarkivet.common.utils.FilterIterator;
058import dk.netarkivet.common.utils.StringUtils;
059import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory;
060import dk.netarkivet.harvester.datamodel.eav.EAV;
061import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType;
062import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedField;
063import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDAO;
064import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDefaultValue;
065import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldTypes;
066import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValue;
067import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDAO;
068import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldValueDBDAO;
069import dk.netarkivet.harvester.webinterface.HarvestStatusQuery;
070
071/**
072 * A database-oriented implementation of the HarvestDefinitionDAO.
073 * <p>
074 * The statements to create the tables are located in:
075 * <ul>
076 * <li><em>Derby:</em> scripts/sql/createfullhddb.sql</li>
077 * <li><em>MySQL:</em> scripts/sql/createfullhddb.mysql</li>
078 * <li><em>PostgreSQL:</em> scripts/postgresql/netarchivesuite_init.sql</li>
079 * </ul>
080 */
081public class HarvestDefinitionDBDAO extends HarvestDefinitionDAO {
082
083    /** The logger. */
084    private static final Logger log = LoggerFactory.getLogger(HarvestDefinitionDBDAO.class);
085
086    /**
087     * Comparator used for sorting the UI list of {@link SparseDomainConfiguration}s. Sorts first by domain name
088     * alphabetical order, next by configuration name.
089     */
090    private static class SparseDomainConfigurationComparator implements Comparator<SparseDomainConfiguration> {
091
092        @Override
093        public int compare(SparseDomainConfiguration sdc1, SparseDomainConfiguration sdc2) {
094            int domComp = sdc1.getDomainName().compareTo(sdc2.getDomainName());
095            if (0 == domComp) {
096                return sdc1.getConfigurationName().compareTo(sdc2.getConfigurationName());
097            }
098            return domComp;
099        }
100    }
101
102    /** Create a new HarvestDefinitionDAO using database. */
103    HarvestDefinitionDBDAO() {
104        Connection connection = HarvestDBConnection.get();
105        try {
106            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.FULLHARVESTS);
107            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTDEFINITIONS);
108            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.PARTIALHARVESTS);
109            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.HARVESTCONFIGS);
110            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDTYPE);
111            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELD);
112            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.EXTENDEDFIELDVALUE);
113        } finally {
114            HarvestDBConnection.release(connection);
115        }
116    }
117
118    /**
119     * Create a harvest definition in Database. The harvest definition object should not have its ID set unless we are
120     * in the middle of migrating.
121     *
122     * @param harvestDefinition A new harvest definition to store in the database.
123     * @return The harvestId for the just created harvest definition.
124     * @see HarvestDefinitionDAO#create(HarvestDefinition)
125     */
126    @Override
127    public synchronized Long create(HarvestDefinition harvestDefinition) {
128        Long id = harvestDefinition.getOid();
129        try (Connection connection = HarvestDBConnection.get();) {
130            Date submissiondate = new Date();
131            final int edition = 1;
132            try {
133                if (id == null) {
134                    id = generateNextID(connection);
135                }
136
137                connection.setAutoCommit(false);
138                try (PreparedStatement s = connection.prepareStatement("INSERT INTO harvestdefinitions "
139                        + "( harvest_id, name, comments, numevents, submitted,  isactive, edition, audience ) "
140                        + "VALUES ( ?, ?, ?, ?, ?, ?, ?,? )");) {
141                    s.setLong(1, id);
142                    DBUtils.setName(s, 2, harvestDefinition, Constants.MAX_NAME_SIZE);
143                    DBUtils.setComments(s, 3, harvestDefinition, Constants.MAX_COMMENT_SIZE);
144                    s.setLong(4, harvestDefinition.getNumEvents());
145                    // Don't set on object, as we may yet rollback
146                    s.setTimestamp(5, new Timestamp(submissiondate.getTime()));
147                    s.setBoolean(6, harvestDefinition.getActive());
148                    s.setLong(7, edition);
149                    s.setString(8, harvestDefinition.getAudience());
150                    s.executeUpdate();
151                }
152                if (harvestDefinition instanceof FullHarvest) {
153                    FullHarvest fh = (FullHarvest) harvestDefinition;
154                    try (PreparedStatement s = connection.prepareStatement("INSERT INTO fullharvests "
155                            + "( harvest_id, maxobjects, maxbytes, maxjobrunningtime, previoushd, isindexready)"
156                            + "VALUES ( ?, ?, ?, ?, ?, ? )");) {
157                        s.setLong(1, id);
158                        s.setLong(2, fh.getMaxCountObjects());
159                        s.setLong(3, fh.getMaxBytes());
160                        s.setLong(4, fh.getMaxJobRunningTime());
161                        if (fh.getPreviousHarvestDefinition() != null) {
162                            s.setLong(5, fh.getPreviousHarvestDefinition().getOid());
163                        } else {
164                            s.setNull(5, Types.BIGINT);
165                        }
166                        s.setBoolean(6, fh.getIndexReady());
167                        s.executeUpdate();
168                    }
169                } else if (harvestDefinition instanceof PartialHarvest) {
170                    PartialHarvest ph = (PartialHarvest) harvestDefinition;
171                    // Get schedule id
172                    long scheduleId = DBUtils.selectLongValue(connection,
173                            "SELECT schedule_id FROM schedules WHERE name = ?", ph.getSchedule().getName());
174                    try (PreparedStatement s = connection
175                            .prepareStatement("INSERT INTO partialharvests ( harvest_id, schedule_id, nextdate ) "
176                                    + "VALUES ( ?, ?, ? )");) {
177                        s.setLong(1, id);
178                        s.setLong(2, scheduleId);
179                        DBUtils.setDateMaybeNull(s, 3, ph.getNextDate());
180                        s.executeUpdate();
181                        createHarvestConfigsEntries(connection, ph, id);
182                    }
183                } else {
184                    String message = "Harvest definition " + harvestDefinition + " is of unknown class "
185                            + harvestDefinition.getClass();
186                    log.warn(message);
187                    throw new ArgumentNotValid(message);
188                }
189                connection.commit();
190
191                // Now that we have committed, set new data on object.
192                harvestDefinition.setSubmissionDate(submissiondate);
193                harvestDefinition.setEdition(edition);
194                harvestDefinition.setOid(id);
195
196                // saving after receiving id
197                saveExtendedFieldValues(connection, harvestDefinition);
198
199            } catch (SQLException e) {
200                String message = "SQL error creating harvest definition " + harvestDefinition + " in database" + "\n"
201                        + ExceptionUtils.getSQLExceptionCause(e);
202                log.warn(message, e);
203                throw new IOFailure(message, e);
204            } finally {
205                DBUtils.rollbackIfNeeded(connection, "creating", harvestDefinition);
206            }
207        } catch (SQLException e) {
208            log.error("Unable to close db resources", e);
209        }
210
211        return id;
212    }
213
214    /**
215     * Create the entries in the harvest_configs table that connect PartialHarvests and their configurations.
216     *
217     * @param c harvest definition DB connection
218     * @param ph The harvest to insert entries for.
219     * @param id The id of the harvest -- this may not yet be set on ph
220     * @throws SQLException If a database error occurs during the create process.
221     */
222    private void createHarvestConfigsEntries(Connection c, PartialHarvest ph, long id) throws SQLException {
223        try (PreparedStatement s = c.prepareStatement("DELETE FROM harvest_configs WHERE harvest_id = ?");) {
224            s.setLong(1, id);
225            s.executeUpdate();
226        }
227        try (PreparedStatement s = c.prepareStatement("INSERT INTO harvest_configs " + "( harvest_id, config_id ) "
228                + "SELECT ?, config_id FROM configurations, domains "
229                + "WHERE domains.name = ? AND configurations.name = ?"
230                + "  AND domains.domain_id = configurations.domain_id");) {
231            Iterator<DomainConfiguration> dcs = ph.getDomainConfigurations();
232            while (dcs.hasNext()) {
233                DomainConfiguration dc = dcs.next();
234                s.setLong(1, id);
235                s.setString(2, dc.getDomainName());
236                s.setString(3, dc.getName());
237                s.executeUpdate();
238            }
239        }
240    }
241
242    /**
243     * Generates the next id of a harvest definition. this implementation retrieves the maximum value of harvest_id in
244     * the DB, and returns this value + 1.
245     *
246     * @param c An open connection to the harvestDatabase
247     * @return The next available ID
248     */
249    private synchronized Long generateNextID(Connection c) {
250        Long maxVal = DBUtils.selectLongValue(c, "SELECT max(harvest_id) FROM harvestdefinitions");
251        if (maxVal == null) {
252            maxVal = 0L;
253        }
254        return maxVal + 1L;
255    }
256
257    /**
258     * Read the stored harvest definition for the given ID.
259     *
260     * @param harvestDefinitionID An ID number for a harvest definition
261     * @return A harvest definition that has been read from persistent storage.
262     * @throws UnknownID if no entry with that ID exists in the database
263     * @throws IOFailure If DB-failure occurs?
264     * @see HarvestDefinitionDAO#read(Long)
265     */
266    @Override
267    public synchronized HarvestDefinition read(Long harvestDefinitionID) throws UnknownID, IOFailure {
268        Connection c = HarvestDBConnection.get();
269        try {
270            return read(c, harvestDefinitionID);
271        } finally {
272            HarvestDBConnection.release(c);
273        }
274    }
275
276    /**
277     * Read the stored harvest definition for the given ID.
278     *
279     * @param c The used database connection
280     * @param harvestDefinitionID ID number for a harvest definition
281     * @return A harvest definition that has been read from persistent storage.
282     * @throws UnknownID if no entry with that ID exists in the database
283     * @throws IOFailure If DB-failure occurs?
284     * @see HarvestDefinitionDAO#read(Long)
285     */
286    private HarvestDefinition read(Connection c, Long harvestDefinitionID) throws UnknownID, IOFailure {
287        if (!exists(c, harvestDefinitionID)) {
288            String message = "Unknown harvest definition " + harvestDefinitionID;
289            log.debug(message);
290            throw new UnknownID(message);
291        }
292        log.debug("Reading harvestdefinition w/ id {}", harvestDefinitionID);
293        PreparedStatement s = null;
294        try {
295            s = c.prepareStatement(
296                    "SELECT name, comments, numevents, submitted, " + "previoushd, maxobjects, maxbytes, "
297                            + "maxjobrunningtime, isindexready, isactive, edition, audience "
298                            + "FROM harvestdefinitions, fullharvests " + "WHERE harvestdefinitions.harvest_id = ?"
299                            + "  AND harvestdefinitions.harvest_id " + " = fullharvests.harvest_id");
300            s.setLong(1, harvestDefinitionID);
301            ResultSet res = s.executeQuery();
302            if (res.next()) {
303                // Found full harvest
304                log.debug("fullharvest found w/id " + harvestDefinitionID);
305                final String name = res.getString(1);
306                final String comments = res.getString(2);
307                final int numEvents = res.getInt(3);
308                final Date submissionDate = new Date(res.getTimestamp(4).getTime());
309                final long maxObjects = res.getLong(6);
310                final long maxBytes = res.getLong(7);
311                final long maxJobRunningtime = res.getLong(8);
312                final boolean isIndexReady = res.getBoolean(9);
313                FullHarvest fh;
314                final long prevhd = res.getLong(5);
315                if (!res.wasNull()) {
316                    fh = new FullHarvest(name, comments, prevhd, maxObjects, maxBytes, maxJobRunningtime, isIndexReady,
317                            DAOProviderFactory.getHarvestDefinitionDAOProvider(),
318                            DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(),
319                            DAOProviderFactory.getDomainDAOProvider());
320                } else {
321                    fh = new FullHarvest(name, comments, null, maxObjects, maxBytes, maxJobRunningtime, isIndexReady,
322                            DAOProviderFactory.getHarvestDefinitionDAOProvider(),
323                            DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(),
324                            DAOProviderFactory.getDomainDAOProvider());
325                }
326                fh.setSubmissionDate(submissionDate);
327                fh.setNumEvents(numEvents);
328                fh.setActive(res.getBoolean(10));
329                fh.setOid(harvestDefinitionID);
330                fh.setEdition(res.getLong(11));
331                fh.setAudience(res.getString(12));
332
333                readExtendedFieldValues(fh);
334
335                // We found a FullHarvest object, just return it.
336                log.debug("Returned FullHarvest object w/ id {}", harvestDefinitionID);
337                return fh;
338            }
339            s.close();
340            // No full harvest with that ID, try selective harvest
341            s = c.prepareStatement("SELECT harvestdefinitions.name," + "       harvestdefinitions.comments,"
342                    + "       harvestdefinitions.numevents," + "       harvestdefinitions.submitted,"
343                    + "       harvestdefinitions.isactive," + "       harvestdefinitions.edition,"
344                    + "       harvestdefinitions.audience," + "       schedules.name,"
345                    + "       partialharvests.nextdate, " + "       harvestdefinitions.channel_id "
346                    + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.harvest_id = ?"
347                    + "   AND harvestdefinitions.harvest_id " + "= partialharvests.harvest_id"
348                    + "   AND schedules.schedule_id " + "= partialharvests.schedule_id");
349            s.setLong(1, harvestDefinitionID);
350            res = s.executeQuery();
351            boolean foundPartialHarvest = res.next();
352            if (foundPartialHarvest) {
353                log.debug("Partialharvest found w/ id " + harvestDefinitionID);
354                // Have to get configs before creating object, so storing data
355                // here.
356                final String name = res.getString(1);
357                final String comments = res.getString(2);
358                final int numEvents = res.getInt(3);
359                final Date submissionDate = new Date(res.getTimestamp(4).getTime());
360                final boolean active = res.getBoolean(5);
361                final long edition = res.getLong(6);
362                final String audience = res.getString(7);
363                final String scheduleName = res.getString(8);
364                final Date nextDate = DBUtils.getDateMaybeNull(res, 9);
365                final Long channelId = DBUtils.getLongMaybeNull(res, 10);
366                s.close();
367                // Found partial harvest -- have to find configurations.
368                // To avoid holding on to the readlock while getting domains,
369                // we grab the strings first, then look up domains and configs.
370                final DomainDAO domainDao = DomainDAO.getInstance();
371                List<SparseDomainConfiguration> configs = new ArrayList<SparseDomainConfiguration>();
372                s = c.prepareStatement(
373                        "SELECT domains.name, configurations.name " + "FROM domains, configurations, harvest_configs "
374                                + "WHERE harvest_id = ?" + "  AND configurations.config_id "
375                                + "= harvest_configs.config_id" + "  AND configurations.domain_id = domains.domain_id");
376                s.setLong(1, harvestDefinitionID);
377                res = s.executeQuery();
378                while (res.next()) {
379                    configs.add(new SparseDomainConfiguration(res.getString(1), res.getString(2)));
380                }
381                s.close();
382                List<DomainConfiguration> configurations = new ArrayList<DomainConfiguration>();
383                for (SparseDomainConfiguration domainConfig : configs) {
384                    configurations.add(domainDao.getDomainConfiguration(domainConfig.getDomainName(),
385                            domainConfig.getConfigurationName()));
386                }
387
388                Schedule schedule = ScheduleDAO.getInstance().read(scheduleName);
389
390                PartialHarvest ph = new PartialHarvest(configurations, schedule, name, comments, audience);
391
392                ph.setNumEvents(numEvents);
393                ph.setSubmissionDate(submissionDate);
394                ph.setActive(active);
395                ph.setEdition(edition);
396                ph.setNextDate(nextDate);
397                ph.setOid(harvestDefinitionID);
398                if (channelId != null) {
399                    ph.setChannelId(channelId);
400                }
401
402                readExtendedFieldValues(ph);
403
404                return ph;
405            } else {
406                throw new IllegalState(
407                        "No entries in fullharvests or partialharvests found for id " + harvestDefinitionID);
408            }
409        } catch (SQLException e) {
410            throw new IOFailure("SQL Error while reading harvest definition " + harvestDefinitionID + "\n"
411                    + ExceptionUtils.getSQLExceptionCause(e), e);
412        }
413    }
414
415    /**
416     * Update an existing harvest definition with new info.
417     *
418     * @param hd An updated harvest definition
419     * @see HarvestDefinitionDAO#update(HarvestDefinition)
420     */
421    public synchronized void update(HarvestDefinition hd) {
422        ArgumentNotValid.checkNotNull(hd, "HarvestDefinition hd");
423        if (hd.getOid() == null || !exists(hd.getOid())) {
424            final String message = "Cannot update non-existing " + "harvestdefinition '" + hd.getName() + "'";
425            log.debug(message);
426            throw new PermissionDenied(message);
427        }
428        HarvestDefinition preHD = null;
429        if (hd instanceof FullHarvest) {
430            preHD = ((FullHarvest) hd).getPreviousHarvestDefinition();
431        }
432
433        Connection c = HarvestDBConnection.get();
434        PreparedStatement s = null;
435        try {
436            c.setAutoCommit(false);
437            s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, "
438                    + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? "
439                    + "WHERE harvest_id = ? AND edition = ?");
440            DBUtils.setName(s, 1, hd, Constants.MAX_NAME_SIZE);
441            DBUtils.setComments(s, 2, hd, Constants.MAX_COMMENT_SIZE);
442            s.setInt(3, hd.getNumEvents());
443            s.setTimestamp(4, new Timestamp(hd.getSubmissionDate().getTime()));
444            s.setBoolean(5, hd.getActive());
445            long nextEdition = hd.getEdition() + 1;
446            s.setLong(6, nextEdition);
447            s.setString(7, hd.getAudience());
448            s.setLong(8, hd.getOid());
449            s.setLong(9, hd.getEdition());
450
451            int rows = s.executeUpdate();
452            // Since the HD exists, no rows indicates bad edition
453            if (rows == 0) {
454                String message = "Somebody else must have updated " + hd + " since edition " + hd.getEdition()
455                        + ", not updating";
456                log.debug(message);
457                throw new PermissionDenied(message);
458            }
459            s.close();
460            if (hd instanceof FullHarvest) {
461                FullHarvest fh = (FullHarvest) hd;
462                s = c.prepareStatement("UPDATE fullharvests SET previoushd = ?, " + "maxobjects = ?, "
463                        + "maxbytes = ?, " + "maxjobrunningtime = ?, isindexready = ? " + "WHERE harvest_id = ?");
464                if (preHD != null) {
465                    s.setLong(1, preHD.getOid());
466                } else {
467                    s.setNull(1, Types.BIGINT);
468                }
469                s.setLong(2, fh.getMaxCountObjects());
470                s.setLong(3, fh.getMaxBytes());
471                s.setLong(4, fh.getMaxJobRunningTime());
472                s.setBoolean(5, fh.getIndexReady());
473                s.setLong(6, fh.getOid());
474
475                rows = s.executeUpdate();
476                log.debug("{} fullharvests records updated", rows);
477            } else if (hd instanceof PartialHarvest) {
478                PartialHarvest ph = (PartialHarvest) hd;
479                s = c.prepareStatement("UPDATE partialharvests SET " + "schedule_id = "
480                        + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? "
481                        + "WHERE harvest_id = ?");
482                s.setString(1, ph.getSchedule().getName());
483                DBUtils.setDateMaybeNull(s, 2, ph.getNextDate());
484                s.setLong(3, ph.getOid());
485                rows = s.executeUpdate();
486                log.debug("{} partialharvests records updated", rows);
487                s.close();
488                // FIXME The updates to harvest_configs table should be done
489                // in method removeDomainConfiguration(), and not here.
490                // The following deletes ALL harvest_configs entries for
491                // this PartialHarvest, and creates the entries for the
492                // PartialHarvest again!!
493                createHarvestConfigsEntries(c, ph, ph.getOid());
494            } else {
495                String message = "Harvest definition " + hd + " has unknown class " + hd.getClass();
496                log.warn(message);
497                throw new ArgumentNotValid(message);
498            }
499            saveExtendedFieldValues(c, hd);
500
501            c.commit();
502            hd.setEdition(nextEdition);
503        } catch (SQLException e) {
504            throw new IOFailure(
505                    "SQL error while updating harvest definition " + hd + "\n" + ExceptionUtils.getSQLExceptionCause(e),
506                    e);
507        } finally {
508            DBUtils.closeStatementIfOpen(s);
509            DBUtils.rollbackIfNeeded(c, "updating", hd);
510            HarvestDBConnection.release(c);
511        }
512    }
513
514    /**
515     * Activates or deactivates a partial harvest definition. This method is actually to be used not to have to read
516     * from the DB big harvest definitions and optimize the activation / deactivation, it is sort of a lightweight
517     * version of update.
518     *
519     * @param harvestDefinition the harvest definition object.
520     */
521    @Override
522    public synchronized void flipActive(SparsePartialHarvest harvestDefinition) {
523        ArgumentNotValid.checkNotNull(harvestDefinition, "HarvestDefinition harvestDefinition");
524
525        Connection c = HarvestDBConnection.get();
526        PreparedStatement s = null;
527        try {
528            if (harvestDefinition.getOid() == null || !exists(c, harvestDefinition.getOid())) {
529                final String message = "Cannot update non-existing " + "harvestdefinition '"
530                        + harvestDefinition.getName() + "'";
531                log.debug(message);
532                throw new PermissionDenied(message);
533            }
534
535            c.setAutoCommit(false);
536            s = c.prepareStatement("UPDATE harvestdefinitions SET " + "name = ?, " + "comments = ?, "
537                    + "numevents = ?, " + "submitted = ?," + "isactive = ?," + "edition = ?, audience = ? "
538                    + "WHERE harvest_id = ? AND edition = ?");
539            DBUtils.setName(s, 1, harvestDefinition, Constants.MAX_NAME_SIZE);
540            DBUtils.setComments(s, 2, harvestDefinition, Constants.MAX_COMMENT_SIZE);
541            s.setInt(3, harvestDefinition.getNumEvents());
542            s.setTimestamp(4, new Timestamp(harvestDefinition.getSubmissionDate().getTime()));
543            s.setBoolean(5, !harvestDefinition.isActive());
544            long nextEdition = harvestDefinition.getEdition() + 1;
545            s.setLong(6, nextEdition);
546            s.setString(7, harvestDefinition.getAudience());
547            s.setLong(8, harvestDefinition.getOid());
548            s.setLong(9, harvestDefinition.getEdition());
549            int rows = s.executeUpdate();
550            // Since the HD exists, no rows indicates bad edition
551            if (rows == 0) {
552                String message = "Somebody else must have updated " + harvestDefinition + " since edition "
553                        + harvestDefinition.getEdition() + ", not updating";
554                log.debug(message);
555                throw new PermissionDenied(message);
556            }
557            s.close();
558
559            // Now pull more strings
560            s = c.prepareStatement("UPDATE partialharvests SET schedule_id = "
561                    + "(SELECT schedule_id FROM schedules WHERE schedules.name = ?), " + "nextdate = ? "
562                    + "WHERE harvest_id = ?");
563            s.setString(1, harvestDefinition.getScheduleName());
564            DBUtils.setDateMaybeNull(s, 2, harvestDefinition.getNextDate());
565            s.setLong(3, harvestDefinition.getOid());
566            rows = s.executeUpdate();
567            log.debug("{} partialharvests records updated", rows);
568            s.close();
569            c.commit();
570        } catch (SQLException e) {
571            throw new IOFailure("SQL error while updating harvest definition " + harvestDefinition + "\n"
572                    + ExceptionUtils.getSQLExceptionCause(e), e);
573        } finally {
574            DBUtils.rollbackIfNeeded(c, "updating", harvestDefinition);
575            HarvestDBConnection.release(c);
576        }
577    }
578
579    @Override
580    public synchronized boolean exists(String name) {
581        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
582        Connection c = HarvestDBConnection.get();
583        try {
584            return 1 == DBUtils.selectIntValue(c,
585                    "SELECT COUNT(harvest_id) " + "FROM harvestdefinitions WHERE name = ?", name);
586        } finally {
587            HarvestDBConnection.release(c);
588        }
589    }
590
591    @Override
592    public synchronized boolean exists(Long oid) {
593        ArgumentNotValid.checkNotNull(oid, "Long oid");
594        Connection c = HarvestDBConnection.get();
595        try {
596            return exists(c, oid);
597        } finally {
598            HarvestDBConnection.release(c);
599        }
600    }
601
602    /**
603     * Check if a harvestdefinition exists with the given id.
604     *
605     * @param c An open connection to the harvestDatabase
606     * @param oid A potential identifier for a harvestdefinition
607     * @return true If a harvestdefinition exists with the given id.
608     * @see HarvestDefinitionDAO#exists(Long)
609     */
610    private boolean exists(Connection c, Long oid) {
611        return 1 == DBUtils.selectIntValue(c,
612                "SELECT COUNT(harvest_id) " + "FROM harvestdefinitions WHERE harvest_id = ?", oid);
613    }
614
615    /**
616     * Get a list of all existing harvest definitions ordered by name.
617     *
618     * @return An iterator that give the existing harvest definitions in turn
619     */
620    @Override
621    public synchronized Iterator<HarvestDefinition> getAllHarvestDefinitions() {
622        Connection c = HarvestDBConnection.get();
623        try {
624            List<Long> hds = DBUtils.selectLongList(c, "SELECT harvest_id FROM harvestdefinitions ORDER BY name");
625            log.debug("Getting an iterator for all stored harvestdefinitions.");
626
627            List<HarvestDefinition> orderedList = new LinkedList<HarvestDefinition>();
628            for (Long id : hds) {
629                orderedList.add(read(c, id));
630            }
631            return orderedList.iterator();
632        } finally {
633            HarvestDBConnection.release(c);
634        }
635    }
636
637    /**
638     * Gets default configurations for all domains that are not aliases.
639     * <p>
640     * This method currently gives an iterator that reads in all domains, although only on demand, that is: when calling
641     * "hasNext".
642     *
643     * @return Iterator containing the default DomainConfiguration for all domains that are not aliases
644     */
645    @Override
646    public synchronized Iterator<DomainConfiguration> getSnapShotConfigurations() {
647        return new FilterIterator<Domain, DomainConfiguration>(
648                DomainDAO.getInstance().getAllDomainsInSnapshotHarvestOrder()) {
649            public DomainConfiguration filter(Domain domain) {
650                if (domain.getAliasInfo() == null || domain.getAliasInfo().isExpired()) {
651                    return domain.getDefaultConfiguration();
652                } else {
653                    return null;
654                }
655            }
656        };
657    }
658
659    /**
660     * Returns a list of IDs of harvest definitions that are ready to be scheduled.
661     *
662     * @param now The current date
663     * @return List of ready harvest definitions. No check is performed for whether these are already in the middle of
664     * being scheduled.
665     */
666    @Override
667    public Iterable<Long> getReadyHarvestDefinitions(Date now) {
668        ArgumentNotValid.checkNotNull(now, "Date now");
669        Connection connection = HarvestDBConnection.get();
670        try {
671            List<Long> ids = DBUtils.selectLongList(connection,
672                    "SELECT fullharvests.harvest_id" + " FROM fullharvests, harvestdefinitions"
673                            + " WHERE harvestdefinitions.harvest_id = fullharvests.harvest_id" + " AND isactive = ? "
674                            + " AND numevents < 1  AND isindexready = ?",
675                    true, true);
676            ids.addAll(DBUtils.selectLongList(connection,
677                    "SELECT partialharvests.harvest_id" + " FROM partialharvests, harvestdefinitions"
678                            + " WHERE harvestdefinitions.harvest_id = partialharvests.harvest_id"
679                            + " AND isactive = ? AND nextdate IS NOT NULL AND nextdate < ?",
680                    true, now));
681            Set<Long> distinctIds = new HashSet<>();
682            distinctIds.addAll(ids);
683            if (distinctIds.size() != ids.size()) {
684                log.warn("Query returned multiple identical ids {}. These have been sanitized.", ids);
685                return distinctIds;
686            } else {
687                return ids;
688            }
689        } finally {
690            HarvestDBConnection.release(connection);
691        }
692    }
693
694    /**
695     * Get the harvest definition that has the given name, if any.
696     *
697     * @param name The name of a harvest definition.
698     * @return The HarvestDefinition object with that name, or null if none has that name.
699     */
700    @Override
701    public synchronized HarvestDefinition getHarvestDefinition(String name) {
702        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
703        log.debug("Reading harvestdefinition w/ name '{}'", name);
704        Connection c = HarvestDBConnection.get();
705        PreparedStatement s = null;
706        try {
707            s = c.prepareStatement("SELECT harvest_id FROM harvestdefinitions WHERE name = ?");
708            s.setString(1, name);
709            ResultSet res = s.executeQuery();
710            if (res.next()) {
711                long harvestDefinitionID = res.getLong(1);
712                s.close();
713                return read(c, harvestDefinitionID);
714            }
715            return null;
716        } catch (SQLException e) {
717            throw new IOFailure("SQL error while getting HD by name" + "\n" + ExceptionUtils.getSQLExceptionCause(e),
718                    e);
719        } finally {
720            DBUtils.closeStatementIfOpen(s);
721            HarvestDBConnection.release(c);
722        }
723    }
724
725    @Override
726    public List<HarvestRunInfo> getHarvestRunInfo(long harvestID) {
727        Connection c = HarvestDBConnection.get();
728        PreparedStatement s = null;
729        try {
730            ResultSet res = null;
731            Map<Integer, HarvestRunInfo> runInfos = new HashMap<Integer, HarvestRunInfo>();
732            List<HarvestRunInfo> infoList = new ArrayList<HarvestRunInfo>();
733
734            // Select dates and counts for all different statues
735            // for each run
736            s = c.prepareStatement("SELECT name, harvest_num, status, MIN(startdate), MAX(enddate), COUNT(job_id)"
737                    + "  FROM jobs, harvestdefinitions"
738                    + " WHERE harvestdefinitions.harvest_id = ?   AND jobs.harvest_id = harvestdefinitions.harvest_id"
739                    + " GROUP BY name, harvest_num, status ORDER BY harvest_num DESC");
740            s.setLong(1, harvestID);
741            res = s.executeQuery();
742            while (res.next()) {
743                int runNr = res.getInt(2);
744                HarvestRunInfo info = runInfos.get(runNr);
745                if (info == null) {
746                    String name = res.getString(1);
747                    info = new HarvestRunInfo(harvestID, name, runNr);
748                    // Put into hash for easy access when updating.
749                    runInfos.put(runNr, info);
750                    // Add to return list in order given by DB
751                    infoList.add(info);
752                }
753                JobStatus status = JobStatus.fromOrdinal(res.getInt(3));
754                // For started states, check start date
755                if (status != JobStatus.NEW && status != JobStatus.SUBMITTED && status != JobStatus.RESUBMITTED) {
756                    Date startDate = DBUtils.getDateMaybeNull(res, 4);
757                    if (info.getStartDate() == null || (startDate != null && startDate.before(info.getStartDate()))) {
758                        info.setStartDate(startDate);
759                    }
760                }
761                // For finished jobs, check end date
762                if (status == JobStatus.DONE || status == JobStatus.FAILED) {
763                    Date endDate = DBUtils.getDateMaybeNull(res, 5);
764                    if (info.getEndDate() == null || (endDate != null && endDate.after(info.getEndDate()))) {
765                        info.setEndDate(endDate);
766                    }
767                }
768                int count = res.getInt(6);
769                info.setStatusCount(status, count);
770            }
771            s.close();
772            s = c.prepareStatement("SELECT jobs.harvest_num, SUM(historyinfo.bytecount), "
773                    + "SUM(historyinfo.objectcount)," + "COUNT(jobs.status)" + " FROM jobs, historyinfo "
774                    + " WHERE jobs.harvest_id = ? AND historyinfo.job_id = jobs.job_id" + " GROUP BY jobs.harvest_num"
775                    + " ORDER BY jobs.harvest_num");
776            s.setLong(1, harvestID);
777            res = s.executeQuery();
778
779            while (res.next()) {
780                final int harvestNum = res.getInt(1);
781                HarvestRunInfo info = runInfos.get(harvestNum);
782                if (info != null) {
783                    info.setBytesHarvested(res.getLong(2));
784                    info.setDocsHarvested(res.getLong(3));
785                } else {
786                    log.debug("Harvestnum {} for harvestID {} is skipped. Must have arrived between selects",
787                            harvestNum, harvestID);
788                }
789            }
790
791            // Make sure that jobs that aren't really done don't have end date.
792            for (HarvestRunInfo info : infoList) {
793                if (info.getJobCount(JobStatus.STARTED) != 0 || info.getJobCount(JobStatus.NEW) != 0
794                        || info.getJobCount(JobStatus.SUBMITTED) != 0) {
795                    info.setEndDate(null);
796                }
797            }
798            return infoList;
799        } catch (SQLException e) {
800            String message = "SQL error asking for harvest run info on " + harvestID + " in database" + "\n"
801                    + ExceptionUtils.getSQLExceptionCause(e);
802            log.warn(message, e);
803            throw new IOFailure(message, e);
804        } finally {
805            DBUtils.closeStatementIfOpen(s);
806            HarvestDBConnection.release(c);
807        }
808    }
809
810    /**
811     * Get all domain,configuration pairs for a harvest definition in sparse version for GUI purposes.
812     *
813     * @param harvestDefinitionID The ID of the harvest definition.
814     * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions.
815     * @throws ArgumentNotValid on null argument.
816     */
817    @Override
818    public List<SparseDomainConfiguration> getSparseDomainConfigurations(Long harvestDefinitionID) {
819        ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID");
820        Connection c = HarvestDBConnection.get();
821        try {
822            return getSparseDomainConfigurations(c, harvestDefinitionID);
823        } finally {
824            HarvestDBConnection.release(c);
825        }
826    }
827
828    /**
829     * Get all domain,configuration pairs for a harvest definition in sparse version.
830     *
831     * @param c a connection to the harvest database
832     * @param harvestDefinitionID The ID of the harvest definition.
833     * @return Domain, configuration pairs for that HD. Returns an empty iterable for unknown harvest definitions.
834     */
835    private List<SparseDomainConfiguration> getSparseDomainConfigurations(Connection c, Long harvestDefinitionID) {
836        try (PreparedStatement s = c.prepareStatement(
837                "SELECT domains.name, configurations.name " + "FROM domains, configurations," + " harvest_configs "
838                        + "WHERE harvest_id = ?  AND configurations.config_id = harvest_configs.config_id"
839                        + " AND configurations.domain_id = domains.domain_id");) {
840            s.setLong(1, harvestDefinitionID);
841            ResultSet res = s.executeQuery();
842            List<SparseDomainConfiguration> resultList = new ArrayList<SparseDomainConfiguration>();
843            while (res.next()) {
844                SparseDomainConfiguration sdc = new SparseDomainConfiguration(res.getString(1), res.getString(2));
845                resultList.add(sdc);
846            }
847
848            Collections.sort(resultList, new SparseDomainConfigurationComparator());
849            return resultList;
850        } catch (SQLException e) {
851            throw new IOFailure("SQL error getting sparse domains" + "\n" + ExceptionUtils.getSQLExceptionCause(e), e);
852        }
853    }
854
855    /**
856     * Get all sparse versions of partial harvests for GUI purposes ordered by name.
857     *
858     * @return An iterable (possibly empty) of SparsePartialHarvests
859     */
860    public Iterable<SparsePartialHarvest> getSparsePartialHarvestDefinitions(boolean excludeInactive) {
861        String query = "SELECT harvestdefinitions.harvest_id," + "       harvestdefinitions.name,"
862                + "       harvestdefinitions.comments," + "       harvestdefinitions.numevents,"
863                + "       harvestdefinitions.submitted," + "       harvestdefinitions.isactive,"
864                + "       harvestdefinitions.edition," + "       schedules.name," + "       partialharvests.nextdate, "
865                + "       harvestdefinitions.audience, " + "       harvestdefinitions.channel_id "
866                + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.harvest_id "
867                + "       = partialharvests.harvest_id" + " AND (harvestdefinitions.isactive " + " = ?"
868                // This linie is duplicated to allow to select both active
869                // and inactive HD's.
870                + " OR harvestdefinitions" + ".isactive " + " = ?)" + "   AND schedules.schedule_id "
871                + "       = partialharvests.schedule_id " + "ORDER BY harvestdefinitions.name";
872        try (Connection c = HarvestDBConnection.get();
873                PreparedStatement s = DBUtils.prepareStatement(c, query, true, excludeInactive);) {
874            ResultSet res = s.executeQuery();
875            List<SparsePartialHarvest> harvests = new ArrayList<SparsePartialHarvest>();
876            while (res.next()) {
877                SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), res.getString(2), res.getString(3),
878                        res.getInt(4), new Date(res.getTimestamp(5).getTime()), res.getBoolean(6), res.getLong(7),
879                        res.getString(8), DBUtils.getDateMaybeNull(res, 9), res.getString(10),
880                        DBUtils.getLongMaybeNull(res, 11));
881                harvests.add(sph);
882            }
883            return harvests;
884        } catch (SQLException e) {
885            throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e);
886        }
887    }
888
889    /**
890     * Get a sparse version of a partial harvest for GUI purposes.
891     *
892     * @param harvestName Name of harvest definition.
893     * @return Sparse version of partial harvest or null for none.
894     * @throws ArgumentNotValid on null or empty name.
895     */
896    @Override
897    public SparsePartialHarvest getSparsePartialHarvest(String harvestName) {
898        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
899        try (Connection c = HarvestDBConnection.get();
900                PreparedStatement s = c.prepareStatement("SELECT harvestdefinitions.harvest_id,"
901                        + "       harvestdefinitions.comments," + "       harvestdefinitions.numevents,"
902                        + "       harvestdefinitions.submitted," + "       harvestdefinitions.isactive,"
903                        + "       harvestdefinitions.edition," + "       schedules.name,"
904                        + "       partialharvests.nextdate, " + "       harvestdefinitions.audience, "
905                        + "       harvestdefinitions.channel_id "
906                        + "FROM harvestdefinitions, partialharvests, schedules" + " WHERE harvestdefinitions.name = ?"
907                        + "   AND harvestdefinitions.harvest_id " + "= partialharvests.harvest_id"
908                        + "   AND schedules.schedule_id " + "= partialharvests.schedule_id");) {
909            s.setString(1, harvestName);
910            ResultSet res = s.executeQuery();
911            if (res.next()) {
912                SparsePartialHarvest sph = new SparsePartialHarvest(res.getLong(1), harvestName, res.getString(2),
913                        res.getInt(3), new Date(res.getTimestamp(4).getTime()), res.getBoolean(5), res.getLong(6),
914                        res.getString(7), DBUtils.getDateMaybeNull(res, 8), res.getString(9),
915                        DBUtils.getLongMaybeNull(res, 10));
916                sph.setExtendedFieldValues(getExtendedFieldValues(sph.getOid()));
917                return sph;
918            } else {
919                return null;
920            }
921        } catch (SQLException e) {
922            throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e);
923        }
924    }
925
926    /**
927     * Get all sparse versions of full harvests for GUI purposes.
928     *
929     * @return An iterable (possibly empty) of SparseFullHarvests
930     */
931    public Iterable<SparseFullHarvest> getAllSparseFullHarvestDefinitions() {
932        try (Connection c = HarvestDBConnection.get();
933                PreparedStatement s = c.prepareStatement("SELECT harvestdefinitions.harvest_id,"
934                        + "       harvestdefinitions.name," + "       harvestdefinitions.comments,"
935                        + "       harvestdefinitions.numevents," + "       harvestdefinitions.isactive,"
936                        + "       harvestdefinitions.edition," + "       fullharvests.maxobjects,"
937                        + "       fullharvests.maxbytes," + "       fullharvests.maxjobrunningtime,"
938                        + "       fullharvests.previoushd, " + "       harvestdefinitions.channel_id "
939                        + "FROM harvestdefinitions, fullharvests" + " WHERE harvestdefinitions.harvest_id "
940                        + "       = fullharvests.harvest_id" + " ORDER BY harvestdefinitions.name");) {
941            ResultSet res = s.executeQuery();
942            List<SparseFullHarvest> harvests = new ArrayList<SparseFullHarvest>();
943            while (res.next()) {
944                SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), res.getString(2), res.getString(3),
945                        res.getInt(4), res.getBoolean(5), res.getLong(6), res.getLong(7), res.getLong(8),
946                        res.getLong(9), DBUtils.getLongMaybeNull(res, 10), DBUtils.getLongMaybeNull(res, 11));
947                // EAV
948                long oid = sfh.getOid();
949                List<AttributeAndType> attributesAndTypes = EAV.getInstance()
950                        .getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int) oid);
951                sfh.setAttributesAndTypes(attributesAndTypes);
952                harvests.add(sfh);
953            }
954            return harvests;
955        } catch (SQLException e) {
956            throw new IOFailure("SQL error getting sparse harvests\n" + ExceptionUtils.getSQLExceptionCause(e), e);
957        }
958    }
959
960    /**
961     * Get the name of a harvest given its ID.
962     *
963     * @param harvestDefinitionID The ID of a harvest
964     * @return The name of the given harvest.
965     * @throws ArgumentNotValid on null argument
966     * @throws UnknownID if no harvest has the given ID.
967     * @throws IOFailure on any other error talking to the database
968     */
969    @Override
970    public String getHarvestName(Long harvestDefinitionID) {
971        ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID");
972        try (Connection c = HarvestDBConnection.get();
973                PreparedStatement s = c.prepareStatement("SELECT name FROM harvestdefinitions WHERE harvest_id = ?");) {
974            s.setLong(1, harvestDefinitionID);
975            ResultSet res = s.executeQuery();
976            String name = null;
977            while (res.next()) {
978                if (name != null) {
979                    throw new IOFailure("Found more than one name for harvest definition " + harvestDefinitionID + ": '"
980                            + name + "' and '" + res.getString(1) + "'");
981                }
982                name = res.getString(1);
983            }
984            if (name == null) {
985                throw new UnknownID("No name found for harvest definition " + harvestDefinitionID);
986            }
987            return name;
988        } catch (SQLException e) {
989            throw new IOFailure("An error occurred finding the name for " + "harvest definition " + harvestDefinitionID
990                    + "\n" + ExceptionUtils.getSQLExceptionCause(e), e);
991        }
992    }
993
994    /**
995     * Get whether a given harvest is a snapshot or selective harvest.
996     *
997     * @param harvestDefinitionID ID of a harvest
998     * @return True if the given harvest is a snapshot harvest, false otherwise.
999     * @throws ArgumentNotValid on null argument
1000     * @throws UnknownID if no harvest has the given ID.
1001     */
1002    @Override
1003    public boolean isSnapshot(Long harvestDefinitionID) {
1004        ArgumentNotValid.checkNotNull(harvestDefinitionID, "harvestDefinitionID");
1005        try (Connection connection = HarvestDBConnection.get();) {
1006            boolean isSnapshot = DBUtils.selectAny(connection,
1007                    "SELECT harvest_id FROM fullharvests WHERE harvest_id = ?", harvestDefinitionID);
1008            if (isSnapshot) {
1009                return true;
1010            }
1011            boolean isSelective = DBUtils.selectAny(connection,
1012                    "SELECT harvest_id FROM partialharvests " + "WHERE harvest_id = ?", harvestDefinitionID);
1013            if (isSelective) {
1014                return false;
1015            }
1016            throw new UnknownID("Failed to find harvest definition with id " + harvestDefinitionID);
1017        } catch (SQLException e) {
1018            throw new RuntimeException("Unable to close DB connection");
1019        }
1020    }
1021
1022    /**
1023     * Get a sparse version of a full harvest for GUI purposes.
1024     *
1025     * @param harvestName Name of harvest definition.
1026     * @return Sparse version of full harvest or null for none.
1027     * @throws ArgumentNotValid on null or empty name.
1028     * @throws UnknownID if no harvest has the given ID.
1029     * @throws IOFailure on any other error talking to the database
1030     */
1031    @Override
1032    public SparseFullHarvest getSparseFullHarvest(String harvestName) {
1033        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
1034        try (Connection c = HarvestDBConnection.get();
1035                PreparedStatement s = c.prepareStatement(
1036                        "SELECT harvestdefinitions.harvest_id," + "       harvestdefinitions.comments,"
1037                                + "       harvestdefinitions.numevents," + "       harvestdefinitions.isactive,"
1038                                + "       harvestdefinitions.edition," + "       fullharvests.maxobjects,"
1039                                + "       fullharvests.maxbytes," + "       fullharvests.maxjobrunningtime,"
1040                                + "       fullharvests.previoushd, " + "       harvestdefinitions.channel_id "
1041                                + "FROM harvestdefinitions, fullharvests" + " WHERE harvestdefinitions.name = ?"
1042                                + "   AND harvestdefinitions.harvest_id " + "       = fullharvests.harvest_id");) {
1043            s.setString(1, harvestName);
1044            ResultSet res = s.executeQuery();
1045            if (res.next()) {
1046                SparseFullHarvest sfh = new SparseFullHarvest(res.getLong(1), harvestName, res.getString(2),
1047                        res.getInt(3), res.getBoolean(4), res.getLong(5), res.getLong(6), res.getLong(7),
1048                        res.getLong(8), DBUtils.getLongMaybeNull(res, 9), DBUtils.getLongMaybeNull(res, 10));
1049                // EAV
1050                long oid = sfh.getOid();
1051                List<AttributeAndType> attributesAndTypes = EAV.getInstance()
1052                        .getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID, (int) oid);
1053                sfh.setAttributesAndTypes(attributesAndTypes);
1054                sfh.setExtendedFieldValues(getExtendedFieldValues(sfh.getOid()));
1055                return sfh;
1056            } else {
1057                return null;
1058            }
1059        } catch (SQLException e) {
1060            throw new IOFailure("SQL error getting sparse harvest\n" + ExceptionUtils.getSQLExceptionCause(e), e);
1061        }
1062    }
1063
1064    /**
1065     * Get a sorted list of all domain names of a HarvestDefinition.
1066     *
1067     * @param harvestName of HarvestDefinition
1068     * @return List of all domains of the HarvestDefinition.
1069     */
1070    @Override
1071    public List<String> getListOfDomainsOfHarvestDefinition(String harvestName) {
1072        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
1073        try (Connection c = HarvestDBConnection.get();
1074                PreparedStatement s = c.prepareStatement(
1075                        // Note: the DISTINCT below is put in deliberately to fix
1076                        // bug 1878: Seeds for domain is shown twice on page
1077                        // History/Harveststatus-seeds.jsp
1078                        "SELECT DISTINCT domains.name" + " FROM     domains," + "          configurations,"
1079                                + "          harvest_configs," + "          harvestdefinitions"
1080                                + " WHERE    configurations.domain_id = domains.domain_id"
1081                                + " AND harvest_configs.config_id = " + "configurations.config_id"
1082                                + " AND harvest_configs.harvest_id = " + "harvestdefinitions.harvest_id"
1083                                + " AND harvestdefinitions.name = ?" + " ORDER BY domains.name");) {
1084            s.setString(1, harvestName);
1085            ResultSet res = s.executeQuery();
1086            List<String> domains = new ArrayList<String>();
1087
1088            while (res.next()) {
1089                domains.add(res.getString(1));
1090            }
1091            return domains;
1092        } catch (SQLException e) {
1093            throw new IOFailure("SQL error getting seeds of a domain of a harvest definition" + "\n"
1094                    + ExceptionUtils.getSQLExceptionCause(e), e);
1095        }
1096    }
1097
1098    /**
1099     * Get a list of all seeds of a Domain in a HarvestDefinition.
1100     *
1101     * @param harvestName of HarvestDefinition
1102     * @param domainName of Domain
1103     * @param needSort boolean if sort results
1104     * @return List of all seeds of the Domain in the HarvestDefinition.
1105     */
1106    @Override
1107    public List<String> getListOfSeedsOfDomainOfHarvestDefinition(String harvestName, String domainName,
1108            boolean needSort) {
1109        ArgumentNotValid.checkNotNullOrEmpty(harvestName, "harvestName");
1110        ArgumentNotValid.checkNotNullOrEmpty(domainName, "domainName");
1111        try (Connection c = HarvestDBConnection.get();
1112                PreparedStatement s = c.prepareStatement("SELECT seedlists.seeds" + " FROM   configurations,"
1113                        + "        harvest_configs," + "        harvestdefinitions," + "        seedlists,"
1114                        + "        config_seedlists," + "        domains" + " WHERE  config_seedlists.seedlist_id "
1115                        + "= seedlists.seedlist_id" + " AND configurations.config_id " + "= config_seedlists.config_id"
1116                        + " AND configurations.config_id " + "= harvest_configs.config_id"
1117                        + " AND harvest_configs.harvest_id " + "= harvestdefinitions.harvest_id"
1118                        + " AND configurations.domain_id = domains.domain_id" + " AND domains.name = ?"
1119                        + " AND harvestdefinitions.name = ?");) {
1120            s.setString(1, domainName);
1121            s.setString(2, harvestName);
1122            ResultSet res = s.executeQuery();
1123            List<String> seeds = new ArrayList<String>();
1124
1125            while (res.next()) {
1126                String seedsOfDomain = res.getString(1);
1127
1128                StringTokenizer st = new StringTokenizer(seedsOfDomain, "\n");
1129
1130                while (st.hasMoreTokens()) {
1131                    String seed = st.nextToken();
1132
1133                    boolean isDuplicate = false;
1134                    for (String entry : seeds) {
1135                        if (entry.equals(seed)) {
1136                            isDuplicate = true;
1137                            break;
1138                        }
1139                    }
1140                    if (!isDuplicate) { // duplicates will not be added
1141                        seeds.add(seed);
1142                    }
1143                }
1144            }
1145
1146            if (needSort)
1147                Collections.sort(seeds, Collator.getInstance());
1148
1149            return seeds;
1150        } catch (SQLException e) {
1151            throw new IOFailure("SQL error getting seeds of a domain\n" + ExceptionUtils.getSQLExceptionCause(e), e);
1152        }
1153    }
1154
1155    @Override
1156    public Set<Long> getJobIdsForSnapshotDeduplicationIndex(Long harvestId) {
1157        ArgumentNotValid.checkNotNull(harvestId, "Long harvestId");
1158        Set<Long> jobIds = new HashSet<Long>();
1159        if (!isSnapshot(harvestId)) {
1160            throw new NotImplementedException("This functionality only works for snapshot harvests");
1161        }
1162        List<Long> harvestDefinitions = getPreviousFullHarvests(harvestId);
1163        try (Connection c = HarvestDBConnection.get();) {
1164            List<Long> jobs = new ArrayList<Long>();
1165            if (!harvestDefinitions.isEmpty()) {
1166                // Select all jobs from a given list of harvest definitions
1167                jobs.addAll(DBUtils.selectLongList(c, "SELECT jobs.job_id FROM jobs WHERE jobs.harvest_id IN ("
1168                        + StringUtils.conjoin(",", harvestDefinitions) + ")"));
1169            }
1170            jobIds.addAll(jobs);
1171        } catch (SQLException e) {
1172            log.error("Unable to close DB connection", e);
1173        }
1174
1175        return jobIds;
1176    }
1177
1178    /**
1179     * Get list of harvests previous to this one.
1180     *
1181     * @param thisHarvest The id of this harvestdefinition
1182     * @return a list of IDs belonging to harvests previous to this one.
1183     */
1184    private List<Long> getPreviousFullHarvests(Long thisHarvest) {
1185        List<Long> results = new ArrayList<Long>();
1186        try (Connection c = HarvestDBConnection.get();) {
1187            // Follow the chain of originating IDs back
1188            for (Long originatingHarvest = thisHarvest; originatingHarvest != null;
1189                    // Compute next originatingHarvest
1190                    originatingHarvest = DBUtils.selectFirstLongValueIfAny(c,
1191                            "SELECT previoushd FROM fullharvests" + " WHERE fullharvests.harvest_id=?",
1192                            originatingHarvest)) {
1193                if (!originatingHarvest.equals(thisHarvest)) {
1194                    results.add(originatingHarvest);
1195                }
1196            }
1197
1198            // Find the first harvest in the chain (but last in the list).
1199            Long firstHarvest = thisHarvest;
1200            if (!results.isEmpty()) {
1201                firstHarvest = results.get(results.size() - 1);
1202            }
1203
1204            // Find the last harvest in the chain before
1205            Long olderHarvest = DBUtils.selectFirstLongValueIfAny(c,
1206                    "SELECT fullharvests.harvest_id" + " FROM fullharvests, harvestdefinitions,"
1207                            + "  harvestdefinitions AS currenthd" + " WHERE currenthd.harvest_id=?"
1208                            + " AND fullharvests.harvest_id " + "= harvestdefinitions.harvest_id"
1209                            + " AND harvestdefinitions.submitted " + "< currenthd.submitted"
1210                            + " ORDER BY harvestdefinitions.submitted " + HarvestStatusQuery.SORT_ORDER.DESC.name(),
1211                    firstHarvest);
1212            // Follow the chain of originating IDs back
1213            for (Long originatingHarvest = olderHarvest; originatingHarvest != null; originatingHarvest = DBUtils
1214                    .selectFirstLongValueIfAny(c,
1215                            "SELECT previoushd FROM fullharvests" + " WHERE fullharvests.harvest_id=?",
1216                            originatingHarvest)) {
1217                results.add(originatingHarvest);
1218            }
1219        } catch (SQLException e) {
1220            log.warn("Exception thrown while updating fullharvests.isindexready field: {}",
1221                    ExceptionUtils.getSQLExceptionCause(e), e);
1222        }
1223        return results;
1224    }
1225
1226    @Override
1227    public void setIndexIsReady(Long harvestId, boolean newValue) {
1228        if (!isSnapshot(harvestId)) {
1229            throw new NotImplementedException("Not implemented for non snapshot harvests");
1230        } else {
1231            try (Connection c = HarvestDBConnection.get();
1232                    PreparedStatement s = c
1233                            .prepareStatement("UPDATE fullharvests SET isindexready=? WHERE harvest_id=?");) {
1234                s.setBoolean(1, newValue);
1235                s.setLong(2, harvestId);
1236                int rows = s.executeUpdate();
1237                log.debug(rows + " entries of table fullharvests updated");
1238            } catch (SQLException e) {
1239                log.warn("Exception thrown while updating fullharvests.isindexready field: {}",
1240                        ExceptionUtils.getSQLExceptionCause(e), e);
1241            }
1242        }
1243    }
1244
1245    /*
1246     * Removes the entry in harvest_configs, that binds a certain domainconfiguration to this PartialHarvest. TODO maybe
1247     * update the edition as well.
1248     */
1249    @Override
1250    public void removeDomainConfiguration(Long harvestId, SparseDomainConfiguration key) {
1251        ArgumentNotValid.checkNotNull(key, "DomainConfigurationKey key");
1252        if (harvestId == null) {
1253            // Don't need to do anything, if PartialHarvest is not
1254            // yet stored in database
1255            log.warn("No removal of domainConfiguration, " + "as harvestId is null");
1256            return;
1257        }
1258        Connection connection = HarvestDBConnection.get();
1259        PreparedStatement s = null;
1260        try {
1261            s = connection.prepareStatement(
1262                    "DELETE FROM harvest_configs WHERE harvest_id = ? " + "AND config_id = (SELECT config_id "
1263                            + " FROM configurations, domains " + "WHERE domains.name = ? AND configurations.name = ?"
1264                            + "  AND domains.domain_id = configurations.domain_id)");
1265            s.setLong(1, harvestId);
1266            s.setString(2, key.getDomainName());
1267            s.setString(3, key.getConfigurationName());
1268            s.executeUpdate();
1269        } catch (SQLException e) {
1270            log.warn("Exception thrown while removing domainconfiguration: {}", ExceptionUtils.getSQLExceptionCause(e),
1271                    e);
1272        } finally {
1273            DBUtils.closeStatementIfOpen(s);
1274            DBUtils.rollbackIfNeeded(connection,
1275                    "removing DomainConfiguration from harvest w/id " + harvestId + " failed", harvestId);
1276            HarvestDBConnection.release(connection);
1277        }
1278    }
1279
1280    @Override
1281    public void updateNextdate(long harvestId, Date nextdate) {
1282        ArgumentNotValid.checkNotNull(harvestId, "Long harvest ID");
1283        ArgumentNotValid.checkNotNull(nextdate, "Date nextdate");
1284        if (harvestId < 0) {
1285            // Don't need to do anything, if PartialHarvest is not
1286            // yet stored in database
1287            return;
1288        }
1289        Connection connection = HarvestDBConnection.get();
1290        PreparedStatement s = null;
1291        try {
1292            connection.setAutoCommit(false);
1293            s = connection.prepareStatement("UPDATE partialharvests SET nextdate = ? " + "WHERE harvest_id = ?");
1294            DBUtils.setDateMaybeNull(s, 1, nextdate);
1295            s.setLong(2, harvestId);
1296            s.executeUpdate();
1297            connection.commit();
1298        } catch (SQLException e) {
1299            log.warn("Exception thrown while updating nextdate: {}", ExceptionUtils.getSQLExceptionCause(e), e);
1300        } finally {
1301            DBUtils.closeStatementIfOpen(s);
1302            DBUtils.rollbackIfNeeded(connection, "Updating nextdate from", harvestId);
1303            HarvestDBConnection.release(connection);
1304        }
1305    }
1306
1307    @Override
1308    public void addDomainConfiguration(PartialHarvest ph, SparseDomainConfiguration dcKey) {
1309        ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph");
1310        ArgumentNotValid.checkNotNull(dcKey, "DomainConfigurationKey dcKey");
1311
1312        try (Connection connection = HarvestDBConnection.get();
1313                PreparedStatement s = connection.prepareStatement("INSERT INTO harvest_configs "
1314                        + "( harvest_id, config_id ) " + "SELECT ?, config_id FROM configurations, domains "
1315                        + "WHERE domains.name = ? AND configurations.name = ?"
1316                        + "  AND domains.domain_id = configurations.domain_id");
1317
1318        ) {
1319            s.setLong(1, ph.getOid());
1320            s.setString(2, dcKey.getDomainName());
1321            s.setString(3, dcKey.getConfigurationName());
1322            s.executeUpdate();
1323            s.close();
1324        } catch (SQLException e) {
1325            log.warn("Exception thrown while adding domainConfiguration: {}", ExceptionUtils.getSQLExceptionCause(e),
1326                    e);
1327        }
1328    }
1329
1330    @Override
1331    public void resetDomainConfigurations(PartialHarvest ph, List<DomainConfiguration> dcList) {
1332        ArgumentNotValid.checkNotNull(ph, "PartialHarvest ph");
1333        ArgumentNotValid.checkNotNull(dcList, "List<DomainConfiguration> dcList");
1334
1335        try (Connection connection = HarvestDBConnection.get()) {
1336            createHarvestConfigsEntries(connection, ph, ph.getOid());
1337        } catch (SQLException e) {
1338            log.warn("Exception thrown while resetting domainConfigurations: {}",
1339                    ExceptionUtils.getSQLExceptionCause(e), e);
1340        }
1341    }
1342
1343    @Override
1344    public void mapToHarvestChannel(long harvestDefinitionId, HarvestChannel channel) {
1345        ArgumentNotValid.checkNotNull(channel, "HarvestChannel channel");
1346
1347        try (Connection connection = HarvestDBConnection.get();
1348                PreparedStatement s = connection
1349                        .prepareStatement("UPDATE harvestdefinitions SET channel_id=? WHERE harvest_id=?");) {
1350            s.setLong(1, channel.getId());
1351            s.setLong(2, harvestDefinitionId);
1352            if (s.executeUpdate() != 1) {
1353                throw new IOFailure("Could not map harvest channel " + channel.getId() + " to harvest definition "
1354                        + harvestDefinitionId);
1355            }
1356            s.close();
1357        } catch (SQLException e) {
1358            log.warn("Exception thrown while mapping to harvest channel: {}", ExceptionUtils.getSQLExceptionCause(e),
1359                    e);
1360        }
1361    }
1362
1363    /**
1364     * Saves all extended Field values for a HarvestDefinition in the Database.
1365     *
1366     * @param c Connection to Database
1367     * @param h HarvestDefinition where loaded extended Field Values will be set
1368     * @throws SQLException If database errors occur.
1369     */
1370    private void saveExtendedFieldValues(Connection c, HarvestDefinition h) throws SQLException {
1371        List<ExtendedFieldValue> list = h.getExtendedFieldValues();
1372        for (int i = 0; i < list.size(); i++) {
1373            ExtendedFieldValue efv = list.get(i);
1374            efv.setInstanceID(h.getOid());
1375
1376            ExtendedFieldValueDBDAO dao = (ExtendedFieldValueDBDAO) ExtendedFieldValueDAO.getInstance();
1377            if (efv.getExtendedFieldValueID() != null) {
1378                dao.update(c, efv, false);
1379            } else {
1380                dao.create(c, efv, false);
1381            }
1382        }
1383    }
1384
1385    /**
1386     * Reads all extended Field values from the database for a HarvestDefinition.
1387     *
1388     * @param h HarvestDefinition where loaded extended Field Values will be set
1389     * @throws SQLException If database errors occur.
1390     */
1391    private void readExtendedFieldValues(HarvestDefinition h) throws SQLException {
1392        h.setExtendedFieldValues(getExtendedFieldValues(h.getOid()));
1393    }
1394
1395    /**
1396     * Reads all extended Field values from the database for a HarvestDefinitionOid.
1397     *
1398     * @param aOid HarvestDefinition where loaded extended Field Values will be set
1399     * @return a list of ExtendedFieldValues belonging to the given harvest oid
1400     * @throws SQLException If database errors occur.
1401     */
1402    private List<ExtendedFieldValue> getExtendedFieldValues(Long aOid) throws SQLException {
1403        List<ExtendedFieldValue> extendedFieldValues = new ArrayList<ExtendedFieldValue>();
1404
1405        ExtendedFieldDAO dao = ExtendedFieldDAO.getInstance();
1406        List<ExtendedField> list = dao.getAll(ExtendedFieldTypes.HARVESTDEFINITION);
1407
1408        for (int i = 0; i < list.size(); i++) {
1409            ExtendedField ef = list.get(i);
1410
1411            ExtendedFieldValueDAO dao2 = ExtendedFieldValueDAO.getInstance();
1412            ExtendedFieldValue efv = dao2.read(ef.getExtendedFieldID(), aOid);
1413
1414            if (efv == null) {
1415                efv = new ExtendedFieldValue();
1416                efv.setExtendedFieldID(ef.getExtendedFieldID());
1417                efv.setInstanceID(aOid);
1418                efv.setContent(
1419                        new ExtendedFieldDefaultValue(ef.getDefaultValue(), ef.getFormattingPattern(), ef.getDatatype())
1420                                .getDBValue());
1421            }
1422
1423            extendedFieldValues.add(efv);
1424        }
1425
1426        return extendedFieldValues;
1427    }
1428}