001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.sql.Connection;
027import java.sql.PreparedStatement;
028import java.sql.ResultSet;
029import java.sql.SQLException;
030import java.sql.Statement;
031import java.util.ArrayList;
032import java.util.Arrays;
033import java.util.List;
034
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import dk.netarkivet.common.exceptions.ArgumentNotValid;
039import dk.netarkivet.common.exceptions.IOFailure;
040import dk.netarkivet.common.exceptions.UnknownID;
041import dk.netarkivet.common.utils.DBUtils;
042import dk.netarkivet.common.utils.ExceptionUtils;
043
044/**
045 * A singleton giving access to global crawler traps.
046 */
047public class GlobalCrawlerTrapListDBDAO extends GlobalCrawlerTrapListDAO {
048
049    /** The logger for this class. */
050    private static final Logger log = LoggerFactory.getLogger(GlobalCrawlerTrapListDBDAO.class);
051
052    /**
053     * protected constructor of this class. Checks if any migration are needed before operation starts.
054     */
055    protected GlobalCrawlerTrapListDBDAO() {
056        Connection connection = HarvestDBConnection.get();
057        try {
058            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPEXPRESSIONS);
059            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPLISTS);
060        } finally {
061            HarvestDBConnection.release(connection);
062        }
063    }
064
065    /**
066     * Statement to select all trap lists which are either active or inactive.
067     */
068    private static final String SELECT_BY_ACTIVITY = "SELECT global_crawler_trap_list_id FROM global_crawler_trap_lists WHERE isActive = ?";
069
070    /**
071     * Returns a list of either all active or all inactive trap lists.
072     *
073     * @param isActive whether to return active or inactive lists.
074     * @return a list if global crawler trap lists.
075     */
076    private List<GlobalCrawlerTrapList> getAllByActivity(boolean isActive) {
077        List<GlobalCrawlerTrapList> result = new ArrayList<GlobalCrawlerTrapList>();
078        Connection conn = HarvestDBConnection.get();
079        PreparedStatement stmt = null;
080        try {
081            stmt = conn.prepareStatement(SELECT_BY_ACTIVITY);
082            stmt.setBoolean(1, isActive);
083            ResultSet rs = stmt.executeQuery();
084            while (rs.next()) {
085                result.add(read(rs.getInt(1)));
086            }
087            return result;
088        } catch (SQLException e) {
089            String message = "Error reading trap list\n" + ExceptionUtils.getSQLExceptionCause(e);
090            log.warn(message, e);
091            throw new UnknownID(message, e);
092        } finally {
093            DBUtils.closeStatementIfOpen(stmt);
094            HarvestDBConnection.release(conn);
095        }
096    }
097
098    @Override
099    public List<GlobalCrawlerTrapList> getAllActive() {
100        return getAllByActivity(true);
101    }
102
103    @Override
104    public List<GlobalCrawlerTrapList> getAllInActive() {
105        return getAllByActivity(false);
106    }
107
108    @Override
109    public List<String> getAllActiveTrapExpressions() {
110        Connection conn = HarvestDBConnection.get();
111        List<String> result = new ArrayList<String>();
112        PreparedStatement stmt = null;
113        try {
114            stmt = conn.prepareStatement("SELECT DISTINCT trap_expression FROM global_crawler_trap_lists, "
115                    + "global_crawler_trap_expressions " + "WHERE global_crawler_trap_list_id = "
116                    + "crawler_trap_list_id " + "AND isActive = ?");
117            stmt.setBoolean(1, true);
118            ResultSet rs = stmt.executeQuery();
119            while (rs.next()) {
120                result.add(rs.getString(1));
121            }
122            return result;
123        } catch (SQLException e) {
124            String message = "Error retrieving expressions.\n" + ExceptionUtils.getSQLExceptionCause(e);
125            log.warn(message, e);
126            throw new IOFailure(message, e);
127        } finally {
128            DBUtils.closeStatementIfOpen(stmt);
129            HarvestDBConnection.release(conn);
130        }
131    }
132
133    /**
134     * Statement to insert a new trap list.
135     */
136    private static final String INSERT_TRAPLIST_STMT = "INSERT INTO global_crawler_trap_lists (name, description, isActive)"
137            + "VALUES (?,?,?)";
138
139    /**
140     * Statement to insert a new trap expression in a given list.
141     */
142    private static final String INSERT_TRAP_EXPR_STMT = "INSERT INTO global_crawler_trap_expressions (crawler_trap_list_id, trap_expression) VALUES (?,?) ";
143
144    @Override
145    public int create(GlobalCrawlerTrapList trapList) {
146        ArgumentNotValid.checkNotNull(trapList, "trapList");
147        // Check for existence of a trapList in the database with the same name
148        // and throw argumentNotValid if not
149        if (exists(trapList.getName())) {
150            throw new ArgumentNotValid("Crawlertrap with name '" + trapList.getName() + "'already exists in database");
151        }
152        int trapId;
153        Connection conn = HarvestDBConnection.get();
154        PreparedStatement stmt = null;
155        try {
156            conn.setAutoCommit(false);
157            stmt = conn.prepareStatement(INSERT_TRAPLIST_STMT, Statement.RETURN_GENERATED_KEYS);
158            stmt.setString(1, trapList.getName());
159            stmt.setString(2, trapList.getDescription());
160            stmt.setBoolean(3, trapList.isActive());
161            stmt.executeUpdate();
162            ResultSet rs = stmt.getGeneratedKeys();
163            rs.next();
164            trapId = rs.getInt(1);
165            trapList.setId(trapId);
166            log.debug("Inserting {} traps into database for list {} (trapId={})", trapList.getTraps().size(), trapList.getName(), trapId);
167            for (String expr : trapList.getTraps()) {
168                stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT);
169                stmt.setInt(1, trapId);
170                stmt.setString(2, expr);
171                stmt.executeUpdate();
172            }
173            conn.commit();
174        } catch (SQLException e) {
175            String message = "SQL error creating global crawler trap list \n" + ExceptionUtils.getSQLExceptionCause(e);
176            log.warn(message, e);
177            throw new IOFailure(message, e);
178        } finally {
179            DBUtils.closeStatementIfOpen(stmt);
180            DBUtils.rollbackIfNeeded(conn, "create trap list", trapList);
181            HarvestDBConnection.release(conn);
182        }
183        return trapId;
184    }
185
186    /**
187     * Statement to delete a trap list.
188     */
189    private static final String DELETE_TRAPLIST_STMT = "DELETE from global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?";
190
191    /**
192     * Statement to delete all expressions in a given trap list.
193     */
194    private static final String DELETE_EXPR_STMT = "DELETE FROM global_crawler_trap_expressions WHERE crawler_trap_list_id = ?";
195
196    @Override
197    public void delete(int id) {
198        Connection conn = HarvestDBConnection.get();
199        PreparedStatement stmt = null;
200        try {
201            conn.setAutoCommit(false);
202            // First delete the list.
203            stmt = conn.prepareStatement(DELETE_TRAPLIST_STMT);
204            stmt.setInt(1, id);
205            stmt.executeUpdate();
206            stmt.close();
207            // Then delete all its expressions.
208            stmt = conn.prepareStatement(DELETE_EXPR_STMT);
209            stmt.setInt(1, id);
210            stmt.executeUpdate();
211            conn.commit();
212        } catch (SQLException e) {
213            String message = "Error deleting trap list: '" + id + "'\n" + ExceptionUtils.getSQLExceptionCause(e);
214            log.warn(message, e);
215            throw new UnknownID(message, e);
216        } finally {
217            DBUtils.closeStatementIfOpen(stmt);
218            DBUtils.rollbackIfNeeded(conn, "delete trap list", id);
219            HarvestDBConnection.release(conn);
220        }
221    }
222
223    /**
224     * Statement to update the elementary properties of a trap list.
225     */
226    private static final String LIST_UPDATE_STMT = "UPDATE global_crawler_trap_lists SET name = ?, description = ?, isActive = ? WHERE global_crawler_trap_list_id = ?";
227
228    /**
229     * Update a trap list. In order to update the trap expressions for this list, we first delete all the existing trap
230     * expressions for the list then add all those in the updated version.
231     *
232     * @param trapList the trap list to update
233     */
234    @Override
235    public void update(GlobalCrawlerTrapList trapList) {
236        ArgumentNotValid.checkNotNull(trapList, "trapList");
237        Connection conn = HarvestDBConnection.get();
238        PreparedStatement stmt = null;
239        try {
240            conn.setAutoCommit(false);
241            stmt = conn.prepareStatement(LIST_UPDATE_STMT);
242            stmt.setString(1, trapList.getName());
243            stmt.setString(2, trapList.getDescription());
244            stmt.setBoolean(3, trapList.isActive());
245            stmt.setInt(4, trapList.getId());
246            stmt.executeUpdate();
247            stmt.close();
248
249            // Delete all the trap expressions.
250            stmt = conn.prepareStatement(DELETE_EXPR_STMT);
251            stmt.setInt(1, trapList.getId());
252            stmt.executeUpdate();
253            stmt.close();
254            // Add the new trap expressions one by one.
255            for (String expr : trapList.getTraps()) {
256                stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT);
257                stmt.setInt(1, trapList.getId());
258                stmt.setString(2, expr);
259                stmt.executeUpdate();
260                stmt.close();
261            }
262            conn.commit();
263        } catch (SQLException e) {
264            String message = "Error updating trap list :'" + trapList.getId() + "'\n"
265                    + ExceptionUtils.getSQLExceptionCause(e);
266            log.warn(message, e);
267            throw new UnknownID(message, e);
268        } finally {
269            DBUtils.closeStatementIfOpen(stmt);
270            DBUtils.rollbackIfNeeded(conn, "update trap list", trapList);
271            HarvestDBConnection.release(conn);
272        }
273    }
274
275    /**
276     * Statement to read the elementary properties of a trap list.
277     */
278    private static final String SELECT_TRAPLIST_STMT = "SELECT name, description, isActive FROM global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?";
279
280    /**
281     * Statement to read the trap expressions for a trap list.
282     */
283    private static final String SELECT_TRAP_EXPRESSIONS_STMT = "SELECT trap_expression from global_crawler_trap_expressions WHERE crawler_trap_list_id = ?";
284
285    @Override
286    public GlobalCrawlerTrapList read(int id) {
287        Connection conn = HarvestDBConnection.get();
288        PreparedStatement stmt = null;
289        try {
290            stmt = conn.prepareStatement(SELECT_TRAPLIST_STMT);
291            stmt.setInt(1, id);
292            ResultSet rs = stmt.executeQuery();
293            if (!rs.next()) {
294                throw new UnknownID("No such GlobalCrawlerTrapList: '" + id + "'");
295            }
296            String name = rs.getString("name");
297            String description = rs.getString("description");
298            boolean isActive = rs.getBoolean("isActive");
299            stmt.close();
300            stmt = conn.prepareStatement(SELECT_TRAP_EXPRESSIONS_STMT);
301            stmt.setInt(1, id);
302            rs = stmt.executeQuery();
303            List<String> exprs = new ArrayList<String>();
304            while (rs.next()) {
305                exprs.add(rs.getString("trap_expression"));
306            }
307            return new GlobalCrawlerTrapList(id, exprs, name, description, isActive);
308        } catch (SQLException e) {
309            String message = "Error retrieving trap list for id '" + id + "'\n"
310                    + ExceptionUtils.getSQLExceptionCause(e);
311            log.warn(message, e);
312            throw new IOFailure(message, e);
313        } finally {
314            DBUtils.closeStatementIfOpen(stmt);
315            HarvestDBConnection.release(conn);
316        }
317    }
318
319    /**
320     * Statement to read the elementary properties of a trap list.
321     */
322    private static final String EXISTS_TRAPLIST_STMT = "SELECT * FROM global_crawler_trap_lists WHERE name = ?";
323
324    /**
325     * Does crawlertrap with this name already exist.
326     *
327     * @param name The name for a crawlertrap
328     * @return true, if a crawlertrap with the given name already exists in the database; otherwise false
329     */
330    @Override
331    public boolean exists(String name) {
332        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
333        Connection conn = HarvestDBConnection.get();
334        PreparedStatement stmt = null;
335        boolean exists = false;
336        try {
337            stmt = conn.prepareStatement(EXISTS_TRAPLIST_STMT);
338            stmt.setString(1, name);
339            ResultSet rs = stmt.executeQuery();
340            if (rs.next()) {
341                exists = true;
342            }
343
344            return exists;
345        } catch (SQLException e) {
346            String message = "Error checking for existence of trap list " + "with name '" + name + "'\n"
347                    + ExceptionUtils.getSQLExceptionCause(e);
348            log.warn(message, e);
349            throw new IOFailure(message, e);
350        } finally {
351            DBUtils.closeStatementIfOpen(stmt);
352            HarvestDBConnection.release(conn);
353        }
354    }
355
356    /**
357     * Reads a list of all active global crawler trap expressions from the database and adds them to the crawl template
358     * for this job.
359     * @param orderXmlDoc The template to add crawlertraps to.
360     */
361    public void addGlobalCrawlerTraps(HeritrixTemplate orderXmlDoc) {
362    
363        GlobalCrawlerTrapListDAO dao = GlobalCrawlerTrapListDAO.getInstance();
364        dao.getAllActive().stream().forEach(globalCrawlerTrapList -> {
365            orderXmlDoc.insertCrawlerTraps(globalCrawlerTrapList.getName(),
366                    new ArrayList<>(globalCrawlerTrapList.getTraps()));
367        });
368
369    } 
370}