001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.sql.Connection;
027import java.sql.PreparedStatement;
028import java.sql.ResultSet;
029import java.sql.SQLException;
030import java.sql.Statement;
031import java.util.ArrayList;
032import java.util.List;
033
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037import dk.netarkivet.common.exceptions.ArgumentNotValid;
038import dk.netarkivet.common.exceptions.IOFailure;
039import dk.netarkivet.common.exceptions.UnknownID;
040import dk.netarkivet.common.utils.DBUtils;
041import dk.netarkivet.common.utils.ExceptionUtils;
042
043/**
044 * A singleton giving access to global crawler traps.
045 */
046public class GlobalCrawlerTrapListDBDAO extends GlobalCrawlerTrapListDAO {
047
048    /** The logger for this class. */
049    private static final Logger log = LoggerFactory.getLogger(GlobalCrawlerTrapListDBDAO.class);
050
051    /**
052     * protected constructor of this class. Checks if any migration are needed before operation starts.
053     */
054    protected GlobalCrawlerTrapListDBDAO() {
055        Connection connection = HarvestDBConnection.get();
056        try {
057            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPEXPRESSIONS);
058            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPLISTS);
059        } finally {
060            HarvestDBConnection.release(connection);
061        }
062    }
063
064    /**
065     * Statement to select all trap lists which are either active or inactive.
066     */
067    private static final String SELECT_BY_ACTIVITY = "SELECT global_crawler_trap_list_id FROM global_crawler_trap_lists WHERE isActive = ?";
068
069    /**
070     * Returns a list of either all active or all inactive trap lists.
071     *
072     * @param isActive whether to return active or inactive lists.
073     * @return a list if global crawler trap lists.
074     */
075    private List<GlobalCrawlerTrapList> getAllByActivity(boolean isActive) {
076        List<GlobalCrawlerTrapList> result = new ArrayList<GlobalCrawlerTrapList>();
077        Connection conn = HarvestDBConnection.get();
078        PreparedStatement stmt = null;
079        try {
080            stmt = conn.prepareStatement(SELECT_BY_ACTIVITY);
081            stmt.setBoolean(1, isActive);
082            ResultSet rs = stmt.executeQuery();
083            while (rs.next()) {
084                result.add(read(rs.getInt(1)));
085            }
086            return result;
087        } catch (SQLException e) {
088            String message = "Error reading trap list\n" + ExceptionUtils.getSQLExceptionCause(e);
089            log.warn(message, e);
090            throw new UnknownID(message, e);
091        } finally {
092            DBUtils.closeStatementIfOpen(stmt);
093            HarvestDBConnection.release(conn);
094        }
095    }
096
097    @Override
098    public List<GlobalCrawlerTrapList> getAllActive() {
099        return getAllByActivity(true);
100    }
101
102    @Override
103    public List<GlobalCrawlerTrapList> getAllInActive() {
104        return getAllByActivity(false);
105    }
106
107    @Override
108    public List<String> getAllActiveTrapExpressions() {
109        Connection conn = HarvestDBConnection.get();
110        List<String> result = new ArrayList<String>();
111        PreparedStatement stmt = null;
112        try {
113            stmt = conn.prepareStatement("SELECT DISTINCT trap_expression FROM global_crawler_trap_lists, "
114                    + "global_crawler_trap_expressions " + "WHERE global_crawler_trap_list_id = "
115                    + "crawler_trap_list_id " + "AND isActive = ?");
116            stmt.setBoolean(1, true);
117            ResultSet rs = stmt.executeQuery();
118            while (rs.next()) {
119                result.add(rs.getString(1));
120            }
121            return result;
122        } catch (SQLException e) {
123            String message = "Error retrieving expressions.\n" + ExceptionUtils.getSQLExceptionCause(e);
124            log.warn(message, e);
125            throw new IOFailure(message, e);
126        } finally {
127            DBUtils.closeStatementIfOpen(stmt);
128            HarvestDBConnection.release(conn);
129        }
130    }
131
132    /**
133     * Statement to insert a new trap list.
134     */
135    private static final String INSERT_TRAPLIST_STMT = "INSERT INTO global_crawler_trap_lists (name, description, isActive)"
136            + "VALUES (?,?,?)";
137
138    /**
139     * Statement to insert a new trap expression in a given list.
140     */
141    private static final String INSERT_TRAP_EXPR_STMT = "INSERT INTO global_crawler_trap_expressions (crawler_trap_list_id, trap_expression) VALUES (?,?) ";
142
143    @Override
144    public int create(GlobalCrawlerTrapList trapList) {
145        ArgumentNotValid.checkNotNull(trapList, "trapList");
146        // Check for existence of a trapList in the database with the same name
147        // and throw argumentNotValid if not
148        if (exists(trapList.getName())) {
149            throw new ArgumentNotValid("Crawlertrap with name '" + trapList.getName() + "'already exists in database");
150        }
151        int trapId;
152        Connection conn = HarvestDBConnection.get();
153        PreparedStatement stmt = null;
154        try {
155            conn.setAutoCommit(false);
156            stmt = conn.prepareStatement(INSERT_TRAPLIST_STMT, Statement.RETURN_GENERATED_KEYS);
157            stmt.setString(1, trapList.getName());
158            stmt.setString(2, trapList.getDescription());
159            stmt.setBoolean(3, trapList.isActive());
160            stmt.executeUpdate();
161            ResultSet rs = stmt.getGeneratedKeys();
162            rs.next();
163            trapId = rs.getInt(1);
164            trapList.setId(trapId);
165            log.debug("Inserting {} traps into database for list {} (trapId={})", trapList.getTraps().size(), trapList.getName(), trapId);
166            for (String expr : trapList.getTraps()) {
167                stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT);
168                stmt.setInt(1, trapId);
169                stmt.setString(2, expr);
170                stmt.executeUpdate();
171            }
172            conn.commit();
173        } catch (SQLException e) {
174            String message = "SQL error creating global crawler trap list \n" + ExceptionUtils.getSQLExceptionCause(e);
175            log.warn(message, e);
176            throw new IOFailure(message, e);
177        } finally {
178            DBUtils.closeStatementIfOpen(stmt);
179            DBUtils.rollbackIfNeeded(conn, "create trap list", trapList);
180            HarvestDBConnection.release(conn);
181        }
182        return trapId;
183    }
184
185    /**
186     * Statement to delete a trap list.
187     */
188    private static final String DELETE_TRAPLIST_STMT = "DELETE from global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?";
189
190    /**
191     * Statement to delete all expressions in a given trap list.
192     */
193    private static final String DELETE_EXPR_STMT = "DELETE FROM global_crawler_trap_expressions WHERE crawler_trap_list_id = ?";
194
195    @Override
196    public void delete(int id) {
197        Connection conn = HarvestDBConnection.get();
198        PreparedStatement stmt = null;
199        try {
200            conn.setAutoCommit(false);
201            // First delete the list.
202            stmt = conn.prepareStatement(DELETE_TRAPLIST_STMT);
203            stmt.setInt(1, id);
204            stmt.executeUpdate();
205            stmt.close();
206            // Then delete all its expressions.
207            stmt = conn.prepareStatement(DELETE_EXPR_STMT);
208            stmt.setInt(1, id);
209            stmt.executeUpdate();
210            conn.commit();
211        } catch (SQLException e) {
212            String message = "Error deleting trap list: '" + id + "'\n" + ExceptionUtils.getSQLExceptionCause(e);
213            log.warn(message, e);
214            throw new UnknownID(message, e);
215        } finally {
216            DBUtils.closeStatementIfOpen(stmt);
217            DBUtils.rollbackIfNeeded(conn, "delete trap list", id);
218            HarvestDBConnection.release(conn);
219        }
220    }
221
222    /**
223     * Statement to update the elementary properties of a trap list.
224     */
225    private static final String LIST_UPDATE_STMT = "UPDATE global_crawler_trap_lists SET name = ?, description = ?, isActive = ? WHERE global_crawler_trap_list_id = ?";
226
227    /**
228     * Update a trap list. In order to update the trap expressions for this list, we first delete all the existing trap
229     * expressions for the list then add all those in the updated version.
230     *
231     * @param trapList the trap list to update
232     */
233    @Override
234    public void update(GlobalCrawlerTrapList trapList) {
235        ArgumentNotValid.checkNotNull(trapList, "trapList");
236        Connection conn = HarvestDBConnection.get();
237        PreparedStatement stmt = null;
238        try {
239            conn.setAutoCommit(false);
240            stmt = conn.prepareStatement(LIST_UPDATE_STMT);
241            stmt.setString(1, trapList.getName());
242            stmt.setString(2, trapList.getDescription());
243            stmt.setBoolean(3, trapList.isActive());
244            stmt.setInt(4, trapList.getId());
245            stmt.executeUpdate();
246            stmt.close();
247
248            // Delete all the trap expressions.
249            stmt = conn.prepareStatement(DELETE_EXPR_STMT);
250            stmt.setInt(1, trapList.getId());
251            stmt.executeUpdate();
252            stmt.close();
253            // Add the new trap expressions one by one.
254            for (String expr : trapList.getTraps()) {
255                stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT);
256                stmt.setInt(1, trapList.getId());
257                stmt.setString(2, expr);
258                stmt.executeUpdate();
259                stmt.close();
260            }
261            conn.commit();
262        } catch (SQLException e) {
263            String message = "Error updating trap list :'" + trapList.getId() + "'\n"
264                    + ExceptionUtils.getSQLExceptionCause(e);
265            log.warn(message, e);
266            throw new UnknownID(message, e);
267        } finally {
268            DBUtils.closeStatementIfOpen(stmt);
269            DBUtils.rollbackIfNeeded(conn, "update trap list", trapList);
270            HarvestDBConnection.release(conn);
271        }
272    }
273
274    /**
275     * Statement to read the elementary properties of a trap list.
276     */
277    private static final String SELECT_TRAPLIST_STMT = "SELECT name, description, isActive FROM global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?";
278
279    /**
280     * Statement to read the trap expressions for a trap list.
281     */
282    private static final String SELECT_TRAP_EXPRESSIONS_STMT = "SELECT trap_expression from global_crawler_trap_expressions WHERE crawler_trap_list_id = ?";
283
284    @Override
285    public GlobalCrawlerTrapList read(int id) {
286        Connection conn = HarvestDBConnection.get();
287        PreparedStatement stmt = null;
288        try {
289            stmt = conn.prepareStatement(SELECT_TRAPLIST_STMT);
290            stmt.setInt(1, id);
291            ResultSet rs = stmt.executeQuery();
292            if (!rs.next()) {
293                throw new UnknownID("No such GlobalCrawlerTrapList: '" + id + "'");
294            }
295            String name = rs.getString("name");
296            String description = rs.getString("description");
297            boolean isActive = rs.getBoolean("isActive");
298            stmt.close();
299            stmt = conn.prepareStatement(SELECT_TRAP_EXPRESSIONS_STMT);
300            stmt.setInt(1, id);
301            rs = stmt.executeQuery();
302            List<String> exprs = new ArrayList<String>();
303            while (rs.next()) {
304                exprs.add(rs.getString("trap_expression"));
305            }
306            return new GlobalCrawlerTrapList(id, exprs, name, description, isActive);
307        } catch (SQLException e) {
308            String message = "Error retrieving trap list for id '" + id + "'\n"
309                    + ExceptionUtils.getSQLExceptionCause(e);
310            log.warn(message, e);
311            throw new IOFailure(message, e);
312        } finally {
313            DBUtils.closeStatementIfOpen(stmt);
314            HarvestDBConnection.release(conn);
315        }
316    }
317
318    /**
319     * Statement to read the elementary properties of a trap list.
320     */
321    private static final String EXISTS_TRAPLIST_STMT = "SELECT * FROM global_crawler_trap_lists WHERE name = ?";
322
323    /**
324     * Does crawlertrap with this name already exist.
325     *
326     * @param name The name for a crawlertrap
327     * @return true, if a crawlertrap with the given name already exists in the database; otherwise false
328     */
329    @Override
330    public boolean exists(String name) {
331        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
332        Connection conn = HarvestDBConnection.get();
333        PreparedStatement stmt = null;
334        boolean exists = false;
335        try {
336            stmt = conn.prepareStatement(EXISTS_TRAPLIST_STMT);
337            stmt.setString(1, name);
338            ResultSet rs = stmt.executeQuery();
339            if (rs.next()) {
340                exists = true;
341            }
342
343            return exists;
344        } catch (SQLException e) {
345            String message = "Error checking for existence of trap list " + "with name '" + name + "'\n"
346                    + ExceptionUtils.getSQLExceptionCause(e);
347            log.warn(message, e);
348            throw new IOFailure(message, e);
349        } finally {
350            DBUtils.closeStatementIfOpen(stmt);
351            HarvestDBConnection.release(conn);
352        }
353    }
354
355    /**
356     * Reads a list of all active global crawler trap expressions from the database and adds them to the crawl template
357     * for this job.
358     * @param orderXmlDoc The template to add crawlertraps to.
359     */
360    public void addGlobalCrawlerTraps(HeritrixTemplate orderXmlDoc) {
361    
362        GlobalCrawlerTrapListDAO dao = GlobalCrawlerTrapListDAO.getInstance();
363        orderXmlDoc.insertCrawlerTraps(Constants.GLOBAL_CRAWLER_TRAPS_ELEMENT_NAME,
364                                       dao.getAllActiveTrapExpressions());
365    } 
366}