001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.sql.Connection;
027import java.sql.PreparedStatement;
028import java.sql.ResultSet;
029import java.sql.SQLException;
030import java.sql.Statement;
031import java.util.ArrayList;
032import java.util.List;
033
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037import dk.netarkivet.common.exceptions.ArgumentNotValid;
038import dk.netarkivet.common.exceptions.IOFailure;
039import dk.netarkivet.common.exceptions.UnknownID;
040import dk.netarkivet.common.utils.DBUtils;
041import dk.netarkivet.common.utils.ExceptionUtils;
042
043/**
044 * A singleton giving access to global crawler traps.
045 */
046public class GlobalCrawlerTrapListDBDAO extends GlobalCrawlerTrapListDAO {
047
048    /** The logger for this class. */
049    private static final Logger log = LoggerFactory.getLogger(GlobalCrawlerTrapListDBDAO.class);
050
051    /**
052     * protected constructor of this class. Checks if any migration are needed before operation starts.
053     */
054    protected GlobalCrawlerTrapListDBDAO() {
055        Connection connection = HarvestDBConnection.get();
056        try {
057            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPEXPRESSIONS);
058            HarvesterDatabaseTables.checkVersion(connection, HarvesterDatabaseTables.GLOBALCRAWLERTRAPLISTS);
059        } finally {
060            HarvestDBConnection.release(connection);
061        }
062    }
063
064    /**
065     * Statement to select all trap lists which are either active or inactive.
066     */
067    private static final String SELECT_BY_ACTIVITY = "SELECT global_crawler_trap_list_id FROM global_crawler_trap_lists WHERE isActive = ?";
068
069    /**
070     * Returns a list of either all active or all inactive trap lists.
071     *
072     * @param isActive whether to return active or inactive lists.
073     * @return a list if global crawler trap lists.
074     */
075    private List<GlobalCrawlerTrapList> getAllByActivity(boolean isActive) {
076        List<GlobalCrawlerTrapList> result = new ArrayList<GlobalCrawlerTrapList>();
077        Connection conn = HarvestDBConnection.get();
078        PreparedStatement stmt = null;
079        try {
080            stmt = conn.prepareStatement(SELECT_BY_ACTIVITY);
081            stmt.setBoolean(1, isActive);
082            ResultSet rs = stmt.executeQuery();
083            while (rs.next()) {
084                result.add(read(rs.getInt(1)));
085            }
086            return result;
087        } catch (SQLException e) {
088            String message = "Error reading trap list\n" + ExceptionUtils.getSQLExceptionCause(e);
089            log.warn(message, e);
090            throw new UnknownID(message, e);
091        } finally {
092            DBUtils.closeStatementIfOpen(stmt);
093            HarvestDBConnection.release(conn);
094        }
095    }
096
097    @Override
098    public List<GlobalCrawlerTrapList> getAllActive() {
099        return getAllByActivity(true);
100    }
101
102    @Override
103    public List<GlobalCrawlerTrapList> getAllInActive() {
104        return getAllByActivity(false);
105    }
106
107    @Override
108    public List<String> getAllActiveTrapExpressions() {
109        Connection conn = HarvestDBConnection.get();
110        List<String> result = new ArrayList<String>();
111        PreparedStatement stmt = null;
112        try {
113            stmt = conn.prepareStatement("SELECT DISTINCT trap_expression FROM global_crawler_trap_lists, "
114                    + "global_crawler_trap_expressions " + "WHERE global_crawler_trap_list_id = "
115                    + "crawler_trap_list_id " + "AND isActive = ?");
116            stmt.setBoolean(1, true);
117            ResultSet rs = stmt.executeQuery();
118            while (rs.next()) {
119                result.add(rs.getString(1));
120            }
121            return result;
122        } catch (SQLException e) {
123            String message = "Error retrieving expressions.\n" + ExceptionUtils.getSQLExceptionCause(e);
124            log.warn(message, e);
125            throw new IOFailure(message, e);
126        } finally {
127            DBUtils.closeStatementIfOpen(stmt);
128            HarvestDBConnection.release(conn);
129        }
130    }
131
132    /**
133     * Statement to insert a new trap list.
134     */
135    private static final String INSERT_TRAPLIST_STMT = "INSERT INTO global_crawler_trap_lists (name, description, isActive)"
136            + "VALUES (?,?,?)";
137
138    /**
139     * Statement to insert a new trap expression in a given list.
140     */
141    private static final String INSERT_TRAP_EXPR_STMT = "INSERT INTO global_crawler_trap_expressions (crawler_trap_list_id, trap_expression) VALUES (?,?) ";
142
143    @Override
144    public int create(GlobalCrawlerTrapList trapList) {
145        ArgumentNotValid.checkNotNull(trapList, "trapList");
146        // Check for existence of a trapList in the database with the same name
147        // and throw argumentNotValid if not
148        if (exists(trapList.getName())) {
149            throw new ArgumentNotValid("Crawlertrap with name '" + trapList.getName() + "'already exists in database");
150        }
151        int trapId;
152        Connection conn = HarvestDBConnection.get();
153        PreparedStatement stmt = null;
154        try {
155            conn.setAutoCommit(false);
156            stmt = conn.prepareStatement(INSERT_TRAPLIST_STMT, Statement.RETURN_GENERATED_KEYS);
157            stmt.setString(1, trapList.getName());
158            stmt.setString(2, trapList.getDescription());
159            stmt.setBoolean(3, trapList.isActive());
160            stmt.executeUpdate();
161            ResultSet rs = stmt.getGeneratedKeys();
162            rs.next();
163            trapId = rs.getInt(1);
164            trapList.setId(trapId);
165            for (String expr : trapList.getTraps()) {
166                stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT);
167                stmt.setInt(1, trapId);
168                stmt.setString(2, expr);
169                stmt.executeUpdate();
170            }
171            conn.commit();
172        } catch (SQLException e) {
173            String message = "SQL error creating global crawler trap list \n" + ExceptionUtils.getSQLExceptionCause(e);
174            log.warn(message, e);
175            throw new IOFailure(message, e);
176        } finally {
177            DBUtils.closeStatementIfOpen(stmt);
178            DBUtils.rollbackIfNeeded(conn, "create trap list", trapList);
179            HarvestDBConnection.release(conn);
180        }
181        return trapId;
182    }
183
184    /**
185     * Statement to delete a trap list.
186     */
187    private static final String DELETE_TRAPLIST_STMT = "DELETE from global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?";
188
189    /**
190     * Statement to delete all expressions in a given trap list.
191     */
192    private static final String DELETE_EXPR_STMT = "DELETE FROM global_crawler_trap_expressions WHERE crawler_trap_list_id = ?";
193
194    @Override
195    public void delete(int id) {
196        Connection conn = HarvestDBConnection.get();
197        PreparedStatement stmt = null;
198        try {
199            conn.setAutoCommit(false);
200            // First delete the list.
201            stmt = conn.prepareStatement(DELETE_TRAPLIST_STMT);
202            stmt.setInt(1, id);
203            stmt.executeUpdate();
204            stmt.close();
205            // Then delete all its expressions.
206            stmt = conn.prepareStatement(DELETE_EXPR_STMT);
207            stmt.setInt(1, id);
208            stmt.executeUpdate();
209            conn.commit();
210        } catch (SQLException e) {
211            String message = "Error deleting trap list: '" + id + "'\n" + ExceptionUtils.getSQLExceptionCause(e);
212            log.warn(message, e);
213            throw new UnknownID(message, e);
214        } finally {
215            DBUtils.closeStatementIfOpen(stmt);
216            DBUtils.rollbackIfNeeded(conn, "delete trap list", id);
217            HarvestDBConnection.release(conn);
218        }
219    }
220
221    /**
222     * Statement to update the elementary properties of a trap list.
223     */
224    private static final String LIST_UPDATE_STMT = "UPDATE global_crawler_trap_lists SET name = ?, description = ?, isActive = ? WHERE global_crawler_trap_list_id = ?";
225
226    /**
227     * Update a trap list. In order to update the trap expressions for this list, we first delete all the existing trap
228     * expressions for the list then add all those in the updated version.
229     *
230     * @param trapList the trap list to update
231     */
232    @Override
233    public void update(GlobalCrawlerTrapList trapList) {
234        ArgumentNotValid.checkNotNull(trapList, "trapList");
235        Connection conn = HarvestDBConnection.get();
236        PreparedStatement stmt = null;
237        try {
238            conn.setAutoCommit(false);
239            stmt = conn.prepareStatement(LIST_UPDATE_STMT);
240            stmt.setString(1, trapList.getName());
241            stmt.setString(2, trapList.getDescription());
242            stmt.setBoolean(3, trapList.isActive());
243            stmt.setInt(4, trapList.getId());
244            stmt.executeUpdate();
245            stmt.close();
246
247            // Delete all the trap expressions.
248            stmt = conn.prepareStatement(DELETE_EXPR_STMT);
249            stmt.setInt(1, trapList.getId());
250            stmt.executeUpdate();
251            stmt.close();
252            // Add the new trap expressions one by one.
253            for (String expr : trapList.getTraps()) {
254                stmt = conn.prepareStatement(INSERT_TRAP_EXPR_STMT);
255                stmt.setInt(1, trapList.getId());
256                stmt.setString(2, expr);
257                stmt.executeUpdate();
258                stmt.close();
259            }
260            conn.commit();
261        } catch (SQLException e) {
262            String message = "Error updating trap list :'" + trapList.getId() + "'\n"
263                    + ExceptionUtils.getSQLExceptionCause(e);
264            log.warn(message, e);
265            throw new UnknownID(message, e);
266        } finally {
267            DBUtils.closeStatementIfOpen(stmt);
268            DBUtils.rollbackIfNeeded(conn, "update trap list", trapList);
269            HarvestDBConnection.release(conn);
270        }
271    }
272
273    /**
274     * Statement to read the elementary properties of a trap list.
275     */
276    private static final String SELECT_TRAPLIST_STMT = "SELECT name, description, isActive FROM global_crawler_trap_lists WHERE global_crawler_trap_list_id = ?";
277
278    /**
279     * Statement to read the trap expressions for a trap list.
280     */
281    private static final String SELECT_TRAP_EXPRESSIONS_STMT = "SELECT trap_expression from global_crawler_trap_expressions WHERE crawler_trap_list_id = ?";
282
283    @Override
284    public GlobalCrawlerTrapList read(int id) {
285        Connection conn = HarvestDBConnection.get();
286        PreparedStatement stmt = null;
287        try {
288            stmt = conn.prepareStatement(SELECT_TRAPLIST_STMT);
289            stmt.setInt(1, id);
290            ResultSet rs = stmt.executeQuery();
291            if (!rs.next()) {
292                throw new UnknownID("No such GlobalCrawlerTrapList: '" + id + "'");
293            }
294            String name = rs.getString("name");
295            String description = rs.getString("description");
296            boolean isActive = rs.getBoolean("isActive");
297            stmt.close();
298            stmt = conn.prepareStatement(SELECT_TRAP_EXPRESSIONS_STMT);
299            stmt.setInt(1, id);
300            rs = stmt.executeQuery();
301            List<String> exprs = new ArrayList<String>();
302            while (rs.next()) {
303                exprs.add(rs.getString("trap_expression"));
304            }
305            return new GlobalCrawlerTrapList(id, exprs, name, description, isActive);
306        } catch (SQLException e) {
307            String message = "Error retrieving trap list for id '" + id + "'\n"
308                    + ExceptionUtils.getSQLExceptionCause(e);
309            log.warn(message, e);
310            throw new IOFailure(message, e);
311        } finally {
312            DBUtils.closeStatementIfOpen(stmt);
313            HarvestDBConnection.release(conn);
314        }
315    }
316
317    /**
318     * Statement to read the elementary properties of a trap list.
319     */
320    private static final String EXISTS_TRAPLIST_STMT = "SELECT * FROM global_crawler_trap_lists WHERE name = ?";
321
322    /**
323     * Does crawlertrap with this name already exist.
324     *
325     * @param name The name for a crawlertrap
326     * @return true, if a crawlertrap with the given name already exists in the database; otherwise false
327     */
328    @Override
329    public boolean exists(String name) {
330        ArgumentNotValid.checkNotNullOrEmpty(name, "name");
331        Connection conn = HarvestDBConnection.get();
332        PreparedStatement stmt = null;
333        boolean exists = false;
334        try {
335            stmt = conn.prepareStatement(EXISTS_TRAPLIST_STMT);
336            stmt.setString(1, name);
337            ResultSet rs = stmt.executeQuery();
338            if (rs.next()) {
339                exists = true;
340            }
341
342            return exists;
343        } catch (SQLException e) {
344            String message = "Error checking for existence of trap list " + "with name '" + name + "'\n"
345                    + ExceptionUtils.getSQLExceptionCause(e);
346            log.warn(message, e);
347            throw new IOFailure(message, e);
348        } finally {
349            DBUtils.closeStatementIfOpen(stmt);
350            HarvestDBConnection.release(conn);
351        }
352    }
353
354    /**
355     * Reads a list of all active global crawler trap expressions from the database and adds them to the crawl template
356     * for this job.
357     * @param orderXmlDoc The template to add crawlertraps to.
358     */
359    public void addGlobalCrawlerTraps(HeritrixTemplate orderXmlDoc) {
360        //System.out.println("Calling addGlobalCrawlerTraps w/arg: " + orderXmlDoc);
361    
362        GlobalCrawlerTrapListDAO dao = GlobalCrawlerTrapListDAO.getInstance();
363        orderXmlDoc.insertCrawlerTraps(Constants.GLOBAL_CRAWLER_TRAPS_ELEMENT_NAME,
364                                       dao.getAllActiveTrapExpressions());
365    } 
366}