/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.harvester.datamodel;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.utils.DBUtils;

/**
 * Derby-specific implementation of DB methods.
 *
 * <p>All table creations and migrations delegate to
 * {@link HarvestDBConnection#updateTable(String, int, String...)}, which executes the given SQL and
 * records the new table version.
 */
public abstract class DerbySpecifics extends DBSpecifics {

    /** Logger for this class. */
    private static final Logger log = LoggerFactory.getLogger(DerbySpecifics.class);

    /**
     * Get a temporary table for short-time use. The table should be disposed of with dropTemporaryTable. The table has
     * two columns domain_name varchar(Constants.MAX_NAME_SIZE) + config_name varchar(Constants.MAX_NAME_SIZE) All rows
     * in the table must be deleted at commit or rollback.
     *
     * @param c The DB connection to use.
     * @return The name of the created table
     * @throws SQLException if there is a problem getting the table.
     */
    public String getJobConfigsTmpTable(Connection c) throws SQLException {
        ArgumentNotValid.checkNotNull(c, "Connection c");
        PreparedStatement s = c.prepareStatement("DECLARE GLOBAL TEMPORARY TABLE jobconfignames "
                + "( domain_name varchar(" + Constants.MAX_NAME_SIZE + "), " + " config_name varchar("
                + Constants.MAX_NAME_SIZE + ") )" + " ON COMMIT DELETE ROWS NOT LOGGED ON ROLLBACK DELETE ROWS");
        // Close the statement even if execute() throws; previously the close was
        // skipped on failure, leaking the statement.
        try {
            s.execute();
        } finally {
            s.close();
        }
        // Derby declared global temporary tables live in the SESSION schema.
        return "session.jobconfignames";
    }

    /**
     * Dispose of a temporary table gotten with getTemporaryTable. This can be expected to be called from within a
     * finally clause, so it mustn't throw exceptions.
     *
     * @param c The DB connection to use.
     * @param tableName The name of the temporary table
     */
    public void dropJobConfigsTmpTable(Connection c, String tableName) {
        ArgumentNotValid.checkNotNull(c, "Connection c");
        ArgumentNotValid.checkNotNullOrEmpty(tableName, "String tableName");
        PreparedStatement s = null;
        try {
            // Now drop the temporary table
            s = c.prepareStatement("DROP TABLE " + tableName);
            s.execute();
        } catch (SQLException e) {
            // Swallow deliberately: this method is documented to never throw.
            log.warn("Couldn't drop temporary table {}", tableName, e);
        } finally {
            // Previously the statement was never closed, leaking it on every call.
            if (s != null) {
                try {
                    s.close();
                } catch (SQLException e) {
                    log.warn("Couldn't close statement used to drop temporary table {}", tableName, e);
                }
            }
        }
    }

    /**
     * Returns the Derby-flavoured paging sub-clause for an ORDER BY.
     *
     * @param limit maximum number of rows to fetch
     * @param offset number of rows to skip first
     * @return an "OFFSET ... FETCH NEXT ..." SQL fragment
     */
    @Override
    public String getOrderByLimitAndOffsetSubClause(long limit, long offset) {
        // LIMIT sub-clause supported by Derby 10.5.3
        // see http://db.apache.org/derby/docs/10.5/ref/rrefsqljoffsetfetch.html
        return "OFFSET " + offset + " ROWS FETCH NEXT " + limit + " ROWS ONLY";
    }

    /** @return true — Derby supports CLOB columns. */
    @Override
    public boolean supportsClob() {
        return true;
    }

    /**
     * Migrates the 'jobs' table from version 3 to version 4 consisting of a change of the field forcemaxbytes from int
     * to bigint and setting its default to -1. Furthermore the default value for field num_configs is set to 0.
     *
     * @throws IOFailure in case of problems in interacting with the database
     */
    protected synchronized void migrateJobsv3tov4() {
        // Change the forcemaxbytes from 'int' to 'bigint'.
        // Derby cannot change a column's type in place; the add/copy/drop/rename
        // procedure was found here: https://issues.apache.org/jira/browse/DERBY-1515
        String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN forcemaxbytes_new bigint NOT NULL DEFAULT -1",
                "UPDATE jobs SET forcemaxbytes_new = forcemaxbytes", "ALTER TABLE jobs DROP COLUMN forcemaxbytes",
                "RENAME COLUMN jobs.forcemaxbytes_new TO forcemaxbytes",
                "ALTER TABLE jobs ALTER COLUMN num_configs SET DEFAULT 0"};
        HarvestDBConnection.updateTable("jobs", 4, sqlStatements);
    }

    /**
     * Migrates the 'jobs' table from version 4 to version 5 consisting of adding new fields 'resubmitted_as_job' and
     * 'submittedDate'.
     *
     * @throws IOFailure in case of problems in interacting with the database
     */
    protected synchronized void migrateJobsv4tov5() {
        // Update jobs table to version 5
        String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN submitteddate timestamp",
                "ALTER TABLE jobs ADD COLUMN resubmitted_as_job bigint"};
        HarvestDBConnection.updateTable("jobs", 5, sqlStatements);
    }

    /**
     * Migrates the 'configurations' table from version 3 to version 4. This consists of altering the default value of
     * field 'maxbytes' to -1.
     *
     * <p>NOTE(review): method name is missing a 't' ("v3ov4" rather than "v3tov4") — kept unchanged
     * for compatibility with the declaration in the superclass/callers.
     */
    protected synchronized void migrateConfigurationsv3ov4() {
        // Update configurations table to version 4
        String[] sqlStatements = {"ALTER TABLE configurations ALTER maxbytes WITH DEFAULT -1"};
        HarvestDBConnection.updateTable("configurations", 4, sqlStatements);
    }

    /**
     * Migrates the 'fullharvests' table from version 2 to version 3. This consists of altering the default value of
     * field 'maxbytes' to -1.
     */
    protected synchronized void migrateFullharvestsv2tov3() {
        // Update fullharvests table to version 3
        String[] sqlStatements = {"ALTER TABLE fullharvests ALTER maxbytes WITH DEFAULT -1"};
        HarvestDBConnection.updateTable("fullharvests", 3, sqlStatements);
    }

    /** Creates the global_crawler_trap_lists table (version 1). */
    @Override
    protected void createGlobalCrawlerTrapLists() {
        String createStatement = "CREATE TABLE global_crawler_trap_lists("
                + " global_crawler_trap_list_id INT NOT NULL " + "GENERATED ALWAYS AS IDENTITY PRIMARY KEY,"
                + " name VARCHAR(300) NOT NULL UNIQUE, " + " description VARCHAR(30000),"
                + " isActive INT NOT NULL) ";
        HarvestDBConnection.updateTable("global_crawler_trap_lists", 1, createStatement);
    }

    /** Creates the global_crawler_trap_expressions table (version 1). */
    @Override
    protected void createGlobalCrawlerTrapExpressions() {
        String createStatement = "CREATE TABLE global_crawler_trap_expressions("
                + " crawler_trap_list_id INT NOT NULL, " + " trap_expression VARCHAR(1000),"
                + " PRIMARY KEY (crawler_trap_list_id, " + "trap_expression))";
        HarvestDBConnection.updateTable("global_crawler_trap_expressions", 1, createStatement);
    }

    /** Creates the frontierReportMonitor table (version 1). */
    @Override
    public void createFrontierReportMonitorTable() {
        String createStatement = "CREATE TABLE frontierReportMonitor (" + " jobId bigint NOT NULL, "
                + "filterId varchar(200) NOT NULL," + "tstamp timestamp NOT NULL,"
                + "domainName varchar(300) NOT NULL," + "currentSize bigint NOT NULL,"
                + "totalEnqueues bigint NOT NULL," + "sessionBalance bigint NOT NULL," + "lastCost numeric NOT NULL,"
                + "averageCost numeric NOT NULL," + "lastDequeueTime varchar(100) NOT NULL,"
                + "wakeTime varchar(100) NOT NULL," + "totalSpend bigint NOT NULL," + "totalBudget bigint NOT NULL,"
                + "errorCount bigint NOT NULL," + "lastPeekUri varchar(1000) NOT NULL,"
                + "lastQueuedUri varchar(1000) NOT NULL," + "UNIQUE (jobId, filterId, domainName)" + ")";
        HarvestDBConnection.updateTable("frontierreportmonitor", 1, createStatement);
    }

    /** Creates the runningJobsHistory table (version 1) plus its three lookup indexes. */
    @Override
    public void createRunningJobsHistoryTable() {
        String createStatement = "CREATE TABLE runningJobsHistory (" + "jobId bigint NOT NULL, "
                + "harvestName varchar(300) NOT NULL," + "hostUrl varchar(300) NOT NULL,"
                + "progress numeric NOT NULL," + "queuedFilesCount bigint NOT NULL,"
                + "totalQueuesCount bigint NOT NULL," + "activeQueuesCount bigint NOT NULL,"
                + "exhaustedQueuesCount bigint NOT NULL," + "elapsedSeconds bigint NOT NULL,"
                + "alertsCount bigint NOT NULL," + "downloadedFilesCount bigint NOT NULL,"
                + "currentProcessedKBPerSec int NOT NULL," + "processedKBPerSec int NOT NULL,"
                + "currentProcessedDocsPerSec numeric NOT NULL," + "processedDocsPerSec numeric NOT NULL,"
                + "activeToeCount integer NOT NULL," + "status int NOT NULL," + "tstamp timestamp NOT NULL,"
                + "PRIMARY KEY (jobId, harvestName, elapsedSeconds, tstamp)" + ")";
        HarvestDBConnection.updateTable("runningjobshistory", 1, createStatement);

        Connection c = HarvestDBConnection.get();
        try {
            DBUtils.executeSQL(c, "CREATE INDEX runningJobsHistoryCrawlJobId on runningJobsHistory (jobId)",
                    "CREATE INDEX runningJobsHistoryCrawlTime on runningJobsHistory (elapsedSeconds)",
                    "CREATE INDEX runningJobsHistoryHarvestName on runningJobsHistory (harvestName)");
        } finally {
            // Always return the pooled connection, even if index creation fails.
            HarvestDBConnection.release(c);
        }
    }

    /** Creates the runningJobsMonitor table (version 1) plus its two lookup indexes. */
    @Override
    public void createRunningJobsMonitorTable() {
        String createStatement = "CREATE TABLE runningJobsMonitor (" + "jobId bigint NOT NULL, "
                + "harvestName varchar(300) NOT NULL," + "hostUrl varchar(300) NOT NULL,"
                + "progress numeric NOT NULL," + "queuedFilesCount bigint NOT NULL,"
                + "totalQueuesCount bigint NOT NULL," + "activeQueuesCount bigint NOT NULL,"
                + "exhaustedQueuesCount bigint NOT NULL," + "elapsedSeconds bigint NOT NULL,"
                + "alertsCount bigint NOT NULL," + "downloadedFilesCount bigint NOT NULL,"
                + "currentProcessedKBPerSec integer NOT NULL," + "processedKBPerSec integer NOT NULL,"
                + "currentProcessedDocsPerSec numeric NOT NULL," + "processedDocsPerSec numeric NOT NULL,"
                + "activeToeCount integer NOT NULL," + "status integer NOT NULL," + "tstamp timestamp NOT NULL, "
                + "PRIMARY KEY (jobId, harvestName)" + ")";
        HarvestDBConnection.updateTable("runningjobsmonitor", 1, createStatement);

        Connection c = HarvestDBConnection.get();
        try {
            DBUtils.executeSQL(c, "CREATE INDEX runningJobsMonitorJobId on runningJobsMonitor (jobId)",
                    "CREATE INDEX runningJobsMonitorHarvestName on runningJobsMonitor (harvestName)");
        } finally {
            // Always return the pooled connection, even if index creation fails.
            HarvestDBConnection.release(c);
        }
    }

    // Below DB changes introduced with development release 3.15
    // with changes to tables 'runningjobshistory', 'runningjobsmonitor',
    // 'configurations', 'fullharvests', and 'jobs'.

    /**
     * Migrates the 'runningjobshistory' table from version 1 to version 2. This consists of adding the new column
     * 'retiredQueuesCount'.
     */
    @Override
    protected void migrateRunningJobsHistoryTableV1ToV2() {
        String[] sqlStatements = {"ALTER TABLE runningjobshistory ADD COLUMN retiredQueuesCount bigint not null DEFAULT 0"};
        HarvestDBConnection.updateTable("runningjobshistory", 2, sqlStatements);
    }

    /**
     * Migrates the 'runningjobsmonitor' table from version 1 to version 2. This consists of adding the new column
     * 'retiredQueuesCount'.
     */
    @Override
    protected void migrateRunningJobsMonitorTableV1ToV2() {
        String[] sqlStatements = {"ALTER TABLE runningjobsmonitor ADD COLUMN retiredQueuesCount bigint not null DEFAULT 0"};
        HarvestDBConnection.updateTable("runningjobsmonitor", 2, sqlStatements);
    }

    /**
     * Migrates the 'configurations' table from version 4 to version 5: widens field 'maxobjects'
     * from int to bigint.
     */
    @Override
    protected void migrateConfigurationsv4tov5() {
        // Change the maxobjects from 'int' to 'bigint'.
        // Derby cannot change a column's type in place; the add/copy/drop/rename
        // procedure was found here: https://issues.apache.org/jira/browse/DERBY-1515
        String[] sqlStatements = {"ALTER TABLE configurations ADD COLUMN maxobjects_new bigint NOT NULL DEFAULT -1",
                "UPDATE configurations SET maxobjects_new = maxobjects",
                "ALTER TABLE configurations DROP COLUMN maxobjects",
                "RENAME COLUMN configurations.maxobjects_new TO maxobjects"};
        HarvestDBConnection.updateTable("configurations", 5, sqlStatements);
    }

    /**
     * Migrates the 'fullharvests' table from version 3 to version 4: adds the bigint field
     * 'maxjobrunningtime' with default 0.
     */
    @Override
    protected void migrateFullharvestsv3tov4() {
        // Add new bigint field maxjobrunningtime with default 0
        String[] sqlStatements = {"ALTER TABLE fullharvests ADD COLUMN maxjobrunningtime bigint NOT NULL DEFAULT 0"};
        HarvestDBConnection.updateTable("fullharvests", 4, sqlStatements);
    }

    /**
     * Migrates the 'jobs' table from version 5 to version 6: adds the bigint field
     * 'forcemaxrunningtime' with default 0.
     */
    @Override
    protected void migrateJobsv5tov6() {
        // Add new bigint field with default 0
        String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN forcemaxrunningtime bigint NOT NULL DEFAULT 0"};
        HarvestDBConnection.updateTable("jobs", 6, sqlStatements);
    }

    /**
     * Migrates the 'fullharvests' table from version 4 to version 5: adds the int field
     * 'isindexready' (0 = not ready, 1 = ready) with default 0.
     */
    @Override
    protected void migrateFullharvestsv4tov5() {
        // Add new bigint field isindexready (0 is not ready, 1 is ready).
        String[] sqlStatements = {"ALTER TABLE fullharvests ADD COLUMN isindexready int NOT NULL DEFAULT 0"};
        HarvestDBConnection.updateTable("fullharvests", 5, sqlStatements);
    }

    /**
     * Creates the extendedfieldtype table (version 1) and seeds it with the two built-in types
     * 'domains' (id 1) and 'harvestdefinitions' (id 2).
     */
    @Override
    protected void createExtendedFieldTypeTable() {
        String[] statements = new String[3];
        statements[0] = "" + "CREATE TABLE extendedfieldtype " + " ( "
                + " extendedfieldtype_id BIGINT NOT NULL PRIMARY KEY, "
                + " name VARCHAR(50) NOT NULL " + " )";
        statements[1] = "INSERT INTO extendedfieldtype ( extendedfieldtype_id, name )" + " VALUES ( 1, 'domains')";
        statements[2] = "INSERT INTO extendedfieldtype ( extendedfieldtype_id, name ) "
                + "VALUES ( 2, 'harvestdefinitions')";
        HarvestDBConnection.updateTable("extendedfieldtype", 1, statements);
    }

    /** Creates the extendedfield table (version 1). */
    @Override
    protected void createExtendedFieldTable() {
        String createStatement = "" + "CREATE TABLE extendedfield " + " ( "
                + " extendedfield_id BIGINT NOT NULL PRIMARY KEY, " + " extendedfieldtype_id BIGINT NOT NULL, "
                + " name VARCHAR(50) NOT NULL, " + " format VARCHAR(50) NOT NULL, "
                + " defaultvalue VARCHAR(50) NOT NULL, " + " options VARCHAR(50) NOT NULL, "
                + " datatype INT NOT NULL, " + " mandatory INT NOT NULL, "
                + " sequencenr INT " + " )";
        HarvestDBConnection.updateTable("extendedfield", 1, createStatement);
    }

    /** Creates the extendedfieldvalue table (version 1). */
    @Override
    protected void createExtendedFieldValueTable() {
        String createStatement = "" + "CREATE TABLE extendedfieldvalue " + " ( "
                + " extendedfieldvalue_id BIGINT NOT NULL PRIMARY KEY, "
                + " extendedfield_id BIGINT NOT NULL, " + " instance_id BIGINT NOT NULL, "
                + " content VARCHAR(100) NOT NULL " + " )";
        HarvestDBConnection.updateTable("extendedfieldvalue", 1, createStatement);
    }

    /**
     * Migrates the 'jobs' table from version 6 to version 7: adds the nullable bigint field
     * 'continuationof'.
     */
    @Override
    protected synchronized void migrateJobsv6tov7() {
        String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN continuationof BIGINT DEFAULT NULL"};
        HarvestDBConnection.updateTable("jobs", 7, sqlStatements);
    }

    /**
     * Migrates the 'domains' table from version 2 to version 3: widens field 'crawlertraps'
     * to a CLOB(64M) via the add/copy/drop/rename procedure (Derby cannot alter a column type
     * in place; see https://issues.apache.org/jira/browse/DERBY-1515).
     */
    @Override
    protected void migrateDomainsv2tov3() {
        String[] sqlStatements = {"ALTER TABLE domains ADD COLUMN NEW_COLUMN CLOB(64M)",
                "UPDATE domains SET NEW_COLUMN=crawlertraps", "ALTER TABLE domains DROP COLUMN crawlertraps",
                "RENAME COLUMN domains.NEW_COLUMN TO crawlertraps"};
        HarvestDBConnection.updateTable("domains", 3, sqlStatements);
    }

    /**
     * Migrates the 'jobs' table from version 7 to version 8: adds the nullable timestamp field
     * 'creationdate'.
     */
    @Override
    protected void migrateJobsv7tov8() {
        String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN creationdate TIMESTAMP DEFAULT NULL"};
        HarvestDBConnection.updateTable("jobs", 8, sqlStatements);
    }

    /**
     * Migrates the 'jobs' table from version 8 to version 9: adds the nullable varchar field
     * 'harvestname_prefix'.
     */
    @Override
    protected void migrateJobsv8tov9() {
        String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN harvestname_prefix VARCHAR(100) DEFAULT NULL"};
        HarvestDBConnection.updateTable("jobs", 9, sqlStatements);
    }

    /**
     * Migrates the 'harvestdefinitions' table from version 2 to version 3: adds the nullable
     * varchar field 'audience'.
     */
    @Override
    protected void migrateHarvestdefinitionsv2tov3() {
        String[] sqlStatements = {"ALTER TABLE harvestdefinitions ADD COLUMN audience VARCHAR(100) DEFAULT NULL"};
        HarvestDBConnection.updateTable("harvestdefinitions", 3, sqlStatements);
    }

    /**
     * Migrates the 'harvestdefinitions' table from version 3 to version 4: adds the nullable
     * bigint field 'channel_id'.
     */
    @Override
    protected void migrateHarvestdefinitionsv3tov4() {
        String[] sqlStatements = {"ALTER TABLE harvestdefinitions ADD COLUMN channel_id BIGINT DEFAULT NULL"};
        HarvestDBConnection.updateTable("harvestdefinitions", 4, sqlStatements);
    }

    /**
     * Migrates the 'jobs' table from version 9 to version 10: replaces the int field 'priority'
     * with the fields 'channel' (varchar) and 'snapshot' (boolean). Existing rows with priority 0
     * become snapshot jobs on channel 'snapshot'; priority 1 rows become focused jobs on channel
     * 'focused'.
     */
    @Override
    protected void migrateJobsv9tov10() {
        String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN channel VARCHAR(300) DEFAULT NULL",
                "ALTER TABLE jobs ADD COLUMN snapshot BOOLEAN",
                "UPDATE jobs SET channel = 'snapshot' WHERE priority=0",
                "UPDATE jobs SET channel = 'focused' WHERE priority=1",
                "UPDATE jobs SET snapshot = true WHERE priority=0",
                "UPDATE jobs SET snapshot = false WHERE priority=1", "ALTER TABLE jobs DROP COLUMN priority"};
        HarvestDBConnection.updateTable("jobs", 10, sqlStatements);
    }

    /**
     * Creates the harvestchannel table (version 1) and seeds it with the two built-in channels
     * 'SNAPSHOT' and 'FOCUSED'.
     */
    @Override
    public void createHarvestChannelTable() {
        String createStatement = "CREATE TABLE harvestchannel ("
                + "id BIGINT NOT NULL GENERATED ALWAYS AS IDENTITY PRIMARY KEY, "
                + "name VARCHAR(300) NOT NULL UNIQUE," + "issnapshot BOOLEAN NOT NULL," + "isdefault BOOLEAN NOT NULL,"
                + "comments VARCHAR(30000)" + ")";
        String insertStatementOne = "INSERT INTO harvestchannel(name, issnapshot, isdefault, comments) "
                + "VALUES(\'SNAPSHOT\', true, true, \'Channel for snapshot harvests\')";
        String insertStatementTwo = "INSERT INTO harvestchannel(name, issnapshot, isdefault, comments) "
                + "VALUES(\'FOCUSED\', false, true, \'Channel for focused harvests\')";
        HarvestDBConnection.updateTable("harvestchannel", 1, new String[] {createStatement, insertStatementOne,
                insertStatementTwo});
    }

    /**
     * Migrates the 'ExtendedFieldTable' from version 1 to version 2 consisting of adding the maxlen field
     */
    protected void migrateExtendedFieldTableV1toV2() {
        String[] sqlStatements = {"ALTER TABLE extendedfield ADD COLUMN maxlen INT",
                "ALTER TABLE extendedfield ALTER options SET DATA TYPE VARCHAR(1000)"};
        HarvestDBConnection.updateTable("extendedfield", 2, sqlStatements);
    }

    /**
     * Migrates the 'ExtendedFieldValueTable' from version 1 to version 2 changing the maxlen of content to 30000
     */
    protected void migrateExtendedFieldTableValueV1toV2() {
        String[] sqlStatements = {"ALTER TABLE extendedfieldvalue ALTER content SET DATA TYPE VARCHAR(30000)",
                "ALTER TABLE extendedfieldvalue ALTER content NOT NULL"};
        HarvestDBConnection.updateTable("extendedfieldvalue", 2, sqlStatements);
    }

}