001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.harvester.datamodel; 024 025import java.sql.Connection; 026import java.sql.PreparedStatement; 027import java.sql.SQLException; 028 029import org.slf4j.Logger; 030import org.slf4j.LoggerFactory; 031 032import dk.netarkivet.common.exceptions.ArgumentNotValid; 033import dk.netarkivet.common.exceptions.IOFailure; 034import dk.netarkivet.common.utils.DBUtils; 035import dk.netarkivet.common.utils.ExceptionUtils; 036 037/** 038 * MySQL-specific implementation of DB methods. 039 */ 040public class MySQLSpecifics extends DBSpecifics { 041 042 /** The log. */ 043 private static final Logger log = LoggerFactory.getLogger(MySQLSpecifics.class); 044 045 /** 046 * Get an instance of the MySQL specifics class. 047 * 048 * @return Instance of the MySQL specifics class. 049 */ 050 public static DBSpecifics getInstance() { 051 return new MySQLSpecifics(); 052 } 053 054 /** 055 * Get a temporary table for short-time use. The table should be disposed of with dropTemporaryTable. The table has 056 * two columns domain_name varchar(Constants.MAX_NAME_SIZE) config_name varchar(Constants.MAX_NAME_SIZE) 057 * 058 * @param c The DB connection to use. 059 * @return The name of the created table 060 * @throws SQLException if there is a problem getting the table. 061 */ 062 public String getJobConfigsTmpTable(Connection c) throws SQLException { 063 ArgumentNotValid.checkNotNull(c, "Connection c"); 064 PreparedStatement s = c.prepareStatement("CREATE TEMPORARY TABLE jobconfignames " + "( domain_name varchar(" 065 + Constants.MAX_NAME_SIZE + "), " + " config_name varchar(" + Constants.MAX_NAME_SIZE + ") )"); 066 s.execute(); 067 s.close(); 068 return "jobconfignames"; 069 } 070 071 /** 072 * Dispose of a temporary table created with getTemporaryTable. This can be expected to be called from within a 073 * finally clause, so it mustn't throw exceptions. 074 * 075 * @param c The DB connection to use. 076 * @param tableName The name of the temporary table 077 */ 078 public void dropJobConfigsTmpTable(Connection c, String tableName) { 079 ArgumentNotValid.checkNotNull(c, "Connection c"); 080 ArgumentNotValid.checkNotNullOrEmpty(tableName, "String tableName"); 081 PreparedStatement s = null; 082 try { 083 // Now drop the temporary table 084 s = c.prepareStatement("DROP TEMPORARY TABLE " + tableName); 085 s.execute(); 086 } catch (SQLException e) { 087 log.warn("Couldn't drop temporary table {}\n{}", ExceptionUtils.getSQLExceptionCause(e), tableName, e); 088 } 089 } 090 091 /** 092 * Get the name of the JDBC driver class that handles interfacing to this server. 093 * 094 * @return The name of a JDBC driver class 095 */ 096 public String getDriverClassName() { 097 return "com.mysql.jdbc.Driver"; 098 } 099 100 /** 101 * Migrates the 'jobs' table from version 3 to version 4 consisting of a change of the field forcemaxbytes from int 102 * to bigint and setting its default to -1. Furthermore the default value for field num_configs is set to 0. 103 * 104 * @throws IOFailure in case of problems in interacting with the database 105 */ 106 protected synchronized void migrateJobsv3tov4() { 107 String[] sqlStatements = { 108 "ALTER TABLE jobs CHANGE COLUMN forcemaxbytes forcemaxbytes bigint not null default -1", 109 "ALTER TABLE jobs CHANGE COLUMN num_configs num_configs int not null default 0"}; 110 HarvestDBConnection.updateTable("jobs", 4, sqlStatements); 111 } 112 113 /** 114 * Migrates the 'jobs' table from version 4 to version 5 consisting of adding new fields 'resubmitted_as_job' and 115 * 'submittedDate'. 116 * 117 * @throws IOFailure in case of problems in interacting with the database 118 */ 119 protected synchronized void migrateJobsv4tov5() { 120 String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN submitteddate datetime AFTER enddate", 121 "ALTER TABLE jobs ADD COLUMN resubmitted_as_job bigint"}; 122 HarvestDBConnection.updateTable("jobs", 5, sqlStatements); 123 } 124 125 /** 126 * Migrates the 'configurations' table from version 3 to version 4. This consists of altering the default value of 127 * field 'maxbytes' to -1. 128 */ 129 protected synchronized void migrateConfigurationsv3ov4() { 130 // Update configurations table to version 4 131 String[] sqlStatements = {"ALTER TABLE configurations ALTER maxbytes SET DEFAULT -1"}; 132 HarvestDBConnection.updateTable("configurations", 4, sqlStatements); 133 } 134 135 /** 136 * Migrates the 'fullharvests' table from version 2 to version 3. This consists of altering the default value of 137 * field 'maxbytes' to -1 138 */ 139 protected synchronized void migrateFullharvestsv2tov3() { 140 // Update fullharvests table to version 3 141 String[] sqlStatements = {"ALTER TABLE fullharvests ALTER maxbytes SET DEFAULT -1"}; 142 HarvestDBConnection.updateTable("fullharvests", 3, sqlStatements); 143 } 144 145 /** Creates the initial (version 1) of table 'global_crawler_trap_lists'. */ 146 protected void createGlobalCrawlerTrapLists() { 147 String createStatement = "CREATE TABLE global_crawler_trap_lists(\n" 148 + " global_crawler_trap_list_id INT NOT NULL " + "AUTO_INCREMENT PRIMARY KEY,\n" 149 + " name VARCHAR(300) NOT NULL UNIQUE, " + " description VARCHAR(20000), " 150 + " isActive INT NOT NULL )"; 151 HarvestDBConnection.updateTable("global_crawler_trap_lists", 1, createStatement); 152 } 153 154 /** 155 * Creates the initial (version 1) of table 'global_crawler_trap_expressions'. 156 */ 157 protected void createGlobalCrawlerTrapExpressions() { 158 String createStatement = "CREATE TABLE global_crawler_trap_expressions(" 159 + " id bigint not null AUTO_INCREMENT " + "primary key," + " crawler_trap_list_id INT NOT NULL, " 160 + " trap_expression VARCHAR(1000) )"; 161 HarvestDBConnection.updateTable("global_crawler_trap_expressions", 1, createStatement); 162 } 163 164 @Override 165 public boolean supportsClob() { 166 return true; 167 } 168 169 @Override 170 public String getOrderByLimitAndOffsetSubClause(long limit, long offset) { 171 return "LIMIT " + offset + ", " + limit; 172 } 173 174 @Override 175 public void createFrontierReportMonitorTable() { 176 String createStatement = "CREATE TABLE frontierReportMonitor (" + "jobId bigint NOT NULL," 177 + "filterId varchar(200) NOT NULL," + "tstamp timestamp NOT NULL," 178 + "domainName varchar(300) NOT NULL," + "currentSize bigint NOT NULL," 179 + "totalEnqueues bigint NOT NULL," + "sessionBalance bigint NOT NULL," + "lastCost numeric NOT NULL," 180 + "averageCost numeric NOT NULL," + "lastDequeueTime varchar(100) NOT NULL," 181 + "wakeTime varchar(100) NOT NULL," + "totalSpend bigint NOT NULL," + "totalBudget bigint NOT NULL," 182 + "errorCount bigint NOT NULL," + "lastPeekUri varchar(1000) NOT NULL," 183 + "lastQueuedUri varchar(1000) NOT NULL," 184 // NB see http://bugs.mysql.com/bug.php?id=6604 about index key length. 185 + "UNIQUE (jobId, filterId(100), domainName(100))" + ")"; 186 HarvestDBConnection.updateTable("frontierreportmonitor", 1, createStatement); 187 188 } 189 190 @Override 191 public void createRunningJobsHistoryTable() { 192 String createStatement = "CREATE TABLE runningJobsHistory (" + "jobId bigint NOT NULL, " 193 + "harvestName varchar(300) NOT NULL," + "hostUrl varchar(300) NOT NULL," 194 + "progress numeric NOT NULL," + "queuedFilesCount bigint NOT NULL," 195 + "totalQueuesCount bigint NOT NULL," + "activeQueuesCount bigint NOT NULL," 196 + "exhaustedQueuesCount bigint NOT NULL," + "elapsedSeconds bigint NOT NULL," 197 + "alertsCount bigint NOT NULL," + "downloadedFilesCount bigint NOT NULL," 198 + "currentProcessedKBPerSec int NOT NULL," + "processedKBPerSec int NOT NULL," 199 + "currentProcessedDocsPerSec numeric NOT NULL," + "processedDocsPerSec numeric NOT NULL," 200 + "activeToeCount integer NOT NULL," + "status integer NOT NULL," + "tstamp timestamp NOT NULL, " 201 + "PRIMARY KEY (jobId, harvestName, elapsedSeconds, tstamp)" + ")"; 202 HarvestDBConnection.updateTable("runningjobshistory", 1, createStatement); 203 204 Connection c = HarvestDBConnection.get(); 205 try { 206 DBUtils.executeSQL(c, "CREATE INDEX runningJobsHistoryCrawlJobId on runningJobsHistory (jobId)", 207 "CREATE INDEX runningJobsHistoryCrawlTime on runningJobsHistory (elapsedSeconds)", 208 "CREATE INDEX runningJobsHistoryHarvestName on runningJobsHistory (harvestName)", 209 "GRANT SELECT,INSERT,UPDATE,DELETE ON TABLE runningJobsHistory TO netarchivesuite"); 210 } finally { 211 HarvestDBConnection.release(c); 212 } 213 } 214 215 @Override 216 public void createRunningJobsMonitorTable() { 217 String createStatement = "CREATE TABLE runningJobsMonitor (" + "jobId bigint NOT NULL," 218 + "harvestName varchar(300) NOT NULL," + "hostUrl varchar(300) NOT NULL," 219 + "progress numeric NOT NULL," + "queuedFilesCount bigint NOT NULL," 220 + "totalQueuesCount bigint NOT NULL," + "activeQueuesCount bigint NOT NULL," 221 + "exhaustedQueuesCount bigint NOT NULL," + "elapsedSeconds bigint NOT NULL," 222 + "alertsCount bigint NOT NULL," + "downloadedFilesCount bigint NOT NULL," 223 + "currentProcessedKBPerSec integer NOT NULL," + "processedKBPerSec integer NOT NULL," 224 + "currentProcessedDocsPerSec numeric NOT NULL," + "processedDocsPerSec numeric NOT NULL," 225 + "activeToeCount integer NOT NULL," + "status integer NOT NULL," + "tstamp timestamp NOT NULL," 226 + "PRIMARY KEY (jobId, harvestName)" + ")"; 227 HarvestDBConnection.updateTable("runningjobsmonitor", 1, createStatement); 228 229 Connection c = HarvestDBConnection.get(); 230 try { 231 DBUtils.executeSQL(c, "CREATE INDEX runningJobsMonitorJobId on runningJobsMonitor (jobId)", 232 "CREATE INDEX runningJobsMonitorHarvestName on runningJobsMonitor (harvestName)"); 233 } finally { 234 HarvestDBConnection.release(c); 235 } 236 } 237 238 // Below DB changes introduced with development release 3.15 239 // with changes to tables 'runningjobshistory', 'runningjobsmonitor', 240 // 'configurations', 'fullharvests', and 'jobs'. 241 242 /** 243 * Migrates the 'runningjobshistory' table from version 1 to version 2. This consists of adding the new column 244 * 'retiredQueuesCount'. 245 */ 246 @Override 247 protected void migrateRunningJobsHistoryTableV1ToV2() { 248 String[] sqlStatements = {"ALTER TABLE runningJobsHistory ADD COLUMN retiredQueuesCount bigint not null"}; 249 HarvestDBConnection.updateTable("runningjobshistory", 2, sqlStatements); 250 } 251 252 /** 253 * Migrates the 'runningjobsmonitor' table from version 1 to version 2. This consists of adding the new column 254 * 'retiredQueuesCount'. 255 */ 256 @Override 257 protected void migrateRunningJobsMonitorTableV1ToV2() { 258 String[] sqlStatements = {"ALTER TABLE runningJobsMonitor ADD COLUMN retiredQueuesCount bigint not null"}; 259 HarvestDBConnection.updateTable("runningjobsmonitor", 2, sqlStatements); 260 } 261 262 @Override 263 protected void migrateDomainsv2tov3() { 264 String[] sqlStatements = {"ALTER TABLE domains MODIFY crawlertraps LONGTEXT "}; 265 HarvestDBConnection.updateTable("domains", 3, sqlStatements); 266 } 267 268 @Override 269 protected void migrateConfigurationsv4tov5() { 270 // Update configurations table to version 5 271 String[] sqlStatements = {"ALTER TABLE configurations MODIFY maxobjects bigint"}; 272 HarvestDBConnection.updateTable("configurations", 5, sqlStatements); 273 } 274 275 @Override 276 protected void migrateFullharvestsv3tov4() { 277 // Update fullharvests table to version 4 278 String[] sqlStatements = {"ALTER TABLE fullharvests ADD COLUMN maxjobrunningtime bigint NOT NULL DEFAULT 0"}; 279 HarvestDBConnection.updateTable("fullharvests", 4, sqlStatements); 280 } 281 282 @Override 283 protected void migrateJobsv5tov6() { 284 // Update jobs table to version 6 285 String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN forcemaxrunningtime bigint NOT NULL DEFAULT 0 AFTER forcemaxcount"}; 286 HarvestDBConnection.updateTable("jobs", 6, sqlStatements); 287 288 } 289 290 @Override 291 protected void migrateFullharvestsv4tov5() { 292 // Update fullharvests table to version 4 293 String[] sqlStatements = {"ALTER TABLE fullharvests ADD COLUMN isindexready int NOT NULL DEFAULT 0"}; 294 HarvestDBConnection.updateTable("fullharvests", 5, sqlStatements); 295 } 296 297 @Override 298 protected void createExtendedFieldTypeTable() { 299 String[] statements = new String[3]; 300 statements[0] = "CREATE TABLE extendedfieldtype " + " ( " 301 + " extendedfieldtype_id BIGINT NOT NULL PRIMARY KEY, " 302 + " name VARCHAR(50) NOT NULL " + " )"; 303 304 statements[1] = "INSERT INTO extendedfieldtype ( extendedfieldtype_id, name )" + " VALUES ( 1, 'domains')"; 305 statements[2] = "INSERT INTO extendedfieldtype ( extendedfieldtype_id, name )" 306 + " VALUES ( 2, 'harvestdefinitions')"; 307 308 HarvestDBConnection.updateTable("extendedfieldtype", 1, statements); 309 } 310 311 @Override 312 protected void createExtendedFieldTable() { 313 String createStatement = "CREATE TABLE extendedfield " + " ( " 314 + " extendedfield_id BIGINT NOT NULL PRIMARY KEY, " + " extendedfieldtype_id BIGINT NOT NULL, " 315 + " name VARCHAR(50) NOT NULL, " + " format VARCHAR(50) NOT NULL, " 316 + " defaultvalue VARCHAR(50) NOT NULL, " + " options VARCHAR(50) NOT NULL, " 317 + " datatype INT NOT NULL, " + " mandatory INT NOT NULL, " 318 + " sequencenr INT " + " )"; 319 320 HarvestDBConnection.updateTable("extendedfield", 1, createStatement); 321 } 322 323 @Override 324 protected void createExtendedFieldValueTable() { 325 String createStatement = "CREATE TABLE extendedfieldvalue " + " ( " 326 + " extendedfieldvalue_id BIGINT NOT NULL PRIMARY KEY, " 327 + " extendedfield_id BIGINT NOT NULL, " + " instance_id BIGINT NOT NULL, " 328 + " content VARCHAR(100) NOT NULL " + " )"; 329 330 HarvestDBConnection.updateTable("extendedfieldvalue", 1, createStatement); 331 } 332 333 @Override 334 protected synchronized void migrateJobsv6tov7() { 335 String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN continuationof BIGINT DEFAULT NULL"}; 336 HarvestDBConnection.updateTable("jobs", 7, sqlStatements); 337 } 338 339 @Override 340 protected void migrateJobsv7tov8() { 341 String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN creationdate TIMESTAMP"}; 342 HarvestDBConnection.updateTable("jobs", 8, sqlStatements); 343 } 344 345 @Override 346 protected void migrateJobsv8tov9() { 347 String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN harvestname_prefix VARCHAR(100)"}; 348 HarvestDBConnection.updateTable("jobs", 9, sqlStatements); 349 } 350 351 @Override 352 protected void migrateHarvestdefinitionsv2tov3() { 353 String[] sqlStatements = {"ALTER TABLE harvestdefinitions ADD COLUMN audience VARCHAR(100) DEFAULT NULL"}; 354 HarvestDBConnection.updateTable("harvestdefinitions", 3, sqlStatements); 355 } 356 357 @Override 358 protected void migrateHarvestdefinitionsv3tov4() { 359 String[] sqlStatements = {"ALTER TABLE harvestdefinitions ADD COLUMN channel_id BIGINT DEFAULT NULL"}; 360 HarvestDBConnection.updateTable("harvestdefinitions", 4, sqlStatements); 361 } 362 363 @Override 364 protected void migrateJobsv9tov10() { 365 String[] sqlStatements = {"ALTER TABLE jobs ADD COLUMN channel VARCHAR(300) DEFAULT NULL", 366 "ALTER TABLE jobs ADD COLUMN snapshot BOOL", "UPDATE jobs SET channel = 'snapshot' WHERE priority=0", 367 "UPDATE jobs SET channel = 'focused' WHERE priority=1", 368 "UPDATE jobs SET snapshot = true WHERE priority=0", 369 "UPDATE jobs SET snapshot = false WHERE priority=1", "ALTER TABLE jobs DROP COLUMN priority"}; 370 HarvestDBConnection.updateTable("jobs", 10, sqlStatements); 371 } 372 373 @Override 374 protected void createHarvestChannelTable() { 375 String createStatement = "CREATE TABLE harvestchannel (" + "id BIGINT NOT NULL PRIMARY KEY, " 376 + "name VARCHAR(250) NOT NULL UNIQUE," + "issnapshot BOOL NOT NULL," + "isdefault BOOL NOT NULL," 377 // + "comments VARCHAR(30000)" 378 + "comments TEXT" + ")"; 379 380 String insertStatementOne = "INSERT INTO harvestchannel(id, name, issnapshot, isdefault, comments) " 381 + "VALUES(1, \'SNAPSHOT\', true, true, \'Channel for snapshot harvests\')"; 382 String insertStatementTwo = "INSERT INTO harvestchannel(id, name, issnapshot, isdefault, comments) " 383 + "VALUES(2, \'FOCUSED\', false, true, \'Channel for focused harvests\')"; 384 HarvestDBConnection.updateTable("harvestchannel", 1, new String[] {createStatement, insertStatementOne, 385 insertStatementTwo}); 386 } 387 388 /** 389 * Migrates the 'ExtendedFieldTable' from version 1 to version 2 consisting of adding the maxlen field 390 */ 391 protected void migrateExtendedFieldTableV1toV2() { 392 String[] sqlStatements = {"ALTER TABLE extendedfield ADD COLUMN maxlen INT", 393 "ALTER TABLE extendedfield MODIFY options TEXT"}; 394 HarvestDBConnection.updateTable("extendedfield", 2, sqlStatements); 395 } 396 397 /** 398 * Migrates the 'ExtendedFieldValueTable' from version 1 to version 2 changing the maxlen of content to 30000 399 */ 400 protected void migrateExtendedFieldTableValueV1toV2() { 401 String[] sqlStatements = { 402 // "ALTER TABLE extendedfieldvalue MODIFY content VARCHAR(30000) NOT NULL" 403 "ALTER TABLE extendedfieldvalue MODIFY content TEXT NOT NULL"}; 404 HarvestDBConnection.updateTable("extendedfieldvalue", 2, sqlStatements); 405 } 406 407}