001/* 002 * #%L 003 * Netarchivesuite - harvester 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.harvester.datamodel; 024 025import java.util.Iterator; 026import java.util.List; 027 028import dk.netarkivet.common.exceptions.ArgumentNotValid; 029import dk.netarkivet.common.exceptions.IOFailure; 030import dk.netarkivet.common.exceptions.IllegalState; 031import dk.netarkivet.common.exceptions.PermissionDenied; 032import dk.netarkivet.common.exceptions.UnknownID; 033import dk.netarkivet.harvester.webinterface.HarvestStatus; 034import dk.netarkivet.harvester.webinterface.HarvestStatusQuery; 035 036/** 037 * Interface for creating and accessing jobs in persistent storage. 038 */ 039public abstract class JobDAO implements DAO, Iterable<Job> { 040 041 /** The database singleton model. */ 042 private static JobDAO instance; 043 044 /** 045 * constructor used when creating singleton. Do not call directly. 046 */ 047 protected JobDAO() { 048 } 049 050 /** 051 * Gets the JobDAO singleton. 052 * 053 * @return the JobDAO singleton 054 */ 055 public static synchronized JobDAO getInstance() { 056 if (instance == null) { 057 instance = new JobDBDAO(); 058 } 059 return instance; 060 } 061 062 /** 063 * Creates an instance in persistent storage of the given job. If the job doesn't have an ID, one is generated for 064 * it. 065 * 066 * @param job a job to create in persistent storage. 067 * @throws PermissionDenied If a job already exists in persistent storage with id of the given job 068 * @throws IOFailure If some IOException occurs while writing the job 069 */ 070 public abstract void create(Job job); 071 072 /** 073 * Check whether a particular job exists. 074 * 075 * @param jobID Id of the job. 076 * @return true if the job exists in any state. 077 */ 078 public abstract boolean exists(Long jobID); 079 080 /** 081 * Returns the number of jobs existing. 082 * 083 * @return Number of jobs in jobs directory 084 */ 085 public abstract int getCountJobs(); 086 087 /** 088 * Reads a job from persistent storage. 089 * 090 * @param jobID The ID of the job to read 091 * @return a Job instance 092 * @throws ArgumentNotValid If failed to create job instance in case the configuration or priority is null, or the 093 * harvestID is invalid. 094 * @throws UnknownID If the job with the given jobID does not exist in persistent storage. 095 * @throws IOFailure If the loaded ID of job does not match the expected. 096 */ 097 public abstract Job read(long jobID) throws ArgumentNotValid, UnknownID, IOFailure; 098 099 /** 100 * Update a Job in persistent storage. 101 * 102 * @param job The Job to update 103 * @throws ArgumentNotValid If the Job is null 104 * @throws UnknownID If the Job doesn't exist in the DAO 105 * @throws IOFailure If writing the job to persistent storage fails 106 * @throws PermissionDenied If the job has been updated behind our backs 107 */ 108 public abstract void update(Job job) throws IOFailure; 109 110 /** 111 * Reset the DAO instance. Only for use from within tests. 112 */ 113 public static void reset() { 114 instance = null; 115 } 116 117 /** 118 * Return a list of all jobs with the given status. 119 * 120 * @param status A given status. 121 * @return A list of all job with given status 122 * @throws ArgumentNotValid If the given status is not one of the six valid states specified in JobStatus. 123 */ 124 public abstract Iterator<Job> getAll(JobStatus status); 125 126 /** 127 * Return a list of all job_id's representing jobs with the given status. 128 * 129 * @param status A given status. 130 * @return A list of all job_id's representing jobs with given status 131 * @throws ArgumentNotValid If the given status is not one of the six valid states specified in JobStatus. 132 */ 133 public abstract Iterator<Long> getAllJobIds(JobStatus status); 134 135 /** 136 * Return a list of all job_id's representing jobs with the given status and channel. 137 * 138 * @param status A given status 139 * @param channel A given {@link HarvestChannel} 140 * @return A list of all job_id's representing jobs with given status and channel. 141 */ 142 public abstract Iterator<Long> getAllJobIds(JobStatus status, HarvestChannel channel); 143 144 /** 145 * Return a list of all jobs . 146 * 147 * @return A list of all jobs 148 */ 149 public abstract Iterator<Job> getAll(); 150 151 /** 152 * Gets an iterator of all jobs. Implements the Iterable interface. 153 * 154 * @return Iterator of all jobs, regardless of status. 155 */ 156 public Iterator<Job> iterator() { 157 return getAll(); 158 } 159 160 /** 161 * Return a list of all job_ids . 162 * 163 * @return A list of all job_ids 164 */ 165 public abstract Iterator<Long> getAllJobIds(); 166 167 /** 168 * Return status information for all jobs defined by the supplied query. 169 * 170 * @param query the user query 171 * @return A HarvestStatus object corresponding to the given query. 172 * @throws IOFailure on trouble in database access 173 */ 174 public abstract HarvestStatus getStatusInfo(HarvestStatusQuery query); 175 176 /** 177 * Return status information for all jobs with given job status. 178 * 179 * @param status The status asked for. 180 * @return A list of status objects with the pertinent information for all jobs with given job status. 181 * @throws IOFailure on trouble in database access 182 */ 183 public abstract List<JobStatusInfo> getStatusInfo(JobStatus status); 184 185 /** 186 * Calculate all jobIDs to use for duplication reduction. 187 * <p> 188 * More precisely, this method calculates the following: If the job ID corresponds to a partial harvest, all jobIDs 189 * from the previous scheduled harvest are returned, or the empty list if this harvest hasn't been scheduled before. 190 * <p> 191 * If the job ID corresponds to a full harvest, the entire chain of harvests this is based on is returned, and all 192 * jobIDs from the previous chain of full harvests is returned. 193 * 194 * @param jobID The job ID to find duplicate reduction data for. 195 * @return A list of job IDs (possibly empty) of potential previous harvests of this job, to use for duplicate 196 * reduction. 197 * @throws UnknownID if job ID is unknown 198 * @throws IOFailure on trouble getting jobIDs for deduplication from the metadata archive file. 199 */ 200 public abstract List<Long> getJobIDsForDuplicateReduction(long jobID) throws UnknownID; 201 202 /** 203 * Reschedule a job by creating a new job (in status NEW) and setting the old job to status RESUBMITTED. 204 * <p> 205 * Notice the slightly confusing naming: The only job is marked RESUBMITTED, but the new job is not really 206 * submitted, that happens in a separate stage, the new job is in status NEW. 207 * 208 * @param oldJobID ID of a job to reschedule 209 * @return ID of the newly created job 210 * @throws UnknownID if no job exists with id jobID 211 * @throws IllegalState if the job with id jobID is not SUBMITTED or FAILED. 212 */ 213 public abstract long rescheduleJob(long oldJobID); 214 215 /** 216 * Get Jobstatus for the job with the given id. 217 * 218 * @param jobID A given Jobid 219 * @return the Jobstatus for the job with the given id. 220 * @throws UnknownID if no job exists with id jobID 221 */ 222 public abstract JobStatus getJobStatus(Long jobID); 223 224 /** 225 * Get a list of AliasInfo objects for all the domains included in the job. 226 * 227 * @return a list of AliasInfo objects for all the domains included in the job. 228 */ 229 public abstract List<AliasInfo> getJobAliasInfo(Job job); 230}