001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.datamodel;
024
025import java.util.Iterator;
026import java.util.List;
027
028import dk.netarkivet.common.exceptions.ArgumentNotValid;
029import dk.netarkivet.common.exceptions.IOFailure;
030import dk.netarkivet.common.exceptions.IllegalState;
031import dk.netarkivet.common.exceptions.PermissionDenied;
032import dk.netarkivet.common.exceptions.UnknownID;
033import dk.netarkivet.harvester.webinterface.HarvestStatus;
034import dk.netarkivet.harvester.webinterface.HarvestStatusQuery;
035
036/**
037 * Interface for creating and accessing jobs in persistent storage.
038 */
039public abstract class JobDAO implements DAO, Iterable<Job> {
040
041    /** The database singleton model. */
042    private static JobDAO instance;
043
044    /**
045     * constructor used when creating singleton. Do not call directly.
046     */
047    protected JobDAO() {
048    }
049
050    /**
051     * Gets the JobDAO singleton.
052     *
053     * @return the JobDAO singleton
054     */
055    public static synchronized JobDAO getInstance() {
056        if (instance == null) {
057            instance = new JobDBDAO();
058        }
059        return instance;
060    }
061
062    /**
063     * Creates an instance in persistent storage of the given job. If the job doesn't have an ID, one is generated for
064     * it.
065     *
066     * @param job a job to create in persistent storage.
067     * @throws PermissionDenied If a job already exists in persistent storage with id of the given job
068     * @throws IOFailure If some IOException occurs while writing the job
069     */
070    public abstract void create(Job job);
071
072    /**
073     * Check whether a particular job exists.
074     *
075     * @param jobID Id of the job.
076     * @return true if the job exists in any state.
077     */
078    public abstract boolean exists(Long jobID);
079
080    /**
081     * Returns the number of jobs existing.
082     *
083     * @return Number of jobs in jobs directory
084     */
085    public abstract int getCountJobs();
086
087    /**
088     * Reads a job from persistent storage.
089     *
090     * @param jobID The ID of the job to read
091     * @return a Job instance
092     * @throws ArgumentNotValid If failed to create job instance in case the configuration or priority is null, or the
093     * harvestID is invalid.
094     * @throws UnknownID If the job with the given jobID does not exist in persistent storage.
095     * @throws IOFailure If the loaded ID of job does not match the expected.
096     */
097    public abstract Job read(long jobID) throws ArgumentNotValid, UnknownID, IOFailure;
098
099    /**
100     * Update a Job in persistent storage.
101     *
102     * @param job The Job to update
103     * @throws ArgumentNotValid If the Job is null
104     * @throws UnknownID If the Job doesn't exist in the DAO
105     * @throws IOFailure If writing the job to persistent storage fails
106     * @throws PermissionDenied If the job has been updated behind our backs
107     */
108    public abstract void update(Job job) throws IOFailure;
109
110    /**
111     * Reset the DAO instance. Only for use from within tests.
112     */
113    public static void reset() {
114        instance = null;
115    }
116
117    /**
118     * Return a list of all jobs with the given status.
119     *
120     * @param status A given status.
121     * @return A list of all job with given status
122     * @throws ArgumentNotValid If the given status is not one of the six valid states specified in JobStatus.
123     */
124    public abstract Iterator<Job> getAll(JobStatus status);
125
126    /**
127     * Return a list of all job_id's representing jobs with the given status.
128     *
129     * @param status A given status.
130     * @return A list of all job_id's representing jobs with given status
131     * @throws ArgumentNotValid If the given status is not one of the six valid states specified in JobStatus.
132     */
133    public abstract Iterator<Long> getAllJobIds(JobStatus status);
134
135    /**
136     * Return a list of all job_id's representing jobs with the given status and channel.
137     *
138     * @param status A given status
139     * @param channel A given {@link HarvestChannel}
140     * @return A list of all job_id's representing jobs with given status and channel.
141     */
142    public abstract Iterator<Long> getAllJobIds(JobStatus status, HarvestChannel channel);
143
144    /**
145     * Return a list of all jobs .
146     *
147     * @return A list of all jobs
148     */
149    public abstract Iterator<Job> getAll();
150
151    /**
152     * Gets an iterator of all jobs. Implements the Iterable interface.
153     *
154     * @return Iterator of all jobs, regardless of status.
155     */
156    public Iterator<Job> iterator() {
157        return getAll();
158    }
159
160    /**
161     * Return a list of all job_ids .
162     *
163     * @return A list of all job_ids
164     */
165    public abstract Iterator<Long> getAllJobIds();
166
167    /**
168     * Return status information for all jobs defined by the supplied query.
169     *
170     * @param query the user query
171     * @return A HarvestStatus object corresponding to the given query.
172     * @throws IOFailure on trouble in database access
173     */
174    public abstract HarvestStatus getStatusInfo(HarvestStatusQuery query);
175
176    /**
177     * Return status information for all jobs with given job status.
178     *
179     * @param status The status asked for.
180     * @return A list of status objects with the pertinent information for all jobs with given job status.
181     * @throws IOFailure on trouble in database access
182     */
183    public abstract List<JobStatusInfo> getStatusInfo(JobStatus status);
184
185    /**
186     * Calculate all jobIDs to use for duplication reduction.
187     * <p>
188     * More precisely, this method calculates the following: If the job ID corresponds to a partial harvest, all jobIDs
189     * from the previous scheduled harvest are returned, or the empty list if this harvest hasn't been scheduled before.
190     * <p>
191     * If the job ID corresponds to a full harvest, the entire chain of harvests this is based on is returned, and all
192     * jobIDs from the previous chain of full harvests is returned.
193     *
194     * @param jobID The job ID to find duplicate reduction data for.
195     * @return A list of job IDs (possibly empty) of potential previous harvests of this job, to use for duplicate
196     * reduction.
197     * @throws UnknownID if job ID is unknown
198     * @throws IOFailure on trouble getting jobIDs for deduplication from the metadata archive file. 
199     */
200    public abstract List<Long> getJobIDsForDuplicateReduction(long jobID) throws UnknownID;
201
202    /**
203     * Reschedule a job by creating a new job (in status NEW) and setting the old job to status RESUBMITTED.
204     * <p>
205     * Notice the slightly confusing naming: The only job is marked RESUBMITTED, but the new job is not really
206     * submitted, that happens in a separate stage, the new job is in status NEW.
207     *
208     * @param oldJobID ID of a job to reschedule
209     * @return ID of the newly created job
210     * @throws UnknownID if no job exists with id jobID
211     * @throws IllegalState if the job with id jobID is not SUBMITTED or FAILED.
212     */
213    public abstract long rescheduleJob(long oldJobID);
214
215    /**
216     * Get Jobstatus for the job with the given id.
217     *
218     * @param jobID A given Jobid
219     * @return the Jobstatus for the job with the given id.
220     * @throws UnknownID if no job exists with id jobID
221     */
222    public abstract JobStatus getJobStatus(Long jobID);
223
224    /**
225     * Get a list of AliasInfo objects for all the domains included in the job.
226     *
227     * @return a list of AliasInfo objects for all the domains included in the job.
228     */
229    public abstract List<AliasInfo> getJobAliasInfo(Job job);
230}