/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.harvester.webinterface;

import java.sql.SQLException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.inject.Provider;
import javax.servlet.ServletRequest;
import javax.servlet.jsp.PageContext;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.antiaction.raptor.dao.AttributeBase;
import com.antiaction.raptor.dao.AttributeTypeBase;

import dk.netarkivet.common.distribute.indexserver.IndexClientFactory;
import dk.netarkivet.common.distribute.indexserver.JobIndexCache;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.ForwardedToErrorPage;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.I18n;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.webinterface.HTMLUtils;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.DomainDAO;
import dk.netarkivet.harvester.datamodel.FullHarvest;
import dk.netarkivet.harvester.datamodel.HarvestDefinition;
import dk.netarkivet.harvester.datamodel.HarvestDefinitionDAO;
import dk.netarkivet.harvester.datamodel.JobDAO;
import dk.netarkivet.harvester.datamodel.JobStatus;
import dk.netarkivet.harvester.datamodel.JobStatusInfo;
import dk.netarkivet.harvester.datamodel.dao.DAOProviderFactory;
import dk.netarkivet.harvester.datamodel.eav.EAV;
import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType;
import dk.netarkivet.harvester.datamodel.extendedfield.ExtendedFieldDAO;

/**
 * Contains utility methods supporting the GUI for creating and updating snapshot harvests.
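 * <p>
 * Typical usage, as an illustrative sketch only (in the running system these calls are made from
 * Definitions-edit-snapshot-harvest.jsp, and {@code pageContext} and {@code I18N} below stand for the page's
 * PageContext and I18n instances):
 * <pre>{@code
 * SnapshotHarvestDefinition snapshotHd =
 *         SnapshotHarvestDefinition.createSnapshotHarvestDefinitionWithDefaultDAOs();
 * // Create or update the snapshot harvest described by the request parameters.
 * snapshotHd.processRequest(pageContext, I18N);
 * // Optionally toggle activation of the harvest named in the "flipactive" parameter.
 * boolean changed = snapshotHd.flipActive(pageContext, I18N);
 * }</pre>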
 */
public class SnapshotHarvestDefinition {
    protected static final Logger log = LoggerFactory.getLogger(SnapshotHarvestDefinition.class);
    private final Provider<HarvestDefinitionDAO> hdDaoProvider;
    private final Provider<JobDAO> jobDaoProvider;
    private final Provider<ExtendedFieldDAO> extendedFieldDAOProvider;
    private final Provider<DomainDAO> domainDAOProvider;
    private final Provider<EAV> eavDAOProvider;

    /**
     * Constructor.
     * @param hdDaoProvider Provider for HarvestDefinitions
     * @param jobDaoProvider Provider for Jobs
     * @param extendedFieldDAOProvider Provider for ExtendedFields
     * @param domainDAOProvider Provider for Domains
     * @param eavDAOProvider Provider for EAV attributes
     */
    public SnapshotHarvestDefinition(Provider<HarvestDefinitionDAO> hdDaoProvider, Provider<JobDAO> jobDaoProvider,
            Provider<ExtendedFieldDAO> extendedFieldDAOProvider, Provider<DomainDAO> domainDAOProvider,
            Provider<EAV> eavDAOProvider) {
        this.hdDaoProvider = hdDaoProvider;
        this.jobDaoProvider = jobDaoProvider;
        this.extendedFieldDAOProvider = extendedFieldDAOProvider;
        this.domainDAOProvider = domainDAOProvider;
        this.eavDAOProvider = eavDAOProvider;
    }

    /**
     * Factory method creating an instance backed by the default DAO providers.
     *
     * @return a default SnapshotHarvestDefinition
     */
    public static SnapshotHarvestDefinition createSnapshotHarvestDefinitionWithDefaultDAOs() {
        return new SnapshotHarvestDefinition(DAOProviderFactory.getHarvestDefinitionDAOProvider(),
                DAOProviderFactory.getJobDAOProvider(), DAOProviderFactory.getExtendedFieldDAOProvider(),
                DAOProviderFactory.getDomainDAOProvider(), DAOProviderFactory.getEAVDAOProvider());
    }

    /**
     * Extracts all required parameters from the request, checks them for inconsistencies, and then either creates a
     * new snapshot harvest definition or updates an existing one. If the "update" parameter is not set, this method
     * does nothing.
     * <p>
     * The parameters in the request are defined in Definitions-edit-snapshot-harvest.jsp.
     *
     * @param context The context of the web request.
     * @param i18n Translation information
     * @throws ForwardedToErrorPage if an error happened that caused a forward to the standard error page, in which case
     * further JSP processing should be aborted.
     */
    public void processRequest(PageContext context, I18n i18n) {
        ArgumentNotValid.checkNotNull(context, "PageContext context");
        ArgumentNotValid.checkNotNull(i18n, "I18n i18n");

        ServletRequest request = context.getRequest();
        if (request.getParameter(Constants.UPDATE_PARAM) == null) {
            return;
        }

        HTMLUtils.forwardOnEmptyParameter(context, Constants.HARVEST_PARAM);

        String oldname = request.getParameter(Constants.HARVEST_OLD_PARAM);
        if (oldname == null) {
            oldname = "";
        }
        String name = request.getParameter(Constants.HARVEST_PARAM);
        String comments = request.getParameter(Constants.COMMENTS_PARAM);

        long objectLimit = HTMLUtils.parseOptionalLong(context, Constants.DOMAIN_OBJECTLIMIT_PARAM,
                dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_OBJECTS);
        long byteLimit = HTMLUtils.parseOptionalLong(context, Constants.DOMAIN_BYTELIMIT_PARAM,
                dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_BYTES);
        long runningtimeLimit = HTMLUtils.parseOptionalLong(context, Constants.JOB_TIMELIMIT_PARAM,
                dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_JOB_RUNNING_TIME);

        Long oldHarvestId = HTMLUtils.parseOptionalLong(context, Constants.OLDSNAPSHOT_PARAM, null);

        if (oldHarvestId != null && !hdDaoProvider.get().exists(oldHarvestId)) {
            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.does.not.exist",
                    oldHarvestId);
            throw new ForwardedToErrorPage("Old harvestdefinition " + oldHarvestId + " does not exist");
        }

        FullHarvest hd;
        if (request.getParameter(Constants.CREATENEW_PARAM) != null) {
            if (hdDaoProvider.get().getHarvestDefinition(name) != null) {
                HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.definition.0.already.exists", name);
                throw new ForwardedToErrorPage("Harvest definition '" + name + "' already exists");
            }
            // Note: object/byte limits are set to the default values if not given in the request.
            hd = new FullHarvest(name, comments, oldHarvestId, objectLimit, byteLimit, runningtimeLimit, false,
                    hdDaoProvider, jobDaoProvider, extendedFieldDAOProvider, domainDAOProvider);
            hd.setActive(false);
            hdDaoProvider.get().create(hd);
        } else {
            if (oldname.equals(name)) { // name is unchanged
                hd = (FullHarvest) hdDaoProvider.get().getHarvestDefinition(name);
            } else {
                // test that the name does not exist already
                if (hdDaoProvider.get().exists(name)) {
                    HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.definition.0.already.exists", name);
                    throw new ForwardedToErrorPage("Harvest definition '" + name + "' already exists");
                } else {
                    hd = (FullHarvest) hdDaoProvider.get().getHarvestDefinition(oldname);
                    hd.setName(name);
                }
            }
            if (hd == null) {
                HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvest.0.does.not.exist", name);
                throw new UnknownID("Harvest definition '" + name + "' doesn't exist!");
            }
            long edition = HTMLUtils.parseOptionalLong(context, Constants.EDITION_PARAM, Constants.NO_EDITION);

            if (hd.getEdition() != edition) {
                HTMLUtils.forwardWithRawErrorMessage(context, i18n, "errormsg;harvest.definition.changed.0.retry.1",
                        "<br/><a href=\"Definitions-edit-snapshot-harvest.jsp?" + Constants.HARVEST_PARAM + "="
                                + HTMLUtils.encodeAndEscapeHTML(name) + "\">", "</a>");

                throw new ForwardedToErrorPage("Harvest definition '" + name + "' has changed");
            }

            // MaxBytes is set to
            // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_BYTES
            // if parameter snapshot_byte_Limit is not defined
            hd.setMaxBytes(byteLimit);

            // MaxCountObjects is set to
            // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_OBJECTS
            // if parameter snapshot_object_limit is not defined
            hd.setMaxCountObjects(objectLimit);

            // MaxJobRunningTime is set to
            // dk.netarkivet.harvester.datamodel.Constants.DEFAULT_MAX_JOB_RUNNING_TIME
            // if parameter snapshot_time_limit is not defined
            hd.setMaxJobRunningTime(runningtimeLimit);

            hd.setPreviousHarvestDefinition(oldHarvestId);
            hd.setComments(comments);
            hdDaoProvider.get().update(hd);
        }

        // EAV: store any extended attributes defined for snapshot harvests.
        try {
            Long entityId = hd.getOid();
            if (entityId == null) {
                entityId = 0L;
            }
            EAV eav = eavDAOProvider.get();
            List<AttributeAndType> attributesAndTypes = eav.getAttributesAndTypes(EAV.SNAPSHOT_TREE_ID,
                    (int) ((long) entityId));
            for (AttributeAndType attributeAndType : attributesAndTypes) {
                AttributeTypeBase attributeType = attributeAndType.attributeType;
                AttributeBase attribute = attributeAndType.attribute;
                if (attribute == null) {
                    attribute = attributeType.instanceOf();
                    attribute.entity_id = (int) ((long) entityId);
                }
                switch (attributeType.viewtype) {
                case 1:
                    // Numeric field: fall back to the attribute type's default if the parameter is absent.
                    long l = HTMLUtils.parseOptionalLong(context, attributeType.name, (long) attributeType.def_int);
                    attribute.setInteger((int) l);
                    break;
                case 5:
                case 6:
                    // Boolean-style field: a missing parameter or "0" stores 0, any other value stores 1.
                    String paramValue = context.getRequest().getParameter(attributeType.name);
                    int intVal = 0;
                    if (paramValue != null && !"0".equals(paramValue)) {
                        intVal = 1;
                    }
                    attribute.setInteger(intVal);
                    break;
                }
                eav.saveAttribute(attribute);
            }
        } catch (SQLException e) {
            throw new RuntimeException("Unable to store EAV data!", e);
        }
    }

    /**
     * Flip the active status of the harvest definition named in the "flipactive" parameter.
     *
     * @param context The context of the web servlet
     * @param i18n Translation information
     * @return True if a harvest definition changed state.
     */
    public boolean flipActive(PageContext context, I18n i18n) {
        ArgumentNotValid.checkNotNull(context, "PageContext context");
        ArgumentNotValid.checkNotNull(i18n, "I18n i18n");

        ServletRequest request = context.getRequest();
        String flipactive = request.getParameter(Constants.FLIPACTIVE_PARAM);
        // Change activation if requested
        if (flipactive != null) {
            HarvestDefinition hd = hdDaoProvider.get().getHarvestDefinition(flipactive);
            if (hd != null) {
                boolean isActive = hd.getActive();
                boolean useDeduplication = Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED);
                if (!isActive) {
                    if (hd instanceof FullHarvest) {
                        FullHarvest fhd = (FullHarvest) hd;
                        validatePreviousHd(fhd, context, i18n);
                        if (useDeduplication) {
                            // The client for requesting job index.
                            JobIndexCache jobIndexCache = IndexClientFactory.getDedupCrawllogInstance();
                            Long harvestId = fhd.getOid();
                            Set<Long> jobSet = hdDaoProvider.get().getJobIdsForSnapshotDeduplicationIndex(harvestId);
                            jobIndexCache.requestIndex(jobSet, harvestId);
                        } else {
                            // If deduplication is disabled, set indexReady to true
                            // right now, so job generation can proceed.
                            fhd.setIndexReady(true);
                        }
                    } else { // hd is not a FullHarvest
                        log.warn("Harvestdefinition #{} is not a FullHarvest but a {}", hd.getOid(),
                                hd.getClass().getName());
                        return false;
                    }
                }
                hd.setActive(!hd.getActive());
                hdDaoProvider.get().update(hd);
                return true;
            } else {
                HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.does.not.exist",
                        flipactive);
                throw new ForwardedToErrorPage("Harvest definition " + flipactive + " doesn't exist");
            }
        }
        return false;
    }

    /**
     * Validate the previous harvest definition of the given FullHarvest. The validation checks that the previous
     * harvest definition represents a completed FullHarvest: Check 1: it has one or more jobs. Check 2: none of its
     * jobs have status NEW, SUBMITTED, or STARTED.
     *
     * @param hd A given FullHarvest
     * @param context The context of the web request.
     * @param i18n Translation information
     */
    private void validatePreviousHd(FullHarvest hd, PageContext context, I18n i18n) {
        HarvestDefinition preHd = hd.getPreviousHarvestDefinition();
        if (preHd == null) {
            return; // no validation needed
        }

        // This query represents check one
        HarvestStatusQuery hsq1 = new HarvestStatusQuery(preHd.getOid(), 1);
        // This query represents check two
        HarvestStatusQuery hsq2 = new HarvestStatusQuery(preHd.getOid(), 1);
        // States needed to update the query for check two.
        Set<JobStatus> chosenStates = new HashSet<JobStatus>();
        chosenStates.add(JobStatus.NEW);
        chosenStates.add(JobStatus.SUBMITTED);
        chosenStates.add(JobStatus.STARTED);
        hsq2.setJobStatus(chosenStates);
        HarvestStatus hs1 = jobDaoProvider.get().getStatusInfo(hsq1);
        HarvestStatus hs2 = jobDaoProvider.get().getStatusInfo(hsq2);
        if (hs1.getJobStatusInfo().isEmpty() || !hs2.getJobStatusInfo().isEmpty()) {
            if (hs1.getJobStatusInfo().isEmpty()) {
                log.debug("Cannot base snapshot job on old job, because no jobs were generated for {}.",
                        preHd.getName());
            }
            if (!hs2.getJobStatusInfo().isEmpty()) {
                for (JobStatusInfo jobStatusInfo : hs2.getJobStatusInfo()) {
                    log.debug("Cannot activate new jobs for {} because job {} was found in state {}.", hd.getName(),
                            jobStatusInfo.getJobID(), jobStatusInfo.getStatus().name());
                }
            }
            HTMLUtils.forwardWithErrorMessage(context, i18n, "errormsg;harvestdefinition.0.is.based.on."
                    + "unfinished.definition.1", hd.getName(), preHd.getName());
            throw new ForwardedToErrorPage("Harvest definition " + hd.getName() + " is based on unfinished definition "
                    + preHd.getName());
        }
    }
}