package dk.netarkivet.harvester.scheduler.jobgen;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.DomainConfiguration;
import dk.netarkivet.harvester.datamodel.FullHarvest;
import dk.netarkivet.harvester.datamodel.GlobalCrawlerTrapListDAO;
import dk.netarkivet.harvester.datamodel.HarvestChannel;
import dk.netarkivet.harvester.datamodel.HarvestChannelDAO;
import dk.netarkivet.harvester.datamodel.HarvestDefinition;
import dk.netarkivet.harvester.datamodel.HarvestDefinitionDAO;
import dk.netarkivet.harvester.datamodel.HeritrixTemplate;
import dk.netarkivet.harvester.datamodel.Job;
import dk.netarkivet.harvester.datamodel.PartialHarvest;
import dk.netarkivet.harvester.datamodel.Schedule;
import dk.netarkivet.harvester.datamodel.SeedList;
import dk.netarkivet.harvester.datamodel.TemplateDAO;
import dk.netarkivet.harvester.datamodel.eav.EAV;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/harvester/scheduler/jobgen/AbstractJobGenerator.class */
/**
 * Skeleton implementation of {@link JobGenerator}.
 *
 * <p>The domain configurations of a harvest definition are consumed in bounded-size subsets.
 * Each subset is sorted with a subclass-supplied comparator and handed to
 * {@link #processDomainConfigurationSubset(HarvestDefinition, Iterator)}, which reports how
 * many jobs it created. Subclasses also decide, through
 * {@link #checkSpecificAcceptConditions(Job, DomainConfiguration)}, whether a configuration
 * may join an existing job once the generic invariants checked here are satisfied.
 */
abstract class AbstractJobGenerator implements JobGenerator {
    /** Logger for this class. */
    private static final Logger log = LoggerFactory.getLogger(AbstractJobGenerator.class);

    /** Upper bound on the number of domain configurations sorted and processed together. */
    private final long DOMAIN_CONFIG_SUBSET_SIZE = Settings.getLong(HarvesterSettings.JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE);

    /** Value passed to the order template when toggling deduplication for a job. */
    private final boolean DEDUPLICATION_ENABLED = Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED);

    /**
     * Generates jobs for all domain configurations of the given harvest definition.
     *
     * <p>The event counter of the harvest definition is incremented first; for snapshot
     * harvests the definition is then persisted through {@link HarvestDefinitionDAO}. For
     * non-snapshot harvests the next date is moved forward once all subsets are processed.
     *
     * @param harvestDefinition the harvest definition to generate jobs for
     * @return the total number of jobs reported by the subset processing
     * @throws IllegalStateException if the configured subset size is less than 1; without
     *         this check the subset loop could never consume the iterator and would not
     *         terminate
     */
    @Override
    public int generateJobs(HarvestDefinition harvestDefinition) {
        // Fail fast, before any state is touched: a non-positive subset size would leave
        // every subset empty while the iterator still has elements, i.e. an endless loop.
        if (this.DOMAIN_CONFIG_SUBSET_SIZE < 1) {
            throw new IllegalStateException("Setting '" + HarvesterSettings.JOBGEN_DOMAIN_CONFIG_SUBSET_SIZE
                    + "' must be at least 1, but was " + this.DOMAIN_CONFIG_SUBSET_SIZE);
        }
        log.info("Generating jobs for harvestdefinition #{} using class '{}'", harvestDefinition.getOid(), getClass());
        int jobsMade = 0;
        Iterator<DomainConfiguration> domainConfigurations = harvestDefinition.getDomainConfigurations();
        log.info("Now ready to iterate over the domainConfigurations for harvestdefinition #{}", harvestDefinition.getOid());
        // Count this run as one more event of the harvest definition.
        harvestDefinition.setNumEvents(harvestDefinition.getNumEvents() + 1);
        if (harvestDefinition.isSnapShot()) {
            HarvestDefinitionDAO.getInstance().update(harvestDefinition);
        }
        while (domainConfigurations.hasNext()) {
            // Collect the next subset; it holds at least one element because the iterator
            // is non-empty here and the subset size was validated above.
            ArrayList<DomainConfiguration> subset = new ArrayList<DomainConfiguration>();
            while (domainConfigurations.hasNext() && subset.size() < this.DOMAIN_CONFIG_SUBSET_SIZE) {
                subset.add(domainConfigurations.next());
            }
            Comparator<DomainConfiguration> subsetComparator = getDomainConfigurationSubsetComparator(harvestDefinition);
            log.trace("Sorting domains with instance of {}", subsetComparator.getClass().getName());
            Collections.sort(subset, subsetComparator);
            log.trace("{} domainconfigs now sorted and ready to processing for harvest #{}", subset.size(), harvestDefinition.getOid());
            jobsMade += processDomainConfigurationSubset(harvestDefinition, subset.iterator());
            // jobsMade is the running total over all subsets processed so far.
            if (jobsMade == 0) {
                log.warn("Created 0 jobs for HD #{} from domain cfg subset size {}.", harvestDefinition.getOid(), subset.size());
            } else {
                log.info("Now created {} jobs for HD #{} from domain cfg subset size {}.", jobsMade, harvestDefinition.getOid(), subset.size());
            }
        }
        if (!harvestDefinition.isSnapShot()) {
            scheduleNextEvent((PartialHarvest) harvestDefinition);
        }
        log.info("Finished generating {} jobs for harvestdefinition #{}", jobsMade, harvestDefinition.getOid());
        return jobsMade;
    }

    /**
     * Moves the next date of a partial harvest to the next event of its schedule, skipping
     * every event that lies before the current time. A warning is logged when at least one
     * event had to be skipped.
     *
     * @param partialHarvest the harvest whose next date is updated; the new value is
     *        {@code null} when the schedule yields no further event
     */
    private void scheduleNextEvent(PartialHarvest partialHarvest) {
        Schedule schedule = partialHarvest.getSchedule();
        int numEvents = partialHarvest.getNumEvents();
        Date now = new Date();
        Date nextEvent = schedule.getNextEvent(partialHarvest.getNextDate(), numEvents);
        int skippedEvents = 0;
        while (nextEvent != null && nextEvent.before(now)) {
            nextEvent = schedule.getNextEvent(nextEvent, numEvents);
            skippedEvents++;
        }
        if (skippedEvents > 0) {
            // getNextDate() still returns the old value here; it is replaced just below.
            log.warn("Refusing to schedule harvest definition '{}' in the past. Skipped {} events. Old nextDate was {} new nextDate is {}", partialHarvest.getName(), skippedEvents, partialHarvest.getNextDate(), nextEvent);
        }
        partialHarvest.setNextDate(nextEvent);
        if (log.isTraceEnabled()) {
            log.trace("Next event for harvest definition {} happens: {}", partialHarvest.getName(), nextEvent == null ? "Never" : nextEvent.toString());
        }
    }

    /**
     * Creates a new job for the given harvest definition, seeded with one domain
     * configuration.
     *
     * <p>The harvest channel is the one mapped to the harvest definition, or the default
     * channel for the harvest type when no mapping exists. The order template is read by the
     * name given in the configuration. Snapshot harvests pass their own object, byte and
     * running-time limits to the job; other harvests pass fixed values.
     *
     * @param harvestDefinition the harvest definition the job belongs to
     * @param domainConfiguration the first domain configuration of the job
     * @return the newly constructed job
     */
    public Job getNewJob(HarvestDefinition harvestDefinition, DomainConfiguration domainConfiguration) {
        HarvestChannelDAO channelDao = HarvestChannelDAO.getInstance();
        HarvestChannel channel = channelDao.getChannelForHarvestDefinition(harvestDefinition.getOid().longValue());
        if (channel == null) {
            log.info("No channel mapping registered for harvest id {}, will use default.", harvestDefinition.getOid());
            channel = channelDao.getDefaultChannel(harvestDefinition.isSnapShot());
        }
        HeritrixTemplate orderTemplate = loadOrderXMLdoc(domainConfiguration.getOrderXmlName());
        if (harvestDefinition.isSnapShot()) {
            return new Job(harvestDefinition.getOid(), domainConfiguration, orderTemplate, channel,
                    harvestDefinition.getMaxCountObjects(), harvestDefinition.getMaxBytes(),
                    ((FullHarvest) harvestDefinition).getMaxJobRunningTime(), harvestDefinition.getNumEvents());
        }
        // NOTE(review): -1/-1/0 presumably mean "no limit" for objects, bytes and running
        // time - confirm against the Job constructor.
        return new Job(harvestDefinition.getOid(), domainConfiguration, orderTemplate, channel,
                -1L, -1L, 0L, harvestDefinition.getNumEvents());
    }

    /**
     * Supplies the comparator used to sort each subset of domain configurations before it
     * is processed.
     *
     * @param harvestDefinition the harvest definition being processed
     * @return the comparator to sort a subset with
     */
    protected abstract Comparator<DomainConfiguration> getDomainConfigurationSubsetComparator(HarvestDefinition harvestDefinition);

    /**
     * Processes one sorted subset of domain configurations.
     *
     * @param harvestDefinition the harvest definition being processed
     * @param it iterator over the sorted subset
     * @return the number of jobs created for this subset
     */
    protected abstract int processDomainConfigurationSubset(HarvestDefinition harvestDefinition, Iterator<DomainConfiguration> it);

    /**
     * Decides whether a domain configuration may be added to a job. The generic invariants
     * are checked first, then the generator-specific conditions.
     *
     * @param job the job under construction
     * @param domainConfiguration the configuration that is a candidate for the job
     * @param domainConfiguration2 the previously handled configuration; may be {@code null}
     * @return {@code true} if the configuration can be added to the job
     */
    @Override
    public boolean canAccept(Job job, DomainConfiguration domainConfiguration, DomainConfiguration domainConfiguration2) {
        log.trace("Comparing current cfg {} with previous cfg {} when adding configs to HD #{}", domainConfiguration, domainConfiguration2, job.getOrigHarvestDefinitionID());
        if (!checkAddDomainConfInvariant(job, domainConfiguration, domainConfiguration2)) {
            log.debug("Unable to add incompatible config(domain,configname='{}','{}') to current job for HD #{}", domainConfiguration.getDomainName(), domainConfiguration.getName(), job.getOrigHarvestDefinitionID());
            return false;
        }
        boolean accepted = checkSpecificAcceptConditions(job, domainConfiguration);
        if (!accepted) {
            log.debug("Unable to add config(domain,configname='{}','{}') to current job for HD #{}. The specific accept conditions fail", domainConfiguration.getDomainName(), domainConfiguration.getName(), job.getOrigHarvestDefinitionID());
        }
        return accepted;
    }

    /**
     * Generator-specific part of {@link #canAccept(Job, DomainConfiguration, DomainConfiguration)};
     * only consulted after the generic invariants hold.
     *
     * @param job the job under construction
     * @param domainConfiguration the configuration that is a candidate for the job
     * @return {@code true} if this generator accepts the configuration for the job
     */
    protected abstract boolean checkSpecificAcceptConditions(Job job, DomainConfiguration domainConfiguration);

    /**
     * Applies the configured deduplication setting to the order template of the job.
     *
     * <p>Decompiler note: this method was originally declared {@code protected}; the wider
     * visibility is kept so that existing callers continue to compile.
     *
     * @param job the job whose order template is modified
     */
    public void editJobOrderXml(Job job) {
        job.getOrderXMLdoc().enableOrDisableDeduplication(this.DEDUPLICATION_ENABLED);
    }

    /**
     * Checks the generic conditions for adding a configuration to a job: the attributes
     * must compare equal to those of the previous configuration (when there is one), the
     * job must not already hold a configuration for the same domain, and the configuration
     * must use the same order template as the job.
     *
     * @param job the job under construction; must not be {@code null}
     * @param domainConfiguration the candidate configuration; must not be {@code null}
     * @param domainConfiguration2 the previously handled configuration; may be {@code null}
     * @return {@code true} if all generic conditions hold
     */
    private boolean checkAddDomainConfInvariant(Job job, DomainConfiguration domainConfiguration, DomainConfiguration domainConfiguration2) {
        ArgumentNotValid.checkNotNull(job, "job");
        ArgumentNotValid.checkNotNull(domainConfiguration, "cfg");
        if (domainConfiguration2 != null
                && EAV.compare(domainConfiguration.getAttributesAndTypes(), domainConfiguration2.getAttributesAndTypes()) != 0) {
            log.debug("Attributes have changed between configurations {} and {}", DomainConfiguration.cfgToString(domainConfiguration2), DomainConfiguration.cfgToString(domainConfiguration));
            return false;
        }
        if (job.getDomainConfigurationMap().containsKey(domainConfiguration.getDomainName())) {
            log.debug("Job already has a configuration for Domain '{}'.", domainConfiguration.getDomainName());
            return false;
        }
        String jobTemplateName = job.getOrderXMLName();
        if (!jobTemplateName.equals(domainConfiguration.getOrderXmlName())) {
            log.debug("This Job only accept configurations using the harvest template '{}'. This configuration uses the harvest template '{}'.", jobTemplateName, domainConfiguration.getOrderXmlName());
            return false;
        }
        return true;
    }

    /**
     * Reads the named order template and passes it to
     * {@link GlobalCrawlerTrapListDAO#addGlobalCrawlerTraps} before returning it.
     *
     * @param str the name of the order template to read
     * @return the template read from {@link TemplateDAO}
     */
    private HeritrixTemplate loadOrderXMLdoc(String str) {
        HeritrixTemplate template = TemplateDAO.getInstance().read(str);
        GlobalCrawlerTrapListDAO.getInstance().addGlobalCrawlerTraps(template);
        return template;
    }

    /**
     * Tells whether a domain configuration should be ignored because it has no usable
     * seed: every seed in every seed list is either blank after trimming or starts with
     * {@code #}.
     *
     * @param domainConfiguration the configuration to inspect
     * @return {@code false} as soon as one usable seed is found, {@code true} otherwise
     *         (including when there are no seeds at all)
     */
    @Override
    public boolean ignoreConfiguration(DomainConfiguration domainConfiguration) {
        Iterator<SeedList> seedLists = domainConfiguration.getSeedLists();
        while (seedLists.hasNext()) {
            for (String seed : seedLists.next().getSeeds()) {
                String trimmed = seed.trim();
                if (trimmed.length() > 0 && !trimmed.startsWith("#")) {
                    return false;
                }
            }
        }
        return true;
    }
}
