package dk.netarkivet.harvester.harvesting.distribute;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.Constants;
import dk.netarkivet.common.distribute.ChannelID;
import dk.netarkivet.common.distribute.JMSConnection;
import dk.netarkivet.common.distribute.JMSConnectionFactory;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.PermissionDenied;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.lifecycle.PeriodicTaskExecutor;
import dk.netarkivet.common.utils.ApplicationUtils;
import dk.netarkivet.common.utils.CleanupIF;
import dk.netarkivet.common.utils.DomainUtils;
import dk.netarkivet.common.utils.ExceptionUtils;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.NotificationType;
import dk.netarkivet.common.utils.NotificationsFactory;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.SystemUtils;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.HarvestDefinitionInfo;
import dk.netarkivet.harvester.datamodel.Job;
import dk.netarkivet.harvester.datamodel.JobStatus;
import dk.netarkivet.harvester.distribute.HarvesterChannels;
import dk.netarkivet.harvester.distribute.HarvesterMessageHandler;
import dk.netarkivet.harvester.harvesting.DomainnameQueueAssignmentPolicy;
import dk.netarkivet.harvester.harvesting.HarvestController;
import dk.netarkivet.harvester.harvesting.HeritrixFiles;
import dk.netarkivet.harvester.harvesting.PersistentJobData;
import dk.netarkivet.harvester.harvesting.SeedUriDomainnameQueueAssignmentPolicy;
import dk.netarkivet.harvester.harvesting.metadata.MetadataEntry;
import dk.netarkivet.harvester.harvesting.report.HarvestReport;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:dk/netarkivet/harvester/harvesting/distribute/HarvestControllerServer.class */
public class HarvestControllerServer extends HarvesterMessageHandler implements CleanupIF {
    /** The singleton instance; created lazily by getInstance() and reset to null by cleanup(). */
    private static HarvestControllerServer instance;
    /** Identifier of this application instance, read from the CommonSettings.APPLICATION_INSTANCE_ID setting. */
    private final String applicationInstanceId = Settings.get(CommonSettings.APPLICATION_INSTANCE_ID);
    /** Local host name after being passed through DomainUtils.reduceHostname(). */
    private final String physicalServerName = DomainUtils.reduceHostname(SystemUtils.getLocalHostName());
    /** Logged at the very start of the constructor. */
    private static final String STARTING_MESSAGE = "Starting HarvestControllerServer.";
    /** Logged by beginListeningIfSpaceAvailable() once the job-channel listener has been set. */
    private static final String STARTED_MESSAGE = "HarvestControllerServer started.";
    /** Logged by close() before cleanup() is called. */
    private static final String CLOSING_MESSAGE = "Closing HarvestControllerServer.";
    /** Logged by close() after cleanup() has returned. */
    private static final String CLOSED_MESSAGE = "Closed down HarvestControllerServer";
    /** Not referenced by name in this file; HarvesterThread logs the equivalent literal text. */
    private static final String STARTCRAWL_MESSAGE = "Starting crawl of job :";
    /** Not referenced by name in this file; HarvesterThread logs the equivalent literal text. */
    private static final String ENDCRAWL_MESSAGE = "Ending crawl of job :";
    /** Not referenced in this file; presumably a timeout in seconds used elsewhere in the package - TODO confirm. */
    static final int WAIT_FOR_HOSTS_REPORT_TIMEOUT_SECS = 30;
    /** Name of the system property the constructor sets to Constants.getHeritrixVersionString(). */
    private static final String HERITRIX_VERSION_PROPERTY = "heritrix.version";
    /** Name of the system property the constructor sets to a comma-separated list of queue assignment policy class names. */
    private static final String HERITRIX_QUEUE_ASSIGNMENT_POLICY_PROPERTY = "org.archive.crawler.frontier.AbstractFrontier.queue-assignment-policy";
    /** JMS connection obtained from JMSConnectionFactory.getInstance() in the constructor. */
    private JMSConnection jmsConnection;
    /** The harvest controller singleton that writes harvest files, runs the harvest and stores the results. */
    private final HarvestController controller;
    /** Channel on which crawl jobs are received; null until a valid HarvesterRegistrationResponse has been visited. */
    private ChannelID jobChannel;
    /** Value of HarvesterSettings.HARVEST_SERVERDIR_MINSPACE; the constructor rejects values <= 0. */
    private final long minSpaceRequired;
    /** Directory named by HarvesterSettings.HARVEST_CONTROLLER_SERVERDIR. */
    private final File serverDir;
    /** Running/ready state of this harvester; created in the constructor. */
    private CrawlStatus status;
    /** Class logger. */
    private static final Logger log = LoggerFactory.getLogger(HarvestControllerServer.class);
    /** Name of the harvest channel this server is bound to, read from HarvesterSettings.HARVEST_CONTROLLER_CHANNEL. */
    private static final String CHANNEL = Settings.get(HarvesterSettings.HARVEST_CONTROLLER_CHANNEL);
    /** Channel on which this server listens for the response to its HarvesterRegistrationRequest. */
    public static final ChannelID HARVEST_CHAN_VALID_RESP_ID = HarvesterChannels.getHarvesterRegistrationResponseChannel();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:dk/netarkivet/harvester/harvesting/distribute/HarvestControllerServer$CrawlStatus.class */
    /**
     * Holds the busy/idle state of the enclosing server and periodically announces on the JMS
     * connection that the harvester is ready whenever it is not running a crawl.
     */
    public class CrawlStatus {
        /** True while the harvester is occupied and must not announce itself as ready. */
        private boolean running;
        /** Set to true by startSending(); never reset in this class. */
        private boolean channelIsValid;
        /** Executor that calls sendStatus() periodically; null while not sending. */
        private PeriodicTaskExecutor statusTransmitter;
        /** Delay passed to Thread.sleep() before each ready message, read from HarvesterSettings.SEND_READY_DELAY. */
        private final int SEND_READY_DELAY;

        private CrawlStatus() {
            this.running = false;
            this.channelIsValid = false;
            this.SEND_READY_DELAY = Settings.getInt(HarvesterSettings.SEND_READY_DELAY);
        }

        /**
         * @return true if the harvester is currently marked as running
         */
        public boolean isRunning() {
            return this.running;
        }

        /**
         * Marks the harvester as running or idle.
         *
         * @param z the new running state
         */
        public synchronized void setRunning(boolean z) {
            this.running = z;
        }

        /**
         * @return true once startSending() has been called
         */
        protected final boolean isChannelValid() {
            return this.channelIsValid;
        }

        /**
         * Marks the channel as valid and starts a periodic task that calls sendStatus() with the
         * interval read from HarvesterSettings.SEND_READY_INTERVAL.
         */
        public void startSending() {
            this.channelIsValid = true;
            this.statusTransmitter = new PeriodicTaskExecutor("HarvesterStatus", new Runnable() {
                @Override
                public void run() {
                    CrawlStatus.this.sendStatus();
                }
            }, 0L, Settings.getInt(HarvesterSettings.SEND_READY_INTERVAL));
        }

        /** Shuts down the periodic status task, if one is active. */
        public void stopSending() {
            if (this.statusTransmitter != null) {
                this.statusTransmitter.shutdown();
                this.statusTransmitter = null;
            }
        }

        /* JADX INFO: Access modifiers changed from: private */
        /**
         * Waits SEND_READY_DELAY and then, unless the harvester is running, sends a
         * HarvesterReadyMessage for this application instance on the configured channel.
         * <p>
         * If the wait is interrupted the interrupt status is restored and no message is sent for
         * this round: the interrupt must stay visible to the code that owns the thread, and a
         * thread that is being stopped should not announce the harvester as ready.
         */
        public synchronized void sendStatus() {
            try {
                Thread.sleep(this.SEND_READY_DELAY);
            } catch (InterruptedException e) {
                HarvestControllerServer.log.error("Unable to sleep", e);
                // Do not swallow the interrupt; hand it back to the owner of this thread.
                Thread.currentThread().interrupt();
                return;
            } catch (IllegalArgumentException e) {
                // Thrown by Thread.sleep() for a negative delay; log and send without waiting.
                HarvestControllerServer.log.error("Unable to sleep", e);
            }
            if (this.running) {
                return;
            }
            HarvestControllerServer.this.jmsConnection.send(new HarvesterReadyMessage(HarvestControllerServer.this.applicationInstanceId + " on " + HarvestControllerServer.this.physicalServerName, HarvestControllerServer.CHANNEL));
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:dk/netarkivet/harvester/harvesting/distribute/HarvestControllerServer$HarvesterThread.class */
    public class HarvesterThread extends Thread {
        private final Job job;
        private final HarvestDefinitionInfo origHarvestInfo;
        private final List<MetadataEntry> metadataEntries;

        public HarvesterThread(Job job, HarvestDefinitionInfo harvestDefinitionInfo, List<MetadataEntry> list) {
            this.job = job;
            this.origHarvestInfo = harvestDefinitionInfo;
            this.metadataEntries = list;
        }

        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            try {
                try {
                    HarvestControllerServer.this.removeListener();
                    HeritrixFiles writeHarvestFiles = HarvestControllerServer.this.controller.writeHarvestFiles(createCrawlDir(), this.job, this.origHarvestInfo, this.metadataEntries);
                    HarvestControllerServer.log.info("Starting crawl of job : {}", this.job.getJobID());
                    try {
                        try {
                            HarvestControllerServer.this.controller.runHarvest(writeHarvestFiles);
                            HarvestControllerServer.this.processHarvestInfoFile(writeHarvestFiles.getCrawlDir(), null);
                            HarvestControllerServer.log.info("Ending crawl of job : {}", this.job.getJobID());
                            HarvestControllerServer.this.processOldJobs();
                            shutdownNowOrContinue();
                            HarvestControllerServer.this.startAcceptingJobs();
                            HarvestControllerServer.this.beginListeningIfSpaceAvailable();
                        } catch (Throwable th) {
                            HarvestControllerServer.this.processHarvestInfoFile(writeHarvestFiles.getCrawlDir(), null);
                            throw th;
                        }
                    } catch (Throwable th2) {
                        HarvestControllerServer.log.warn("Error during crawling. The crawl may have been only partially completed.", th2);
                        throw new IOFailure("Error during crawling. The crawl may have been only partially completed.", th2);
                    }
                } catch (Throwable th3) {
                    String str = "Fatal error while operating job '" + this.job + "'";
                    HarvestControllerServer.log.error(str, th3);
                    NotificationsFactory.getInstance().notify(str, NotificationType.ERROR, th3);
                    HarvestControllerServer.log.info("Ending crawl of job : {}", this.job.getJobID());
                    HarvestControllerServer.this.processOldJobs();
                    shutdownNowOrContinue();
                    HarvestControllerServer.this.startAcceptingJobs();
                    HarvestControllerServer.this.beginListeningIfSpaceAvailable();
                }
            } catch (Throwable th4) {
                HarvestControllerServer.log.info("Ending crawl of job : {}", this.job.getJobID());
                HarvestControllerServer.this.processOldJobs();
                shutdownNowOrContinue();
                HarvestControllerServer.this.startAcceptingJobs();
                HarvestControllerServer.this.beginListeningIfSpaceAvailable();
                throw th4;
            }
        }

        private void shutdownNowOrContinue() {
            if (new File(HarvestControllerServer.this.serverDir, "shutdown.txt").exists()) {
                HarvestControllerServer.log.info("Found shutdown-file in serverdir - shutting down the application");
                HarvestControllerServer.instance.cleanup();
                System.exit(0);
            }
        }

        private File createCrawlDir() {
            try {
                File file = new File(new File(Settings.get(HarvesterSettings.HARVEST_CONTROLLER_SERVERDIR)), this.job.getJobID() + "_" + System.currentTimeMillis());
                FileUtils.createDir(file);
                HarvestControllerServer.log.info("Created crawl directory: '{}'", file);
                return file;
            } catch (PermissionDenied e) {
                String str = "Couldn't create the directory for job " + this.job.getJobID();
                HarvestControllerServer.log.warn(str, e);
                HarvestControllerServer.this.sendErrorMessage(this.job.getJobID().longValue(), str, ExceptionUtils.getStackTrace(e));
                throw e;
            }
        }
    }

    private HarvestControllerServer() throws IOFailure {
        log.info(STARTING_MESSAGE);
        log.info("Bound to harvest channel '{}'", CHANNEL);
        this.serverDir = new File(Settings.get(HarvesterSettings.HARVEST_CONTROLLER_SERVERDIR));
        ApplicationUtils.dirMustExist(this.serverDir);
        log.info("Serverdir: '{}'", this.serverDir);
        this.minSpaceRequired = Settings.getLong(HarvesterSettings.HARVEST_SERVERDIR_MINSPACE);
        if (this.minSpaceRequired <= 0) {
            log.warn("Wrong setting of minSpaceLeft read from Settings: {}", Long.valueOf(this.minSpaceRequired));
            throw new ArgumentNotValid("Wrong setting of minSpaceLeft read from Settings: " + this.minSpaceRequired);
        }
        log.info("Harvesting requires at least {} bytes free.", Long.valueOf(this.minSpaceRequired));
        this.controller = HarvestController.getInstance();
        System.setProperty(HERITRIX_VERSION_PROPERTY, Constants.getHeritrixVersionString());
        System.setProperty(HERITRIX_QUEUE_ASSIGNMENT_POLICY_PROPERTY, "org.archive.crawler.frontier.HostnameQueueAssignmentPolicy,org.archive.crawler.frontier.IPQueueAssignmentPolicy,org.archive.crawler.frontier.BucketQueueAssignmentPolicy,org.archive.crawler.frontier.SurtAuthorityQueueAssignmentPolicy," + DomainnameQueueAssignmentPolicy.class.getName() + "," + SeedUriDomainnameQueueAssignmentPolicy.class.getName());
        this.jmsConnection = JMSConnectionFactory.getInstance();
        log.debug("Obtained JMS connection.");
        this.status = new CrawlStatus();
        processOldJobs();
        JMSConnectionFactory.getInstance().setListener(HARVEST_CHAN_VALID_RESP_ID, this);
        this.jmsConnection.send(new HarvesterRegistrationRequest(CHANNEL, this.applicationInstanceId));
        log.info("Requested to check the validity of harvest channel '{}'", CHANNEL);
    }

    /**
     * Returns the singleton server, creating it on first use.
     *
     * @return the one HarvestControllerServer instance
     * @throws IOFailure declared by the constructor
     */
    public static synchronized HarvestControllerServer getInstance() throws IOFailure {
        if (instance != null) {
            return instance;
        }
        instance = new HarvestControllerServer();
        return instance;
    }

    /**
     * Shuts the server down by delegating to {@link #cleanup()}, logging a message before and
     * after.
     */
    public synchronized void close() {
        log.info(CLOSING_MESSAGE);
        this.cleanup();
        log.info(CLOSED_MESSAGE);
    }

    /**
     * Releases what the server holds: cleans up the harvest controller, removes this object as
     * listener from the registration-response channel and (if set) the job channel, stops the
     * periodic status sender and clears the singleton reference.
     */
    public void cleanup() {
        if (this.controller != null) {
            this.controller.cleanup();
        }

        final JMSConnection connection = this.jmsConnection;
        if (connection != null) {
            connection.removeListener(HARVEST_CHAN_VALID_RESP_ID, this);
            final ChannelID channel = this.jobChannel;
            if (channel != null) {
                connection.removeListener(channel, this);
            }
        }

        this.status.stopSending();
        instance = null;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Looks through the configured server directory for sub-directories that still contain
     * persistent job data and post-processes each of them as a failed crawl, after logging a
     * warning and sending a notification.
     * <p>
     * {@link File#listFiles()} returns null when the path is not a directory or cannot be read.
     * That case is logged and skipped instead of failing with a NullPointerException, which
     * would otherwise abort the caller's remaining end-of-job steps.
     */
    public void processOldJobs() {
        File serverDirectory = new File(Settings.get(HarvesterSettings.HARVEST_CONTROLLER_SERVERDIR));
        File[] entries = serverDirectory.listFiles();
        if (entries == null) {
            log.warn("Unable to list the contents of '{}'; skipping the check for old unprocessed jobs.", serverDirectory.getAbsolutePath());
            return;
        }
        for (File file : entries) {
            if (PersistentJobData.existsIn(file)) {
                String str = "Found old unprocessed job data in dir '" + file.getAbsolutePath() + "'. Crawl probably interrupted by shutdown of HarvestController. Processing data.";
                log.warn(str);
                NotificationsFactory.getInstance().notify(str, NotificationType.WARNING);
                processHarvestInfoFile(file, new IOFailure("Crawl probably interrupted by shutdown of HarvestController"));
            }
        }
    }

    /**
     * Handles a request to run one crawl job.
     * <p>
     * If a crawl is already running the message is resent to the job channel and the method
     * returns after a short pause. Otherwise the server stops accepting jobs, reports the job
     * as STARTED, validates the message and starts a {@link HarvesterThread} for the job. If
     * no harvester thread was started, the server is made available for jobs again before the
     * failure is propagated.
     *
     * @param doOneCrawlMessage the crawl request
     * @throws UnknownID if the job in the message has no job ID
     * @throws ArgumentNotValid if the job in the message does not have status SUBMITTED
     * @throws IOFailure declared by the original signature
     * @throws PermissionDenied declared by the original signature
     */
    private void onDoOneCrawl(DoOneCrawlMessage doOneCrawlMessage) throws IOFailure, UnknownID, ArgumentNotValid, PermissionDenied {
        synchronized (this) {
            if (this.status.isRunning()) {
                log.warn("Received crawl request, but sent it back to queue, as another crawl is already running: '{}'", doOneCrawlMessage);
                this.jmsConnection.resend(doOneCrawlMessage, this.jobChannel);
                try {
                    // Pause briefly before returning to the listener after the resend.
                    Thread.sleep(1000L);
                } catch (InterruptedException e) {
                    // Nothing critical is being waited for; just keep the interrupt visible.
                    Thread.currentThread().interrupt();
                }
                return;
            }
            stopAcceptingJobs();

            // Becomes true once the harvester thread owns the job; until then any failure
            // must hand the "accepting jobs" state back.
            boolean crawlStarted = false;
            try {
                Job job = doOneCrawlMessage.getJob();
                Long jobID = job.getJobID();
                if (jobID == null) {
                    log.warn("DoOneCrawlMessage arrived without JobID: '{}'", doOneCrawlMessage.toString());
                    throw new UnknownID("DoOneCrawlMessage arrived without JobID");
                }
                log.info("Received crawlrequest for job {}: '{}'", jobID, doOneCrawlMessage);
                this.jmsConnection.send(new CrawlStatusMessage(jobID.longValue(), JobStatus.STARTED));
                if (job.getStatus() != JobStatus.SUBMITTED) {
                    String str = "Message '" + doOneCrawlMessage.toString() + "' arrived with status " + job.getStatus() + " for job " + jobID + ", should have been STATUS_SUBMITTED";
                    log.warn(str);
                    sendErrorMessage(jobID.longValue(), str, str);
                    throw new ArgumentNotValid(str);
                }
                HarvesterThread harvesterThread = new HarvesterThread(job, doOneCrawlMessage.getOrigHarvestInfo(), doOneCrawlMessage.getMetadata());
                harvesterThread.start();
                crawlStarted = true;
                log.info("Started harvester thread for job {}", jobID);
            } finally {
                if (!crawlStarted) {
                    startAcceptingJobs();
                }
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Reports a job as FAILED on the JMS connection, without a harvest report.
     *
     * @param j ID of the failed job
     * @param str text stored as the harvest errors of the status message
     * @param str2 text stored as the harvest error details of the status message
     */
    public void sendErrorMessage(long j, String str, String str2) {
        final CrawlStatusMessage failure = new CrawlStatusMessage(j, JobStatus.FAILED, (HarvestReport) null);
        failure.setHarvestErrors(str);
        failure.setHarvestErrorDetails(str2);
        jmsConnection.send(failure);
    }

    /** Flags the crawl status as running, so that no further jobs are taken on, and logs it. */
    private synchronized void stopAcceptingJobs() {
        status.setRunning(true);
        log.debug("No longer accepting jobs.");
    }

    /* JADX INFO: Access modifiers changed from: private */
    /** Flags the crawl status as not running, so that jobs can be taken on again. */
    public synchronized void startAcceptingJobs() {
        status.setRunning(false);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /** Detaches this server as listener from the current job channel, logging the channel first. */
    public void removeListener() {
        final ChannelID channel = this.jobChannel;
        log.debug("Removing listener on CHANNEL '{}'", channel);
        jmsConnection.removeListener(channel, this);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Starts listening on the job channel, provided the server directory has more free bytes
     * than the configured minimum. Otherwise an error is logged and sent as a notification and
     * no listener is set.
     */
    public void beginListeningIfSpaceAvailable() {
        final long available = FileUtils.getBytesFree(this.serverDir);

        if (available <= this.minSpaceRequired) {
            final String problem = "Not enough available diskspace. Only " + available + " bytes available. Harvester is paused.";
            log.error(problem);
            NotificationsFactory.getInstance().notify(problem, NotificationType.ERROR);
            return;
        }

        log.info("Starts to listen to new jobs on queue '{}'", this.jobChannel);
        this.jmsConnection.setListener(this.jobChannel, this);
        log.info(STARTED_MESSAGE);
    }

    /**
     * Fills in the error fields of a status message.
     * <p>
     * Harvest errors are taken from the throwable when one is given. Upload errors are only
     * set when the accumulated error text is non-empty; the summary then names a missing hosts
     * report and/or the number of files that failed to upload.
     *
     * @param crawlStatusMessage the message to fill in
     * @param th the exception to report as harvest error, or null for none
     * @param str accumulated error text, stored as the upload error details when non-empty
     * @param z true if "No hosts report found" should be part of the upload error summary
     * @param i number of files that failed to upload
     */
    private void setErrorMessages(CrawlStatusMessage crawlStatusMessage, Throwable th, String str, boolean z, int i) {
        if (th != null) {
            crawlStatusMessage.setHarvestErrors(th.toString());
            crawlStatusMessage.setHarvestErrorDetails(ExceptionUtils.getStackTrace(th));
        }

        if (str.isEmpty()) {
            return;
        }

        final StringBuilder summary = new StringBuilder();
        if (z) {
            summary.append("No hosts report found");
        }
        if (i > 0) {
            if (summary.length() > 0) {
                summary.append(", ");
            }
            summary.append(i).append(" files failed to upload");
        }
        crawlStatusMessage.setUploadErrors(summary.toString());
        crawlStatusMessage.setUploadErrorDetails(str);
    }

    /**
     * Receives a crawl request and hands it to {@code onDoOneCrawl}.
     *
     * @param doOneCrawlMessage the crawl request
     * @throws IOFailure passed on from onDoOneCrawl
     * @throws UnknownID passed on from onDoOneCrawl
     * @throws ArgumentNotValid passed on from onDoOneCrawl
     * @throws PermissionDenied passed on from onDoOneCrawl
     */
    public void visit(DoOneCrawlMessage doOneCrawlMessage) throws IOFailure, UnknownID, ArgumentNotValid, PermissionDenied {
        this.onDoOneCrawl(doOneCrawlMessage);
    }

    /**
     * Receives the answer to the channel registration request.
     * <p>
     * A response is resent to its destination when this server's channel has already been
     * marked valid or when the response names a different channel. A response declaring the
     * channel invalid closes the server. A response declaring it valid resolves the job
     * channel, starts listening for jobs (disk space permitting), marks the server as
     * accepting jobs and starts the periodic ready messages.
     *
     * @param harvesterRegistrationResponse the registration response
     */
    public void visit(HarvesterRegistrationResponse harvesterRegistrationResponse) {
        final String channelName = harvesterRegistrationResponse.getHarvestChannelName();

        final boolean awaitedResponse = !this.status.isChannelValid() && CHANNEL.equals(channelName);
        if (!awaitedResponse) {
            this.jmsConnection.resend(harvesterRegistrationResponse, harvesterRegistrationResponse.getTo());
            log.debug("Resending harvest channel validity message for channel '{}'", channelName);
            return;
        }

        if (harvesterRegistrationResponse.isValid()) {
            log.info("Received message stating that channel '{}' is valid.", channelName);
            this.jobChannel = HarvesterChannels.getHarvestJobChannelId(channelName, harvesterRegistrationResponse.isSnapshot());
            beginListeningIfSpaceAvailable();
            startAcceptingJobs();
            this.status.startSending();
        } else {
            log.error("Received message stating that channel '{}' is invalid. Will stop. Probable cause: the channel is not one of the known channels stored in the channels table", channelName);
            close();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Post-processes a crawl directory: stores the harvested files, reports the outcome of the
     * job on the JMS connection and cleans up the directory.
     * <p>
     * The job is reported as DONE only when no crawl exception was handed in and storing the
     * files produced no error text; otherwise it is reported as FAILED with the error details
     * attached. Exactly one status message is sent per call. The crawl directory is cleaned up
     * (remaining files are handed to the configured old-jobs directory) even if sending the
     * status message fails.
     *
     * @param file the crawl directory; must contain persistent job data
     * @param th the exception that ended the crawl, or null if the crawl raised none
     * @throws IOFailure if the directory contains no harvestInfo
     */
    public void processHarvestInfoFile(File file, Throwable th) throws IOFailure {
        log.debug("Post-processing files in '{}'", file.getAbsolutePath());
        if (!PersistentJobData.existsIn(file)) {
            throw new IOFailure("No harvestInfo found in directory: " + file.getAbsolutePath());
        }
        PersistentJobData harvestInfo = new PersistentJobData(file);
        Long jobID = harvestInfo.getJobID();
        StringBuilder errorMessage = new StringBuilder();
        HarvestReport harvestReport = null;
        List<File> failedFiles = new ArrayList<File>();
        HeritrixFiles files = HeritrixFiles.getH1HeritrixFilesWithDefaultJmxFiles(file, harvestInfo);
        try {
            log.info("Store files in directory '{}' from jobID: {}.", file, jobID);
            harvestReport = this.controller.storeFiles(files, errorMessage, failedFiles);
        } catch (Exception e) {
            String msg = "Trouble during postprocessing of files in '" + file.getAbsolutePath() + "'";
            log.warn(msg, e);
            // A non-empty error text is what turns the status below into FAILED.
            errorMessage.append(e.getMessage()).append("\n");
            NotificationsFactory.getInstance().notify(msg + ". Errors accumulated during the postprocessing: " + errorMessage.toString(), NotificationType.ERROR, e);
        } finally {
            // Report the outcome whatever happened above; harvestReport stays null if
            // storeFiles() did not return.
            boolean succeeded = th == null && errorMessage.length() == 0;
            CrawlStatusMessage crawlStatusMessage;
            if (succeeded) {
                log.info("Job with ID {} finished with status DONE", jobID);
                crawlStatusMessage = new CrawlStatusMessage(jobID.longValue(), JobStatus.DONE, harvestReport);
            } else {
                log.warn("Job with ID {} finished with status FAILED", jobID);
                crawlStatusMessage = new CrawlStatusMessage(jobID.longValue(), JobStatus.FAILED, harvestReport);
                setErrorMessages(crawlStatusMessage, th, errorMessage.toString(), harvestReport == null, failedFiles.size());
            }
            try {
                this.jmsConnection.send(crawlStatusMessage);
                if (succeeded) {
                    files.deleteFinalLogs();
                }
            } finally {
                // Cleanup lives in its own finally so it still happens when the send fails.
                log.info("Cleanup after harvesting job with id: {}.", jobID);
                files.cleanUpAfterHarvest(new File(Settings.get(HarvesterSettings.HARVEST_CONTROLLER_OLDJOBSDIR)));
            }
        }
        log.info("Done post-processing files for job {} in dir: '{}'", jobID, file.getAbsolutePath());
    }
}
