001/*
002 * #%L
003 * NetarchiveSuite - heritrix3 controller
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, the National Library of France and the Austrian National Library.
006 * %%
007 * This program is free software: you can redistribute it and/or modify
008 * it under the terms of the GNU Lesser General Public License as
009 * published by the Free Software Foundation, either version 2.1 of the
010 * License, or (at your option) any later version.
011 * 
012 * This program is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
015 * GNU General Lesser Public License for more details.
016 * 
017 * You should have received a copy of the GNU General Lesser Public
018 * License along with this program.  If not, see
019 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
020 * #L%
021 */
022package dk.netarkivet.harvester.heritrix3.controller;
023
024import java.io.File;
025import java.io.IOException;
026import java.io.UnsupportedEncodingException;
027import java.util.ArrayList;
028import java.util.Arrays;
029import java.util.List;
030
031import org.apache.commons.lang.StringUtils;
032import org.apache.http.client.methods.HttpPost;
033import org.apache.http.entity.StringEntity;
034import org.netarchivesuite.heritrix3wrapper.EngineResult;
035import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper;
036import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper.CrawlControllerState;
037import org.netarchivesuite.heritrix3wrapper.JobResult;
038import org.netarchivesuite.heritrix3wrapper.ResultStatus;
039import org.netarchivesuite.heritrix3wrapper.ScriptResult;
040import org.netarchivesuite.heritrix3wrapper.jaxb.JobShort;
041import org.slf4j.Logger;
042import org.slf4j.LoggerFactory;
043
044import dk.netarkivet.common.exceptions.HeritrixLaunchException;
045import dk.netarkivet.common.exceptions.IOFailure;
046import dk.netarkivet.common.exceptions.IllegalState;
047import dk.netarkivet.common.exceptions.NotImplementedException;
048import dk.netarkivet.common.utils.SystemUtils;
049import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage;
050import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceInfo;
051import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceJobInfo;
052import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlStatus;
053import dk.netarkivet.harvester.harvesting.frontier.FullFrontierReport;
054import dk.netarkivet.harvester.heritrix3.Heritrix3Files;
055
/**
 * This implementation of the HeritrixController interface starts Heritrix3 as a separate process and uses the
 * Heritrix3 REST API (through the Heritrix3Wrapper) to communicate with it. Each instance executes exactly one
 * process that runs exactly one crawl job.
 */
060public class HeritrixController extends AbstractRestHeritrixController {
061
062    /** The logger for this class. */
063    private static final Logger log = LoggerFactory.getLogger(HeritrixController.class);
064
065    /**
066     * The name that Heritrix3 gives to the job we ask it to create.
067     */
068    private String jobName;
069
070    /** The header line (legend) for the statistics report. */
071    private String progressStatisticsLegend;
072
073    private int heritrix3EngineRetries;
074    private int heritrix3EngineIntervalBetweenRetriesInMillis;
075
076    private String baseUrl;
077
078    /**
079     * Create a BnfHeritrixController object.
080     *
081     * @param files Files that are used to set up Heritrix3.
082     */
083    public HeritrixController(Heritrix3Files files, String jobName) {
084        super(files);
085        this.jobName = jobName;
086    }
087
088    /**
089     * Initialize the JMXconnection to the Heritrix3.
090     *
091     * @throws IOFailure If Heritrix3 dies before initialisation, or we encounter any problems during the
092     * initialisation.
093     * @see IHeritrixController#initialize()
094     */
095    @Override
096    public void initialize() {
097
098        /////////////////////////////////////////////////////
099        // Initialize H3 wrapper
100        /////////////////////////////////////////////////////
101
102        // TODO these numbers could be settings
103        this.heritrix3EngineRetries = 60;
104        this.heritrix3EngineIntervalBetweenRetriesInMillis = 1000; // 1 second
105
106        h3wrapper = Heritrix3Wrapper.getInstance(getHostName(), getGuiPort(), null, null, getHeritrixAdminName(),
107                getHeritrixAdminPassword());
108
109        EngineResult engineResult;
110        try {
111            engineResult = h3wrapper.waitForEngineReady(heritrix3EngineRetries,
112                    heritrix3EngineIntervalBetweenRetriesInMillis);
113        } catch (Throwable e) {
114            e.printStackTrace();
115            throw new IOFailure("Heritrix3 engine not started: " + e);
116        }
117
118        if (engineResult != null) {
119            if (engineResult.status != ResultStatus.OK) {
120                String errMsg = "Heritrix3 wrapper could not connect to Heritrix3. Resultstate = "
121                        + engineResult.status;
122                log.error(errMsg, engineResult.t);
123                throw new IOFailure(errMsg, engineResult.t);
124            }
125        } else {
126            throw new IOFailure("Unexpected error: Heritrix3 wrapper returned null engine result.");
127        }
128
129        baseUrl = "https://" + getHostName() + ":" + Integer.toString(getGuiPort()) + "/engine/";
130
131        // POST: Heritrix3 is up and running and responds nicely
132        log.info("Heritrix3 REST interface up and running");
133    }
134
135    @Override
136    public void requestCrawlStart() {
137        // Create a new job
138        File cxmlFile = getHeritrixFiles().getOrderFile();
139        File seedsFile = getHeritrixFiles().getSeedsFile();
140        JobResult jobResult;
141
142        File jobDir = files.getHeritrixJobDir();
143        if (!jobDir.exists()) {
144            jobDir.mkdirs();
145        }
146
147        try {
148            log.info("Copying the crawler-beans.cxml file and seeds.txt to the heritrix3 jobdir '{}'", jobDir);
149            Heritrix3Wrapper.copyFile(cxmlFile, jobDir);
150            Heritrix3Wrapper.copyFileAs(seedsFile, jobDir, "seeds.txt");
151        } catch (IOException e) {
152            throw new IOFailure("Problem occurred during the copying of files to our heritrix job", e);
153        }
154
155        // PRE: h3 is running, and the job files copied to their final location
156        EngineResult engineResult = null;
157        try {
158            engineResult = h3wrapper.rescanJobDirectory();
159            log.info("H3 jobs available for building: {}", knownJobsToString(engineResult));
160
161            log.trace("Result of rescanJobDirectory() operation: " + new String(engineResult.response, "UTF-8"));
162
163            jobResult = h3wrapper.buildJobConfiguration(jobName);
164            log.trace("Result of buildJobConfiguration() operation: " + new String(jobResult.response, "UTF-8"));
165            if (jobResult.status == ResultStatus.OK) {
166                if (jobResult.job.statusDescription.equalsIgnoreCase("Unbuilt")) {
167                    throw new HeritrixLaunchException("The job '" + jobName + "' could not be built. Last loglines are "
168                            + StringUtils.join(jobResult.job.jobLogTail, "\n"));
169                } else if (jobResult.job.statusDescription.equalsIgnoreCase("Ready")) {
170                    log.info("Job {} built successfully", jobName);
171                } else if (jobResult.job.statusDescription.startsWith("Finished")) { // Created but not launchable
172                    log.warn("The job {} seems unlaunchable. Tearing down the job. Last loglines are ", jobName,
173                            StringUtils.join(jobResult.job.jobLogTail, "\n"));
174                    jobResult = h3wrapper.teardownJob(jobName);
175                    log.trace("Result of teardown() operation: " + new String(jobResult.response, "UTF-8"));
176                    throw new HeritrixLaunchException("Job '" + jobName + "' failed to launch: "
177                            + StringUtils.join(jobResult.job.jobLogTail, "\n"));
178                } else {
179                    throw new IllegalState(
180                            "Unknown job.statusdescription returned from h3: " + jobResult.job.statusDescription);
181                }
182            } else {
183                throw new IllegalState(
184                        "Unknown ResultStatus returned from h3wrapper: " + ResultStatus.toString(jobResult.status));
185            }
186
187            jobResult = h3wrapper.waitForJobState(jobName, CrawlControllerState.NASCENT, 60, 1000);
188            if (jobResult.job.crawlControllerState.equalsIgnoreCase(CrawlControllerState.NASCENT.toString())) {
189                log.info("The H3 job {} in now in state CrawlControllerState.NASCENT", jobName);
190            } else {
191                log.warn("The job state is now {}. Should have been CrawlControllerState.NASCENT",
192                        jobResult.job.crawlControllerState);
193            }
194            jobResult = h3wrapper.launchJob(jobName);
195
196            log.trace("Result of launchJob() operation: " + new String(jobResult.response, "UTF-8"));
197            jobResult = h3wrapper.waitForJobState(jobName, CrawlControllerState.PAUSED, 60, 1000);
198            if (jobResult.job.crawlControllerState.equalsIgnoreCase(CrawlControllerState.PAUSED.toString())) {
199                log.info("The H3 job {} in now in state CrawlControllerState.PAUSED", jobName);
200            } else {
201                log.warn("The job state is now {}. Should have been CrawlControllerState.PAUSED",
202                        jobResult.job.crawlControllerState);
203            }
204
205            // check if param pauseAtStart is true
206            ScriptResult scriptResult = h3wrapper.ExecuteShellScriptInJob(jobName, "groovy",
207                    "rawOut.println crawlController.pauseAtStart\n");
208            boolean pauseAtStart = false;
209            if (scriptResult != null && scriptResult.script != null) {
210                String rawOutput = scriptResult.script.rawOutput; // false\n or true\n
211                if (rawOutput.endsWith("\n") || rawOutput.endsWith("\r")) {
212                    rawOutput = rawOutput.substring(0, rawOutput.length() - 1);
213                }
214                pauseAtStart = Boolean.parseBoolean(rawOutput);
215            }
216            log.info("The parameter pauseAtStart is {}", pauseAtStart);
217            // if param pauseAtStart is false
218            if (pauseAtStart == false) {
219                jobResult = h3wrapper.unpauseJob(jobName);
220                log.info("The job {} is now in state {}", jobName, jobResult.job.crawlControllerState);
221
222                // POST: h3 is running, and the job with name 'jobName' is running
223                log.trace("h3-State after unpausing job '{}': {}", jobName, new String(jobResult.response, "UTF-8"));
224            } else {
225                log.info("The job {} is now in state {}", jobName, jobResult.job.crawlControllerState);
226            }
227
228        } catch (UnsupportedEncodingException e) {
229            throw new IOFailure("Unexpected error during communication with heritrix3", e);
230        }
231    }
232
233    @Override
234    public void requestCrawlStop(String reason) {
235        log.info("Terminating job {}. Reason: {}", this.jobName, reason);
236        JobResult jobResult = h3wrapper.job(jobName);
237        if (jobResult != null) {
238            if (jobResult.job.isRunning) {
239                JobResult result = h3wrapper.terminateJob(this.jobName);
240                if (!result.job.isRunning) {
241                    log.warn("Job '{}' terminated", this.jobName);
242                } else {
243                    log.warn("Job '{}' not terminated correctly", this.jobName);
244                }
245            } else {
246                log.warn("Job '{}' not terminated, as it was not running", this.jobName);
247            }
248        } else {
249            log.warn("Job '{}' has maybe already been terminated and/or heritrix3 is no longer running", this.jobName);
250        }
251    }
252
253    @Override
254    public void stopHeritrix() {
255        log.debug("Stopping Heritrix3");
256        try {
257            // Check if a heritrix3 process still exists for this jobName
258            ProcessBuilder processBuilder = new ProcessBuilder("pgrep", "-f", jobName);
259            log.info("Looking up heritrix3 process with. " + processBuilder.command());
260            if (processBuilder.start().waitFor() == 0) { // Yes, ask heritrix3 to shutdown, ignoring any jobs named
261                                                         // jobName
262                log.info("Heritrix running, requesting heritrix to stop and ignoring running job '{}'", jobName);
263                h3wrapper.exitJavaProcess(Arrays.asList(new String[] {jobName}));
264            } else {
265                log.info("Heritrix3 process not running for job '{}'", jobName);
266            }
267            // Check again
268            if (processBuilder.start().waitFor() == 0) { // The process is still alive, kill it
269                log.info("Heritrix3 process still running, pkill'ing heritrix3 ");
270                ProcessBuilder killerProcessBuilder = new ProcessBuilder("pkill", "-f", jobName);
271                int pkillExitValue = killerProcessBuilder.start().exitValue();
272                if (pkillExitValue != 0) {
273                    log.warn("Non xero exit value ({}) when trying to pkill Heritrix3.", pkillExitValue);
274                } else {
275                    log.info("Heritrix process terminated successfully with the pkill command {}",
276                            killerProcessBuilder.command());
277                }
278            } else {
279                log.info("Heritrix3 stopped successfully.");
280            }
281        } catch (IOException e) {
282            log.warn("Exception while trying to shutdown heritrix", e);
283        } catch (InterruptedException e) {
284            log.debug("stopHeritrix call interupted", e);
285        }
286    }
287
288    /**
289     * Return the URL for monitoring this instance.
290     *
291     * @return the URL for monitoring this instance.
292     */
293    public String getHeritrixConsoleURL() {
294        return "https://" + SystemUtils.getLocalHostName() + ":" + getGuiPort() + "/engine/job/";
295    }
296
297    /**
298     * Return the URL for monitoring the job of this instance.
299     *
300     * @return the URL for monitoring the job of this instance.
301     */
302    public String getHeritrixJobConsoleURL() {
303        return getHeritrixConsoleURL() + files.getCrawlDir().getName();
304    }
305
306    /**
307     * Cleanup after an Heritrix3 process. This entails sending the shutdown command to the Heritrix3 process, and
308     * killing it forcefully, if it is still alive after waiting the period of time specified by the
309     * CommonSettings.PROCESS_TIMEOUT setting.
310     *
311     * @param crawlDir the crawldir to cleanup (argument is currently not used)
312     * @see IHeritrixController#cleanup()
313     */
314    public void cleanup(File crawlDir) {
315        JobResult jobResult;
316        try {
317            // Check engine status
318            EngineResult engineResult = h3wrapper.rescanJobDirectory();
319            if (engineResult != null) {
320                List<JobShort> knownJobs = engineResult.engine.jobs;
321                if (knownJobs.size() != 1) {
322                    log.warn("Should be one job but there is {} jobs: {}", knownJobs.size(),
323                            knownJobsToString(engineResult));
324                }
325            } else {
326                log.warn("Unresponsive Heritrix3 engine. Let's try continuing the cleanup anyway");
327            }
328
329            // Check that job jobName still exists in H3 engine
330            jobResult = h3wrapper.job(jobName);
331            if (jobResult != null) {
332                if (jobResult.status == ResultStatus.OK && jobResult.job.crawlControllerState != null) {
333                    String TEARDOWN = "teardown";
334                    if (jobResult.job.availableActions.contains(TEARDOWN)) {
335                        log.info("Tearing down h3 job {}", jobName);
336                        jobResult = h3wrapper.teardownJob(jobName);
337                    } else {
338                        String errMsg = "Tearing down h3 job '" + jobName
339                                + "' not possible. Not one of the actions available: "
340                                + StringUtils.join(jobResult.job.availableActions, ",");
341                        log.warn(errMsg);
342                        throw new IOFailure(errMsg);
343                    }
344                }
345            } else {
346                throw new IOFailure("Unexpected error during communication with heritrix3 during cleanup");
347            }
348            // Wait for the state: jobResult.job.crawlControllerState == null (but we only try ten times with 1 second
349            // interval
350            jobResult = h3wrapper.waitForJobState(jobName, null, 10, heritrix3EngineIntervalBetweenRetriesInMillis);
351            // Did we get the expected state?
352            if (jobResult.job.crawlControllerState != null) {
353                log.warn("The job {} is still lurking about. Shutdown heritrix3 and ignore the job", jobName);
354                List<String> jobsToIgnore = new ArrayList<String>();
355                jobsToIgnore.add(jobName);
356                EngineResult result = h3wrapper.exitJavaProcess(jobsToIgnore);
357                if (result == null || (result.status != ResultStatus.RESPONSE_EXCEPTION
358                        && result.status != ResultStatus.OFFLINE)) {
359                    throw new IOFailure("Heritrix3 could not be shut down");
360                }
361            } else {
362                EngineResult result = h3wrapper.exitJavaProcess(null);
363                if (result == null || (result.status != ResultStatus.RESPONSE_EXCEPTION
364                        && result.status != ResultStatus.OFFLINE)) {
365                    throw new IOFailure("Heritrix3 could not be shut down");
366                }
367            }
368        } catch (Throwable e) {
369            throw new IOFailure("Unknown error during communication with heritrix3", e);
370        }
371    }
372
373    private String knownJobsToString(EngineResult engineResult) {
374        String result = "";
375        if (engineResult == null || engineResult.engine == null || engineResult.engine.jobs == null) {
376            result = null;
377        } else {
378            List<JobShort> knownjobs = engineResult.engine.jobs;
379            for (JobShort js : knownjobs) {
380                result += js.shortName + " ";
381            }
382        }
383
384        return result;
385    }
386
387    /**
388     * Return the URL for monitoring this instance.
389     *
390     * @return the URL for monitoring this instance.
391     */
392    public String getAdminInterfaceUrl() {
393        return "https://" + SystemUtils.getLocalHostName() + ":" + getGuiPort() + "/engine";
394    }
395
396    /**
397     * Gets a message that stores the information summarizing the crawl progress.
398     *
399     * @return a message that stores the information summarizing the crawl progress.
400     */
401    public CrawlProgressMessage getCrawlProgress() {
402        Heritrix3Files files = getHeritrixFiles();
403        CrawlProgressMessage cpm = new CrawlProgressMessage(files.getHarvestID(), files.getJobID(),
404                progressStatisticsLegend);
405        cpm.setHostUrl(getHeritrixJobConsoleURL());
406        JobResult jobResult = h3wrapper.job(jobName);
407        if (jobResult != null) {
408            getCrawlServiceAttributes(cpm, jobResult);
409        } else {
410            log.warn("Unable to get Heritrix3 status for job '{}'", jobName);
411        }
412        if (cpm.crawlIsFinished()) {
413            cpm.setStatus(CrawlStatus.CRAWLING_FINISHED);
414            // No need to go further, CrawlService.Job bean does not exist
415            return cpm;
416        }
417        if (jobResult != null) {
418            fetchCrawlServiceJobAttributes(cpm, jobResult);
419        } else {
420            log.warn("Unable to get JobAttributes for job '{}'", jobName);
421        }
422        return cpm;
423    }
424
425    /**
426     * Retrieve the values of the crawl service attributes and add them to the CrawlProgressMessage being put together.
427     *
428     * @param cpm the crawlProgress message being prepared
429     */
430    private void getCrawlServiceAttributes(CrawlProgressMessage cpm, JobResult job) {
431        // TODO check job state??
432        CrawlServiceInfo hStatus = cpm.getHeritrixStatus();
433        hStatus.setAlertCount(job.job.alertCount); // info taken from job information
434        hStatus.setCurrentJob(this.jobName); // Note:Information not taken from H3
435        hStatus.setCrawling(job.job.isRunning);// info taken from job information
436    }
437
438    /**
439     * Retrieve the values of the crawl service job attributes and add them to the CrawlProgressMessage being put
440     * together.
441     *
442     * @param cpm the crawlProgress message being prepared
443     */
444    private void fetchCrawlServiceJobAttributes(CrawlProgressMessage cpm, JobResult job) {
445        CrawlServiceJobInfo jStatus = cpm.getJobStatus();
446
447        /*
448         * timestamp discovered queued downloaded doc/s(avg) KB/s(avg) dl-failures busy-thread mem-use-KB heap-size-KB
449         * congestion max-depth avg-depth 2015-04-29T12:42:54Z 774 573 185 0.9(2.31) 49(41) 16 2 61249 270848 1 456 114
450         */
451        /*
452         * jStatus.setProgressStatistics(newProgressStats); if (progressStatisticsLegend == null) {
453         * progressStatisticsLegend = (String) executeMBeanOperation(CrawlServiceJobOperation.progressStatisticsLegend);
454         * }
455         */
456
457        long totalUriCount = job.job.uriTotalsReport.totalUriCount;
458        long downloadedUriCount = job.job.uriTotalsReport.downloadedUriCount;
459        Double progress;
460        if (totalUriCount == 0) {
461            progress = 0.0;
462        } else {
463            progress = downloadedUriCount * 100.0 / totalUriCount;
464        }
465        jStatus.setProgressStatistics(progress + "%");
466
467        Long elapsedSeconds = job.job.elapsedReport.elapsedMilliseconds;
468        if (elapsedSeconds == null) {
469            elapsedSeconds = -1L;
470        } else {
471            elapsedSeconds = elapsedSeconds / 1000L;
472        }
473        jStatus.setElapsedSeconds(elapsedSeconds);
474
475        Double currentProcessedDocsPerSec = job.job.rateReport.currentDocsPerSecond;
476        if (currentProcessedDocsPerSec == null) {
477            currentProcessedDocsPerSec = new Double(-1L);
478        }
479        jStatus.setCurrentProcessedDocsPerSec(currentProcessedDocsPerSec);
480
481        Double processedDocsPerSec = job.job.rateReport.averageDocsPerSecond;
482        if (processedDocsPerSec == null) {
483            processedDocsPerSec = new Double(-1L);
484        }
485        jStatus.setProcessedDocsPerSec(processedDocsPerSec);
486
487        Integer kbRate = job.job.rateReport.currentKiBPerSec;
488        if (kbRate == null) {
489            kbRate = -1;
490        }
491        jStatus.setCurrentProcessedKBPerSec(kbRate);
492
493        Integer processedKBPerSec = job.job.rateReport.averageKiBPerSec;
494        if (processedKBPerSec == null) {
495            processedKBPerSec = -1;
496        }
497        jStatus.setProcessedKBPerSec(processedKBPerSec);
498
499        Long discoveredFilesCount = job.job.uriTotalsReport.totalUriCount;
500        if (discoveredFilesCount == null) {
501            discoveredFilesCount = -1L;
502        }
503        jStatus.setDiscoveredFilesCount(discoveredFilesCount);
504
505        Long downloadedCount = job.job.uriTotalsReport.downloadedUriCount;
506        if (downloadedCount == null) {
507            downloadedCount = -1L;
508        }
509        jStatus.setDownloadedFilesCount(downloadedCount);
510        /*
511         * 27 queues: 5 active (1 in-process; 0 ready; 4 snoozed); 0 inactive; 0 retired; 22 exhausted
512         */
513        String frontierShortReport = String.format(
514                "%d queues: %d active (%d in-process; %d ready; %d snoozed); %d inactive; %d retired; %d exhausted",
515                job.job.frontierReport.totalQueues, job.job.frontierReport.activeQueues,
516                job.job.frontierReport.inProcessQueues, job.job.frontierReport.readyQueues,
517                job.job.frontierReport.snoozedQueues, job.job.frontierReport.inactiveQueues,
518                job.job.frontierReport.retiredQueues, job.job.frontierReport.exhaustedQueues);
519        jStatus.setFrontierShortReport(frontierShortReport);
520
521        String newStatus = "?";
522        String StringValue = job.job.crawlControllerState;
523        if (StringValue != null) {
524            newStatus = (String) StringValue;
525        }
526        jStatus.setStatus(newStatus);
527
528        switch (newStatus) {
529        case "PAUSING":
530            cpm.setStatus(CrawlStatus.CRAWLER_PAUSING);
531            break;
532        case "PAUSED":
533            cpm.setStatus(CrawlStatus.CRAWLER_PAUSED);
534            break;
535        case "EMPTY":
536            cpm.setStatus(CrawlStatus.CRAWLER_EMPTY);
537            break;
538        default:
539            cpm.setStatus(CrawlStatus.CRAWLER_ACTIVE);
540            break;
541        }
542
543        Integer currentActiveToecount = job.job.loadReport.busyThreads;
544        if (currentActiveToecount == null) {
545            currentActiveToecount = -1;
546        }
547        jStatus.setActiveToeCount(currentActiveToecount);
548    }
549
550    /**
551     * Generates a full frontier report from H3 using an REST call (Groovy script)
552     *
553     * @return a Full frontier report.
554     */
555    public FullFrontierReport getFullFrontierReport() {
556        // construct script request to send
557        HttpPost postRequest = new HttpPost(baseUrl + "job/" + jobName + "/script");
558        StringEntity postEntity = null;
559        try {
560            postEntity = new StringEntity("engine=beanshell&script="
561                    + dk.netarkivet.harvester.heritrix3.Constants.FRONTIER_REPORT_GROOVY_SCRIPT);
562        } catch (UnsupportedEncodingException e) {
563            e.printStackTrace();
564        }
565        postEntity.setContentType("application/x-www-form-urlencoded");
566        postRequest.addHeader("Accept", "application/xml");
567        postRequest.setEntity(postEntity);
568        ScriptResult result = h3wrapper.scriptResult(postRequest);
569        return FullFrontierReport.parseContentsAsXML(jobName, result.response,
570                dk.netarkivet.harvester.heritrix3.Constants.XML_RAWOUT_TAG);
571    }
572
573    @Override
574    public boolean atFinish() {
575        throw new NotImplementedException("Not implemented");
576    }
577
578    @Override
579    public void beginCrawlStop() {
580        throw new NotImplementedException("Not implemented");
581    }
582
583    @Override
584    public void cleanup() {
585        throw new NotImplementedException("Not implemented");
586    }
587
588    @Override
589    public boolean crawlIsEnded() {
590        throw new NotImplementedException("Not implemented");
591    }
592
593    @Override
594    public int getActiveToeCount() {
595        throw new NotImplementedException("Not implemented");
596    }
597
598    @Override
599    public int getCurrentProcessedKBPerSec() {
600        throw new NotImplementedException("Not implemented");
601    }
602
603    @Override
604    public String getHarvestInformation() {
605        throw new NotImplementedException("Not implemented");
606    }
607
608    @Override
609    public String getProgressStats() {
610        throw new NotImplementedException("Not implemented");
611    }
612
613    @Override
614    public long getQueuedUriCount() {
615        throw new NotImplementedException("Not implemented");
616    }
617
618    @Override
619    public boolean isPaused() {
620        throw new NotImplementedException("Not implemented");
621    }
622
623}