001/* 002 * #%L 003 * NetarchiveSuite - heritrix3 controller 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, the National Library of France and the Austrian National Library. 006 * %% 007 * This program is free software: you can redistribute it and/or modify 008 * it under the terms of the GNU Lesser General Public License as 009 * published by the Free Software Foundation, either version 2.1 of the 010 * License, or (at your option) any later version. 011 * 012 * This program is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 * GNU General Lesser Public License for more details. 016 * 017 * You should have received a copy of the GNU General Lesser Public 018 * License along with this program. If not, see 019 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 020 * #L% 021 */ 022package dk.netarkivet.harvester.heritrix3.controller; 023 024import java.io.File; 025import java.io.IOException; 026import java.io.UnsupportedEncodingException; 027import java.util.ArrayList; 028import java.util.Arrays; 029import java.util.List; 030 031import org.apache.commons.lang.StringUtils; 032import org.apache.http.client.methods.HttpPost; 033import org.apache.http.entity.StringEntity; 034import org.netarchivesuite.heritrix3wrapper.EngineResult; 035import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper; 036import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper.CrawlControllerState; 037import org.netarchivesuite.heritrix3wrapper.JobResult; 038import org.netarchivesuite.heritrix3wrapper.ResultStatus; 039import org.netarchivesuite.heritrix3wrapper.ScriptResult; 040import org.netarchivesuite.heritrix3wrapper.jaxb.JobShort; 041import org.slf4j.Logger; 042import org.slf4j.LoggerFactory; 043 044import dk.netarkivet.common.exceptions.HeritrixLaunchException; 045import dk.netarkivet.common.exceptions.IOFailure; 046import dk.netarkivet.common.exceptions.IllegalState; 047import dk.netarkivet.common.exceptions.NotImplementedException; 048import dk.netarkivet.common.utils.SystemUtils; 049import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage; 050import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceInfo; 051import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceJobInfo; 052import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlStatus; 053import dk.netarkivet.harvester.harvesting.frontier.FullFrontierReport; 054import dk.netarkivet.harvester.heritrix3.Heritrix3Files; 055 056/** 057 * This implementation of the HeritrixController interface starts Heritrix3 as a separate process and uses JMX to 058 * communicate with it. Each instance executes exactly one process that runs exactly one crawl job. 059 */ 060public class HeritrixController extends AbstractRestHeritrixController { 061 062 /** The logger for this class. */ 063 private static final Logger log = LoggerFactory.getLogger(HeritrixController.class); 064 065 /** 066 * The name that Heritrix3 gives to the job we ask it to create. 067 */ 068 private String jobName; 069 070 /** The header line (legend) for the statistics report. */ 071 private String progressStatisticsLegend; 072 073 private int heritrix3EngineRetries; 074 private int heritrix3EngineIntervalBetweenRetriesInMillis; 075 076 private String baseUrl; 077 078 /** 079 * Create a BnfHeritrixController object. 080 * 081 * @param files Files that are used to set up Heritrix3. 082 */ 083 public HeritrixController(Heritrix3Files files, String jobName) { 084 super(files); 085 this.jobName = jobName; 086 } 087 088 /** 089 * Initialize the JMXconnection to the Heritrix3. 090 * 091 * @throws IOFailure If Heritrix3 dies before initialisation, or we encounter any problems during the 092 * initialisation. 093 * @see IHeritrixController#initialize() 094 */ 095 @Override 096 public void initialize() { 097 098 ///////////////////////////////////////////////////// 099 // Initialize H3 wrapper 100 ///////////////////////////////////////////////////// 101 102 // TODO these numbers could be settings 103 this.heritrix3EngineRetries = 60; 104 this.heritrix3EngineIntervalBetweenRetriesInMillis = 1000; // 1 second 105 106 h3wrapper = Heritrix3Wrapper.getInstance(getHostName(), getGuiPort(), null, null, getHeritrixAdminName(), 107 getHeritrixAdminPassword()); 108 109 EngineResult engineResult; 110 try { 111 engineResult = h3wrapper.waitForEngineReady(heritrix3EngineRetries, 112 heritrix3EngineIntervalBetweenRetriesInMillis); 113 } catch (Throwable e) { 114 e.printStackTrace(); 115 throw new IOFailure("Heritrix3 engine not started: " + e); 116 } 117 118 if (engineResult != null) { 119 if (engineResult.status != ResultStatus.OK) { 120 String errMsg = "Heritrix3 wrapper could not connect to Heritrix3. Resultstate = " 121 + engineResult.status; 122 log.error(errMsg, engineResult.t); 123 throw new IOFailure(errMsg, engineResult.t); 124 } 125 } else { 126 throw new IOFailure("Unexpected error: Heritrix3 wrapper returned null engine result."); 127 } 128 129 baseUrl = "https://" + getHostName() + ":" + Integer.toString(getGuiPort()) + "/engine/"; 130 131 // POST: Heritrix3 is up and running and responds nicely 132 log.info("Heritrix3 REST interface up and running"); 133 } 134 135 @Override 136 public void requestCrawlStart() { 137 // Create a new job 138 File cxmlFile = getHeritrixFiles().getOrderFile(); 139 File seedsFile = getHeritrixFiles().getSeedsFile(); 140 JobResult jobResult; 141 142 File jobDir = files.getHeritrixJobDir(); 143 if (!jobDir.exists()) { 144 jobDir.mkdirs(); 145 } 146 147 try { 148 log.info("Copying the crawler-beans.cxml file and seeds.txt to the heritrix3 jobdir '{}'", jobDir); 149 Heritrix3Wrapper.copyFile(cxmlFile, jobDir); 150 Heritrix3Wrapper.copyFileAs(seedsFile, jobDir, "seeds.txt"); 151 } catch (IOException e) { 152 throw new IOFailure("Problem occurred during the copying of files to our heritrix job", e); 153 } 154 155 // PRE: h3 is running, and the job files copied to their final location 156 EngineResult engineResult = null; 157 try { 158 engineResult = h3wrapper.rescanJobDirectory(); 159 log.info("H3 jobs available for building: {}", knownJobsToString(engineResult)); 160 161 log.trace("Result of rescanJobDirectory() operation: " + new String(engineResult.response, "UTF-8")); 162 163 jobResult = h3wrapper.buildJobConfiguration(jobName); 164 log.trace("Result of buildJobConfiguration() operation: " + new String(jobResult.response, "UTF-8")); 165 if (jobResult.status == ResultStatus.OK) { 166 if (jobResult.job.statusDescription.equalsIgnoreCase("Unbuilt")) { 167 throw new HeritrixLaunchException("The job '" + jobName + "' could not be built. Last loglines are " 168 + StringUtils.join(jobResult.job.jobLogTail, "\n")); 169 } else if (jobResult.job.statusDescription.equalsIgnoreCase("Ready")) { 170 log.info("Job {} built successfully", jobName); 171 } else if (jobResult.job.statusDescription.startsWith("Finished")) { // Created but not launchable 172 log.warn("The job {} seems unlaunchable. Tearing down the job. Last loglines are ", jobName, 173 StringUtils.join(jobResult.job.jobLogTail, "\n")); 174 jobResult = h3wrapper.teardownJob(jobName); 175 log.trace("Result of teardown() operation: " + new String(jobResult.response, "UTF-8")); 176 throw new HeritrixLaunchException("Job '" + jobName + "' failed to launch: " 177 + StringUtils.join(jobResult.job.jobLogTail, "\n")); 178 } else { 179 throw new IllegalState( 180 "Unknown job.statusdescription returned from h3: " + jobResult.job.statusDescription); 181 } 182 } else { 183 throw new IllegalState( 184 "Unknown ResultStatus returned from h3wrapper: " + ResultStatus.toString(jobResult.status)); 185 } 186 187 jobResult = h3wrapper.waitForJobState(jobName, CrawlControllerState.NASCENT, 60, 1000); 188 if (jobResult.job.crawlControllerState.equalsIgnoreCase(CrawlControllerState.NASCENT.toString())) { 189 log.info("The H3 job {} in now in state CrawlControllerState.NASCENT", jobName); 190 } else { 191 log.warn("The job state is now {}. Should have been CrawlControllerState.NASCENT", 192 jobResult.job.crawlControllerState); 193 } 194 jobResult = h3wrapper.launchJob(jobName); 195 196 log.trace("Result of launchJob() operation: " + new String(jobResult.response, "UTF-8")); 197 jobResult = h3wrapper.waitForJobState(jobName, CrawlControllerState.PAUSED, 60, 1000); 198 if (jobResult.job.crawlControllerState.equalsIgnoreCase(CrawlControllerState.PAUSED.toString())) { 199 log.info("The H3 job {} in now in state CrawlControllerState.PAUSED", jobName); 200 } else { 201 log.warn("The job state is now {}. Should have been CrawlControllerState.PAUSED", 202 jobResult.job.crawlControllerState); 203 } 204 205 // check if param pauseAtStart is true 206 ScriptResult scriptResult = h3wrapper.ExecuteShellScriptInJob(jobName, "groovy", 207 "rawOut.println crawlController.pauseAtStart\n"); 208 boolean pauseAtStart = false; 209 if (scriptResult != null && scriptResult.script != null) { 210 String rawOutput = scriptResult.script.rawOutput; // false\n or true\n 211 if (rawOutput.endsWith("\n") || rawOutput.endsWith("\r")) { 212 rawOutput = rawOutput.substring(0, rawOutput.length() - 1); 213 } 214 pauseAtStart = Boolean.parseBoolean(rawOutput); 215 } 216 log.info("The parameter pauseAtStart is {}", pauseAtStart); 217 // if param pauseAtStart is false 218 if (pauseAtStart == false) { 219 jobResult = h3wrapper.unpauseJob(jobName); 220 log.info("The job {} is now in state {}", jobName, jobResult.job.crawlControllerState); 221 222 // POST: h3 is running, and the job with name 'jobName' is running 223 log.trace("h3-State after unpausing job '{}': {}", jobName, new String(jobResult.response, "UTF-8")); 224 } else { 225 log.info("The job {} is now in state {}", jobName, jobResult.job.crawlControllerState); 226 } 227 228 } catch (UnsupportedEncodingException e) { 229 throw new IOFailure("Unexpected error during communication with heritrix3", e); 230 } 231 } 232 233 @Override 234 public void requestCrawlStop(String reason) { 235 log.info("Terminating job {}. Reason: {}", this.jobName, reason); 236 JobResult jobResult = h3wrapper.job(jobName); 237 if (jobResult != null) { 238 if (jobResult.job.isRunning) { 239 JobResult result = h3wrapper.terminateJob(this.jobName); 240 if (!result.job.isRunning) { 241 log.warn("Job '{}' terminated", this.jobName); 242 } else { 243 log.warn("Job '{}' not terminated correctly", this.jobName); 244 } 245 } else { 246 log.warn("Job '{}' not terminated, as it was not running", this.jobName); 247 } 248 } else { 249 log.warn("Job '{}' has maybe already been terminated and/or heritrix3 is no longer running", this.jobName); 250 } 251 } 252 253 @Override 254 public void stopHeritrix() { 255 log.debug("Stopping Heritrix3"); 256 try { 257 // Check if a heritrix3 process still exists for this jobName 258 ProcessBuilder processBuilder = new ProcessBuilder("pgrep", "-f", jobName); 259 log.info("Looking up heritrix3 process with. " + processBuilder.command()); 260 if (processBuilder.start().waitFor() == 0) { // Yes, ask heritrix3 to shutdown, ignoring any jobs named 261 // jobName 262 log.info("Heritrix running, requesting heritrix to stop and ignoring running job '{}'", jobName); 263 h3wrapper.exitJavaProcess(Arrays.asList(new String[] {jobName})); 264 } else { 265 log.info("Heritrix3 process not running for job '{}'", jobName); 266 } 267 // Check again 268 if (processBuilder.start().waitFor() == 0) { // The process is still alive, kill it 269 log.info("Heritrix3 process still running, pkill'ing heritrix3 "); 270 ProcessBuilder killerProcessBuilder = new ProcessBuilder("pkill", "-f", jobName); 271 int pkillExitValue = killerProcessBuilder.start().exitValue(); 272 if (pkillExitValue != 0) { 273 log.warn("Non xero exit value ({}) when trying to pkill Heritrix3.", pkillExitValue); 274 } else { 275 log.info("Heritrix process terminated successfully with the pkill command {}", 276 killerProcessBuilder.command()); 277 } 278 } else { 279 log.info("Heritrix3 stopped successfully."); 280 } 281 } catch (IOException e) { 282 log.warn("Exception while trying to shutdown heritrix", e); 283 } catch (InterruptedException e) { 284 log.debug("stopHeritrix call interupted", e); 285 } 286 } 287 288 /** 289 * Return the URL for monitoring this instance. 290 * 291 * @return the URL for monitoring this instance. 292 */ 293 public String getHeritrixConsoleURL() { 294 return "https://" + SystemUtils.getLocalHostName() + ":" + getGuiPort() + "/engine/job/"; 295 } 296 297 /** 298 * Return the URL for monitoring the job of this instance. 299 * 300 * @return the URL for monitoring the job of this instance. 301 */ 302 public String getHeritrixJobConsoleURL() { 303 return getHeritrixConsoleURL() + files.getCrawlDir().getName(); 304 } 305 306 /** 307 * Cleanup after an Heritrix3 process. This entails sending the shutdown command to the Heritrix3 process, and 308 * killing it forcefully, if it is still alive after waiting the period of time specified by the 309 * CommonSettings.PROCESS_TIMEOUT setting. 310 * 311 * @param crawlDir the crawldir to cleanup (argument is currently not used) 312 * @see IHeritrixController#cleanup() 313 */ 314 public void cleanup(File crawlDir) { 315 JobResult jobResult; 316 try { 317 // Check engine status 318 EngineResult engineResult = h3wrapper.rescanJobDirectory(); 319 if (engineResult != null) { 320 List<JobShort> knownJobs = engineResult.engine.jobs; 321 if (knownJobs.size() != 1) { 322 log.warn("Should be one job but there is {} jobs: {}", knownJobs.size(), 323 knownJobsToString(engineResult)); 324 } 325 } else { 326 log.warn("Unresponsive Heritrix3 engine. Let's try continuing the cleanup anyway"); 327 } 328 329 // Check that job jobName still exists in H3 engine 330 jobResult = h3wrapper.job(jobName); 331 if (jobResult != null) { 332 if (jobResult.status == ResultStatus.OK && jobResult.job.crawlControllerState != null) { 333 String TEARDOWN = "teardown"; 334 if (jobResult.job.availableActions.contains(TEARDOWN)) { 335 log.info("Tearing down h3 job {}", jobName); 336 jobResult = h3wrapper.teardownJob(jobName); 337 } else { 338 String errMsg = "Tearing down h3 job '" + jobName 339 + "' not possible. Not one of the actions available: " 340 + StringUtils.join(jobResult.job.availableActions, ","); 341 log.warn(errMsg); 342 throw new IOFailure(errMsg); 343 } 344 } 345 } else { 346 throw new IOFailure("Unexpected error during communication with heritrix3 during cleanup"); 347 } 348 // Wait for the state: jobResult.job.crawlControllerState == null (but we only try ten times with 1 second 349 // interval 350 jobResult = h3wrapper.waitForJobState(jobName, null, 10, heritrix3EngineIntervalBetweenRetriesInMillis); 351 // Did we get the expected state? 352 if (jobResult.job.crawlControllerState != null) { 353 log.warn("The job {} is still lurking about. Shutdown heritrix3 and ignore the job", jobName); 354 List<String> jobsToIgnore = new ArrayList<String>(); 355 jobsToIgnore.add(jobName); 356 EngineResult result = h3wrapper.exitJavaProcess(jobsToIgnore); 357 if (result == null || (result.status != ResultStatus.RESPONSE_EXCEPTION 358 && result.status != ResultStatus.OFFLINE)) { 359 throw new IOFailure("Heritrix3 could not be shut down"); 360 } 361 } else { 362 EngineResult result = h3wrapper.exitJavaProcess(null); 363 if (result == null || (result.status != ResultStatus.RESPONSE_EXCEPTION 364 && result.status != ResultStatus.OFFLINE)) { 365 throw new IOFailure("Heritrix3 could not be shut down"); 366 } 367 } 368 } catch (Throwable e) { 369 throw new IOFailure("Unknown error during communication with heritrix3", e); 370 } 371 } 372 373 private String knownJobsToString(EngineResult engineResult) { 374 String result = ""; 375 if (engineResult == null || engineResult.engine == null || engineResult.engine.jobs == null) { 376 result = null; 377 } else { 378 List<JobShort> knownjobs = engineResult.engine.jobs; 379 for (JobShort js : knownjobs) { 380 result += js.shortName + " "; 381 } 382 } 383 384 return result; 385 } 386 387 /** 388 * Return the URL for monitoring this instance. 389 * 390 * @return the URL for monitoring this instance. 391 */ 392 public String getAdminInterfaceUrl() { 393 return "https://" + SystemUtils.getLocalHostName() + ":" + getGuiPort() + "/engine"; 394 } 395 396 /** 397 * Gets a message that stores the information summarizing the crawl progress. 398 * 399 * @return a message that stores the information summarizing the crawl progress. 400 */ 401 public CrawlProgressMessage getCrawlProgress() { 402 Heritrix3Files files = getHeritrixFiles(); 403 CrawlProgressMessage cpm = new CrawlProgressMessage(files.getHarvestID(), files.getJobID(), 404 progressStatisticsLegend); 405 cpm.setHostUrl(getHeritrixJobConsoleURL()); 406 JobResult jobResult = h3wrapper.job(jobName); 407 if (jobResult != null) { 408 getCrawlServiceAttributes(cpm, jobResult); 409 } else { 410 log.warn("Unable to get Heritrix3 status for job '{}'", jobName); 411 } 412 if (cpm.crawlIsFinished()) { 413 cpm.setStatus(CrawlStatus.CRAWLING_FINISHED); 414 // No need to go further, CrawlService.Job bean does not exist 415 return cpm; 416 } 417 if (jobResult != null) { 418 fetchCrawlServiceJobAttributes(cpm, jobResult); 419 } else { 420 log.warn("Unable to get JobAttributes for job '{}'", jobName); 421 } 422 return cpm; 423 } 424 425 /** 426 * Retrieve the values of the crawl service attributes and add them to the CrawlProgressMessage being put together. 427 * 428 * @param cpm the crawlProgress message being prepared 429 */ 430 private void getCrawlServiceAttributes(CrawlProgressMessage cpm, JobResult job) { 431 // TODO check job state?? 432 CrawlServiceInfo hStatus = cpm.getHeritrixStatus(); 433 hStatus.setAlertCount(job.job.alertCount); // info taken from job information 434 hStatus.setCurrentJob(this.jobName); // Note:Information not taken from H3 435 hStatus.setCrawling(job.job.isRunning);// info taken from job information 436 } 437 438 /** 439 * Retrieve the values of the crawl service job attributes and add them to the CrawlProgressMessage being put 440 * together. 441 * 442 * @param cpm the crawlProgress message being prepared 443 */ 444 private void fetchCrawlServiceJobAttributes(CrawlProgressMessage cpm, JobResult job) { 445 CrawlServiceJobInfo jStatus = cpm.getJobStatus(); 446 447 /* 448 * timestamp discovered queued downloaded doc/s(avg) KB/s(avg) dl-failures busy-thread mem-use-KB heap-size-KB 449 * congestion max-depth avg-depth 2015-04-29T12:42:54Z 774 573 185 0.9(2.31) 49(41) 16 2 61249 270848 1 456 114 450 */ 451 /* 452 * jStatus.setProgressStatistics(newProgressStats); if (progressStatisticsLegend == null) { 453 * progressStatisticsLegend = (String) executeMBeanOperation(CrawlServiceJobOperation.progressStatisticsLegend); 454 * } 455 */ 456 457 long totalUriCount = job.job.uriTotalsReport.totalUriCount; 458 long downloadedUriCount = job.job.uriTotalsReport.downloadedUriCount; 459 Double progress; 460 if (totalUriCount == 0) { 461 progress = 0.0; 462 } else { 463 progress = downloadedUriCount * 100.0 / totalUriCount; 464 } 465 jStatus.setProgressStatistics(progress + "%"); 466 467 Long elapsedSeconds = job.job.elapsedReport.elapsedMilliseconds; 468 if (elapsedSeconds == null) { 469 elapsedSeconds = -1L; 470 } else { 471 elapsedSeconds = elapsedSeconds / 1000L; 472 } 473 jStatus.setElapsedSeconds(elapsedSeconds); 474 475 Double currentProcessedDocsPerSec = job.job.rateReport.currentDocsPerSecond; 476 if (currentProcessedDocsPerSec == null) { 477 currentProcessedDocsPerSec = new Double(-1L); 478 } 479 jStatus.setCurrentProcessedDocsPerSec(currentProcessedDocsPerSec); 480 481 Double processedDocsPerSec = job.job.rateReport.averageDocsPerSecond; 482 if (processedDocsPerSec == null) { 483 processedDocsPerSec = new Double(-1L); 484 } 485 jStatus.setProcessedDocsPerSec(processedDocsPerSec); 486 487 Integer kbRate = job.job.rateReport.currentKiBPerSec; 488 if (kbRate == null) { 489 kbRate = -1; 490 } 491 jStatus.setCurrentProcessedKBPerSec(kbRate); 492 493 Integer processedKBPerSec = job.job.rateReport.averageKiBPerSec; 494 if (processedKBPerSec == null) { 495 processedKBPerSec = -1; 496 } 497 jStatus.setProcessedKBPerSec(processedKBPerSec); 498 499 Long discoveredFilesCount = job.job.uriTotalsReport.totalUriCount; 500 if (discoveredFilesCount == null) { 501 discoveredFilesCount = -1L; 502 } 503 jStatus.setDiscoveredFilesCount(discoveredFilesCount); 504 505 Long downloadedCount = job.job.uriTotalsReport.downloadedUriCount; 506 if (downloadedCount == null) { 507 downloadedCount = -1L; 508 } 509 jStatus.setDownloadedFilesCount(downloadedCount); 510 /* 511 * 27 queues: 5 active (1 in-process; 0 ready; 4 snoozed); 0 inactive; 0 retired; 22 exhausted 512 */ 513 String frontierShortReport = String.format( 514 "%d queues: %d active (%d in-process; %d ready; %d snoozed); %d inactive; %d retired; %d exhausted", 515 job.job.frontierReport.totalQueues, job.job.frontierReport.activeQueues, 516 job.job.frontierReport.inProcessQueues, job.job.frontierReport.readyQueues, 517 job.job.frontierReport.snoozedQueues, job.job.frontierReport.inactiveQueues, 518 job.job.frontierReport.retiredQueues, job.job.frontierReport.exhaustedQueues); 519 jStatus.setFrontierShortReport(frontierShortReport); 520 521 String newStatus = "?"; 522 String StringValue = job.job.crawlControllerState; 523 if (StringValue != null) { 524 newStatus = (String) StringValue; 525 } 526 jStatus.setStatus(newStatus); 527 528 switch (newStatus) { 529 case "PAUSING": 530 cpm.setStatus(CrawlStatus.CRAWLER_PAUSING); 531 break; 532 case "PAUSED": 533 cpm.setStatus(CrawlStatus.CRAWLER_PAUSED); 534 break; 535 case "EMPTY": 536 cpm.setStatus(CrawlStatus.CRAWLER_EMPTY); 537 break; 538 default: 539 cpm.setStatus(CrawlStatus.CRAWLER_ACTIVE); 540 break; 541 } 542 543 Integer currentActiveToecount = job.job.loadReport.busyThreads; 544 if (currentActiveToecount == null) { 545 currentActiveToecount = -1; 546 } 547 jStatus.setActiveToeCount(currentActiveToecount); 548 } 549 550 /** 551 * Generates a full frontier report from H3 using an REST call (Groovy script) 552 * 553 * @return a Full frontier report. 554 */ 555 public FullFrontierReport getFullFrontierReport() { 556 // construct script request to send 557 HttpPost postRequest = new HttpPost(baseUrl + "job/" + jobName + "/script"); 558 StringEntity postEntity = null; 559 try { 560 postEntity = new StringEntity("engine=beanshell&script=" 561 + dk.netarkivet.harvester.heritrix3.Constants.FRONTIER_REPORT_GROOVY_SCRIPT); 562 } catch (UnsupportedEncodingException e) { 563 e.printStackTrace(); 564 } 565 postEntity.setContentType("application/x-www-form-urlencoded"); 566 postRequest.addHeader("Accept", "application/xml"); 567 postRequest.setEntity(postEntity); 568 ScriptResult result = h3wrapper.scriptResult(postRequest); 569 return FullFrontierReport.parseContentsAsXML(jobName, result.response, 570 dk.netarkivet.harvester.heritrix3.Constants.XML_RAWOUT_TAG); 571 } 572 573 @Override 574 public boolean atFinish() { 575 throw new NotImplementedException("Not implemented"); 576 } 577 578 @Override 579 public void beginCrawlStop() { 580 throw new NotImplementedException("Not implemented"); 581 } 582 583 @Override 584 public void cleanup() { 585 throw new NotImplementedException("Not implemented"); 586 } 587 588 @Override 589 public boolean crawlIsEnded() { 590 throw new NotImplementedException("Not implemented"); 591 } 592 593 @Override 594 public int getActiveToeCount() { 595 throw new NotImplementedException("Not implemented"); 596 } 597 598 @Override 599 public int getCurrentProcessedKBPerSec() { 600 throw new NotImplementedException("Not implemented"); 601 } 602 603 @Override 604 public String getHarvestInformation() { 605 throw new NotImplementedException("Not implemented"); 606 } 607 608 @Override 609 public String getProgressStats() { 610 throw new NotImplementedException("Not implemented"); 611 } 612 613 @Override 614 public long getQueuedUriCount() { 615 throw new NotImplementedException("Not implemented"); 616 } 617 618 @Override 619 public boolean isPaused() { 620 throw new NotImplementedException("Not implemented"); 621 } 622 623}