/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2018 The Royal Danish Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.harvester.heritrix3.controller;

import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.netarchivesuite.heritrix3wrapper.EngineResult;
import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper;
import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper.CrawlControllerState;
import org.netarchivesuite.heritrix3wrapper.JobResult;
import org.netarchivesuite.heritrix3wrapper.ResultStatus;
import org.netarchivesuite.heritrix3wrapper.ScriptResult;
import org.netarchivesuite.heritrix3wrapper.jaxb.JobShort;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.HeritrixLaunchException;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.exceptions.NotImplementedException;
import dk.netarkivet.common.utils.SystemUtils;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceInfo;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlServiceJobInfo;
import dk.netarkivet.harvester.harvesting.distribute.CrawlProgressMessage.CrawlStatus;
import dk.netarkivet.harvester.harvesting.frontier.FullFrontierReport;
import dk.netarkivet.harvester.heritrix3.Heritrix3Files;

/**
 * This implementation of the HeritrixController interface starts Heritrix3 as a separate process and communicates
 * with it through the Heritrix3 REST interface (via {@link Heritrix3Wrapper}). Each instance executes exactly one
 * process that runs exactly one crawl job.
 */
public class HeritrixController extends AbstractRestHeritrixController {

    /** The logger for this class. */
    private static final Logger log = LoggerFactory.getLogger(HeritrixController.class);

    /**
     * The name that Heritrix3 gives to the job we ask it to create.
     */
    private String jobName;

    /** The header line (legend) for the statistics report. */
    private String progressStatisticsLegend;

    /** Number of times to poll the H3 engine before giving up (set in {@link #initialize()}). */
    private int heritrix3EngineRetries;
    /** Pause between engine polls, in milliseconds (set in {@link #initialize()}). */
    private int heritrix3EngineIntervalBetweenRetriesInMillis;

    /** Base URL of the H3 engine REST interface; assigned by {@link #initialize()}. */
    private String baseUrl;

    /**
     * Create a HeritrixController object.
     *
     * @param files Files that are used to set up Heritrix3.
     * @param jobName the name Heritrix3 will use for the job managed by this controller.
     */
    public HeritrixController(Heritrix3Files files, String jobName) {
        super(files);
        this.jobName = jobName;
    }

    /**
     * Initialize the REST connection to Heritrix3 and wait for the engine to become ready.
     *
     * @throws IOFailure If Heritrix3 dies before initialisation, or we encounter any problems during the initialisation.
     * @see IHeritrixController#initialize()
     */
    @Override
    public void initialize() {

        /////////////////////////////////////////////////////
        // Initialize H3 wrapper
        /////////////////////////////////////////////////////

        //TODO these numbers could be settings
        this.heritrix3EngineRetries = 60;
        this.heritrix3EngineIntervalBetweenRetriesInMillis = 1000; // 1 second

        h3wrapper = Heritrix3Wrapper.getInstance(getHostName(), getGuiPort(),
                null, null, getHeritrixAdminName(), getHeritrixAdminPassword());

        EngineResult engineResult;
        try {
            engineResult = h3wrapper.waitForEngineReady(heritrix3EngineRetries,
                    heritrix3EngineIntervalBetweenRetriesInMillis);
        } catch (Throwable e) {
            // BUGFIX: log through the logger instead of printStackTrace(), and
            // preserve the cause on the thrown IOFailure for diagnosis upstream.
            log.error("Heritrix3 engine not started", e);
            throw new IOFailure("Heritrix3 engine not started: " + e, e);
        }

        if (engineResult != null) {
            if (engineResult.status != ResultStatus.OK) {
                String errMsg = "Heritrix3 wrapper could not connect to Heritrix3. Resultstate = " + engineResult.status;
                log.error(errMsg, engineResult.t);
                throw new IOFailure(errMsg, engineResult.t);
            }
        } else {
            throw new IOFailure("Unexpected error: Heritrix3 wrapper returned null engine result.");
        }

        baseUrl = "https://" + getHostName() + ":" + Integer.toString(getGuiPort()) + "/engine/";

        // POST: Heritrix3 is up and running and responds nicely
        log.info("Heritrix3 REST interface up and running");
    }

    /**
     * Copy the job files into the H3 job directory, then build and launch the job. The job is left either
     * running or paused depending on the crawlController.pauseAtStart flag read back from H3.
     *
     * @throws IOFailure if the job files cannot be copied, or a REST response cannot be decoded.
     * @throws HeritrixLaunchException if the job cannot be built or launched.
     * @throws IllegalState if H3 reports a job state or result status this code does not recognize.
     */
    @Override
    public void requestCrawlStart() {
        // Create a new job
        File cxmlFile = getHeritrixFiles().getOrderFile();
        File seedsFile = getHeritrixFiles().getSeedsFile();
        JobResult jobResult;

        File jobDir = files.getHeritrixJobDir();
        if (!jobDir.exists()) {
            jobDir.mkdirs();
        }

        try {
            log.info("Copying the crawler-beans.cxml file and seeds.txt to the heritrix3 jobdir '{}'", jobDir);
            Heritrix3Wrapper.copyFile(cxmlFile, jobDir);
            Heritrix3Wrapper.copyFileAs(seedsFile, jobDir, "seeds.txt");
        } catch (IOException e) {
            throw new IOFailure("Problem occurred during the copying of files to our heritrix job", e);
        }

        // PRE: h3 is running, and the job files copied to their final location
        EngineResult engineResult = null;
        try {
            engineResult = h3wrapper.rescanJobDirectory();
            log.info("H3 jobs available for building: {}", knownJobsToString(engineResult));

            log.trace("Result of rescanJobDirectory() operation: " + new String(engineResult.response, "UTF-8"));

            jobResult = h3wrapper.buildJobConfiguration(jobName);
            log.trace("Result of buildJobConfiguration() operation: " + new String(jobResult.response, "UTF-8"));
            if (jobResult.status == ResultStatus.OK) {
                if (jobResult.job.statusDescription.equalsIgnoreCase("Unbuilt")) {
                    throw new HeritrixLaunchException("The job '" + jobName + "' could not be built. Last loglines are "
                            + StringUtils.join(jobResult.job.jobLogTail, "\n"));
                } else if (jobResult.job.statusDescription.equalsIgnoreCase("Ready")) {
                    log.info("Job {} built successfully", jobName);
                } else if (jobResult.job.statusDescription.startsWith("Finished")) { // Created but not launchable
                    // BUGFIX: the original message had only one '{}' placeholder for two arguments,
                    // so SLF4J silently dropped the log tail.
                    log.warn("The job {} seems unlaunchable. Tearing down the job. Last loglines are {}", jobName,
                            StringUtils.join(jobResult.job.jobLogTail, "\n"));
                    jobResult = h3wrapper.teardownJob(jobName);
                    log.trace("Result of teardown() operation: " + new String(jobResult.response, "UTF-8"));
                    throw new HeritrixLaunchException("Job '" + jobName + "' failed to launch: "
                            + StringUtils.join(jobResult.job.jobLogTail, "\n"));
                } else {
                    throw new IllegalState("Unknown job.statusdescription returned from h3: "
                            + jobResult.job.statusDescription);
                }
            } else {
                throw new IllegalState("Unknown ResultStatus returned from h3wrapper: "
                        + ResultStatus.toString(jobResult.status));
            }

            jobResult = h3wrapper.waitForJobState(jobName, CrawlControllerState.NASCENT, 60, 1000);
            if (jobResult.job.crawlControllerState.equalsIgnoreCase(CrawlControllerState.NASCENT.toString())) {
                log.info("The H3 job {} in now in state CrawlControllerState.NASCENT", jobName);
            } else {
                log.warn("The job state is now {}. Should have been CrawlControllerState.NASCENT",
                        jobResult.job.crawlControllerState);
            }
            jobResult = h3wrapper.launchJob(jobName);

            log.trace("Result of launchJob() operation: " + new String(jobResult.response, "UTF-8"));
            jobResult = h3wrapper.waitForJobState(jobName, CrawlControllerState.PAUSED, 60, 1000);
            if (jobResult.job.crawlControllerState.equalsIgnoreCase(CrawlControllerState.PAUSED.toString())) {
                log.info("The H3 job {} in now in state CrawlControllerState.PAUSED", jobName);
            } else {
                log.warn("The job state is now {}. Should have been CrawlControllerState.PAUSED",
                        jobResult.job.crawlControllerState);
            }

            //check if param pauseAtStart is true
            ScriptResult scriptResult = h3wrapper.ExecuteShellScriptInJob(jobName, "groovy",
                    "rawOut.println crawlController.pauseAtStart\n");
            boolean pauseAtStart = false;
            if (scriptResult != null && scriptResult.script != null) {
                String rawOutput = scriptResult.script.rawOutput; //false\n or true\n
                // BUGFIX: the original stripped only ONE trailing character, so CRLF-terminated
                // output ("true\r\n") was left as "true\r" and parseBoolean() returned false.
                pauseAtStart = Boolean.parseBoolean(rawOutput.trim());
            }
            log.info("The parameter pauseAtStart is {}", pauseAtStart);
            //if param pauseAtStart is false
            if (!pauseAtStart) {
                jobResult = h3wrapper.unpauseJob(jobName);
                log.info("The job {} is now in state {}", jobName, jobResult.job.crawlControllerState);

                // POST: h3 is running, and the job with name 'jobName' is running
                log.trace("h3-State after unpausing job '{}': {}", jobName, new String(jobResult.response, "UTF-8"));
            } else {
                log.info("The job {} is now in state {}", jobName, jobResult.job.crawlControllerState);
            }

        } catch (UnsupportedEncodingException e) {
            throw new IOFailure("Unexpected error during communication with heritrix3", e);
        }
    }

    /**
     * Ask H3 to terminate the job managed by this controller, if it is still running.
     *
     * @param reason human-readable reason for the termination (logged only).
     */
    @Override
    public void requestCrawlStop(String reason) {
        log.info("Terminating job {}. Reason: {}", this.jobName, reason);
        JobResult jobResult = h3wrapper.job(jobName);
        if (jobResult != null) {
            if (jobResult.job.isRunning) {
                JobResult result = h3wrapper.terminateJob(this.jobName);
                if (!result.job.isRunning) {
                    log.warn("Job '{}' terminated", this.jobName);
                } else {
                    log.warn("Job '{}' not terminated correctly", this.jobName);
                }
            } else {
                log.warn("Job '{}' not terminated, as it was not running", this.jobName);
            }
        } else {
            log.warn("Job '{}' has maybe already been terminated and/or heritrix3 is no longer running", this.jobName);
        }
    }

    /**
     * Stop the Heritrix3 process belonging to this job: first ask it to exit via the REST API,
     * then pkill it if a process matching the job name is still alive.
     */
    @Override
    public void stopHeritrix() {
        log.debug("Stopping Heritrix3");
        try {
            // Check if a heritrix3 process still exists for this jobName
            ProcessBuilder processBuilder = new ProcessBuilder("pgrep", "-f", jobName);
            log.info("Looking up heritrix3 process with. " + processBuilder.command());
            if (processBuilder.start().waitFor() == 0) { // Yes, ask heritrix3 to shutdown, ignoring any jobs named jobName
                log.info("Heritrix running, requesting heritrix to stop and ignoring running job '{}'", jobName);
                h3wrapper.exitJavaProcess(Arrays.asList(new String[] {jobName}));
            } else {
                log.info("Heritrix3 process not running for job '{}'", jobName);
            }
            // Check again
            if (processBuilder.start().waitFor() == 0) { // The process is still alive, kill it
                log.info("Heritrix3 process still running, pkill'ing heritrix3 ");
                ProcessBuilder killerProcessBuilder = new ProcessBuilder("pkill", "-f", jobName);
                // BUGFIX: exitValue() was called on a freshly started process, which throws
                // IllegalThreadStateException while pkill is still running; waitFor() blocks
                // until pkill terminates and returns its exit code.
                int pkillExitValue = killerProcessBuilder.start().waitFor();
                if (pkillExitValue != 0) {
                    log.warn("Non zero exit value ({}) when trying to pkill Heritrix3.", pkillExitValue);
                } else {
                    log.info("Heritrix process terminated successfully with the pkill command {}",
                            killerProcessBuilder.command());
                }
            } else {
                log.info("Heritrix3 stopped successfully.");
            }
        } catch (IOException e) {
            log.warn("Exception while trying to shutdown heritrix", e);
        } catch (InterruptedException e) {
            log.debug("stopHeritrix call interrupted", e);
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Return the URL for monitoring this instance.
     *
     * @return the URL for monitoring this instance.
     */
    public String getHeritrixConsoleURL() {
        return "https://" + SystemUtils.getLocalHostName() + ":" + getGuiPort() + "/engine/job/";
    }

    /**
     * Return the URL for monitoring the job of this instance.
     *
     * @return the URL for monitoring the job of this instance.
     */
    public String getHeritrixJobConsoleURL() {
        return getHeritrixConsoleURL() + files.getCrawlDir().getName();
    }

    /**
     * Cleanup after an Heritrix3 process. This entails sending the shutdown command to the Heritrix3 process, and
     * killing it forcefully, if it is still alive after waiting the period of time specified by the
     * CommonSettings.PROCESS_TIMEOUT setting.
     *
     * @param crawlDir the crawldir to cleanup (argument is currently not used)
     * @throws IOFailure if the job cannot be torn down or the engine cannot be shut down.
     * @see IHeritrixController#cleanup()
     */
    public void cleanup(File crawlDir) {
        JobResult jobResult;
        try {
            // Check engine status
            EngineResult engineResult = h3wrapper.rescanJobDirectory();
            if (engineResult != null) {
                List<JobShort> knownJobs = engineResult.engine.jobs;
                if (knownJobs.size() != 1) {
                    log.warn("Should be one job but there is {} jobs: {}", knownJobs.size(),
                            knownJobsToString(engineResult));
                }
            } else {
                log.warn("Unresponsive Heritrix3 engine. Let's try continuing the cleanup anyway");
            }

            // Check that job jobName still exists in H3 engine
            jobResult = h3wrapper.job(jobName);
            if (jobResult != null) {
                if (jobResult.status == ResultStatus.OK && jobResult.job.crawlControllerState != null) {
                    String TEARDOWN = "teardown";
                    if (jobResult.job.availableActions.contains(TEARDOWN)) {
                        log.info("Tearing down h3 job {}", jobName);
                        jobResult = h3wrapper.teardownJob(jobName);
                    } else {
                        String errMsg = "Tearing down h3 job '" + jobName + "' not possible. Not one of the actions available: "
                                + StringUtils.join(jobResult.job.availableActions, ",");
                        log.warn(errMsg);
                        throw new IOFailure(errMsg);
                    }
                }
            } else {
                throw new IOFailure("Unexpected error during communication with heritrix3 during cleanup");
            }
            // Wait for the state: jobResult.job.crawlControllerState == null (but we only try ten times with 1 second interval
            jobResult = h3wrapper.waitForJobState(jobName, null, 10, heritrix3EngineIntervalBetweenRetriesInMillis);
            // Did we get the expected state?
            if (jobResult.job.crawlControllerState != null) {
                log.warn("The job {} is still lurking about. Shutdown heritrix3 and ignore the job", jobName);
                List<String> jobsToIgnore = new ArrayList<String>();
                jobsToIgnore.add(jobName);
                EngineResult result = h3wrapper.exitJavaProcess(jobsToIgnore);
                // NOTE(review): shutdown is considered successful when the engine stops responding
                // (RESPONSE_EXCEPTION or OFFLINE), hence the inverted-looking condition below.
                if (result == null || (result.status != ResultStatus.RESPONSE_EXCEPTION
                        && result.status != ResultStatus.OFFLINE)) {
                    throw new IOFailure("Heritrix3 could not be shut down");
                }
            } else {
                EngineResult result = h3wrapper.exitJavaProcess(null);
                if (result == null || (result.status != ResultStatus.RESPONSE_EXCEPTION
                        && result.status != ResultStatus.OFFLINE)) {
                    throw new IOFailure("Heritrix3 could not be shut down");
                }
            }
        } catch (Throwable e) {
            throw new IOFailure("Unknown error during communication with heritrix3", e);
        }
    }

    /**
     * Render the short names of the jobs known by the engine as a space-separated string.
     *
     * @param engineResult result of a REST call against the engine; may be null.
     * @return the job names, or null when the engine result carries no job list.
     */
    private String knownJobsToString(EngineResult engineResult) {
        String result = "";
        if (engineResult == null || engineResult.engine == null || engineResult.engine.jobs == null) {
            result = null;
        } else {
            List<JobShort> knownjobs = engineResult.engine.jobs;
            StringBuilder sb = new StringBuilder();
            for (JobShort js : knownjobs) {
                sb.append(js.shortName).append(' ');
            }
            result = sb.toString();
        }

        return result;
    }

    /**
     * Return the URL for monitoring this instance.
     *
     * @return the URL for monitoring this instance.
     */
    public String getAdminInterfaceUrl() {
        return "https://" + SystemUtils.getLocalHostName() + ":" + getGuiPort() + "/engine";
    }

    /**
     * Gets a message that stores the information summarizing the crawl progress.
     *
     * @return a message that stores the information summarizing the crawl progress.
     */
    public CrawlProgressMessage getCrawlProgress() {
        Heritrix3Files files = getHeritrixFiles();
        CrawlProgressMessage cpm = new CrawlProgressMessage(files.getHarvestID(), files.getJobID(),
                progressStatisticsLegend);
        cpm.setHostUrl(getHeritrixJobConsoleURL());
        JobResult jobResult = h3wrapper.job(jobName);
        if (jobResult != null) {
            getCrawlServiceAttributes(cpm, jobResult);
        } else {
            log.warn("Unable to get Heritrix3 status for job '{}'", jobName);
        }
        if (cpm.crawlIsFinished()) {
            cpm.setStatus(CrawlStatus.CRAWLING_FINISHED);
            // No need to go further, CrawlService.Job bean does not exist
            return cpm;
        }
        if (jobResult != null) {
            fetchCrawlServiceJobAttributes(cpm, jobResult);
        } else {
            log.warn("Unable to get JobAttributes for job '{}'", jobName);
        }
        return cpm;
    }

    /**
     * Retrieve the values of the crawl service attributes and add them to the CrawlProgressMessage being put together.
     *
     * @param cpm the crawlProgress message being prepared
     * @param job the H3 job information to read from
     */
    private void getCrawlServiceAttributes(CrawlProgressMessage cpm, JobResult job) {
        // TODO check job state??
        CrawlServiceInfo hStatus = cpm.getHeritrixStatus();
        hStatus.setAlertCount(job.job.alertCount); // info taken from job information
        hStatus.setCurrentJob(this.jobName); // Note:Information not taken from H3
        hStatus.setCrawling(job.job.isRunning);// info taken from job information
    }

    /**
     * Retrieve the values of the crawl service job attributes and add them to the CrawlProgressMessage being put
     * together. Missing values are reported as -1 (or -1.0 for rates).
     *
     * @param cpm the crawlProgress message being prepared
     * @param job the H3 job information to read from
     */
    private void fetchCrawlServiceJobAttributes(CrawlProgressMessage cpm, JobResult job) {
        CrawlServiceJobInfo jStatus = cpm.getJobStatus();

        long totalUriCount = job.job.uriTotalsReport.totalUriCount;
        long downloadedUriCount = job.job.uriTotalsReport.downloadedUriCount;
        Double progress;
        if (totalUriCount == 0) {
            progress = 0.0;
        } else {
            progress = downloadedUriCount * 100.0 / totalUriCount;
        }
        jStatus.setProgressStatistics(progress + "%");

        Long elapsedSeconds = job.job.elapsedReport.elapsedMilliseconds;
        if (elapsedSeconds == null) {
            elapsedSeconds = -1L;
        } else {
            elapsedSeconds = elapsedSeconds / 1000L;
        }
        jStatus.setElapsedSeconds(elapsedSeconds);

        Double currentProcessedDocsPerSec = job.job.rateReport.currentDocsPerSecond;
        if (currentProcessedDocsPerSec == null) {
            currentProcessedDocsPerSec = -1.0; // avoid deprecated new Double(...)
        }
        jStatus.setCurrentProcessedDocsPerSec(currentProcessedDocsPerSec);

        Double processedDocsPerSec = job.job.rateReport.averageDocsPerSecond;
        if (processedDocsPerSec == null) {
            processedDocsPerSec = -1.0; // avoid deprecated new Double(...)
        }
        jStatus.setProcessedDocsPerSec(processedDocsPerSec);

        Integer kbRate = job.job.rateReport.currentKiBPerSec;
        if (kbRate == null) {
            kbRate = -1;
        }
        jStatus.setCurrentProcessedKBPerSec(kbRate);

        Integer processedKBPerSec = job.job.rateReport.averageKiBPerSec;
        if (processedKBPerSec == null) {
            processedKBPerSec = -1;
        }
        jStatus.setProcessedKBPerSec(processedKBPerSec);

        Long discoveredFilesCount = job.job.uriTotalsReport.totalUriCount;
        if (discoveredFilesCount == null) {
            discoveredFilesCount = -1L;
        }
        jStatus.setDiscoveredFilesCount(discoveredFilesCount);

        Long downloadedCount = job.job.uriTotalsReport.downloadedUriCount;
        if (downloadedCount == null) {
            downloadedCount = -1L;
        }
        jStatus.setDownloadedFilesCount(downloadedCount);

        /*
         * Example of the frontier short report format produced below:
         * 27 queues: 5 active (1 in-process; 0 ready; 4 snoozed); 0 inactive; 0 retired; 22 exhausted
         */
        String frontierShortReport = String.format(
                "%d queues: %d active (%d in-process; %d ready; %d snoozed); %d inactive; %d retired; %d exhausted",
                job.job.frontierReport.totalQueues,
                job.job.frontierReport.activeQueues,
                job.job.frontierReport.inProcessQueues,
                job.job.frontierReport.readyQueues,
                job.job.frontierReport.snoozedQueues,
                job.job.frontierReport.inactiveQueues,
                job.job.frontierReport.retiredQueues,
                job.job.frontierReport.exhaustedQueues);
        jStatus.setFrontierShortReport(frontierShortReport);

        String newStatus = "?";
        String stateString = job.job.crawlControllerState;
        if (stateString != null) {
            newStatus = stateString;
        }
        jStatus.setStatus(newStatus);
        // BUGFIX: the original called contains() on the raw (possibly null) state string,
        // which threw NullPointerException whenever crawlControllerState was null.
        if (newStatus.contains("PAUSE")) { // FIXME this is not correct
            cpm.setStatus(CrawlStatus.CRAWLER_PAUSED);
        } else {
            cpm.setStatus(CrawlStatus.CRAWLER_ACTIVE);
        }

        Integer currentActiveToecount = job.job.loadReport.busyThreads;
        if (currentActiveToecount == null) {
            currentActiveToecount = -1;
        }
        jStatus.setActiveToeCount(currentActiveToecount);
    }

    /**
     * Generates a full frontier report from H3 using an REST call (Groovy script)
     *
     * @return a Full frontier report.
     * @throws IOFailure if the script request entity cannot be built.
     */
    public FullFrontierReport getFullFrontierReport() {
        //construct script request to send
        HttpPost postRequest = new HttpPost(baseUrl + "job/" + jobName + "/script");
        StringEntity postEntity;
        try {
            postEntity = new StringEntity("engine=beanshell&script="
                    + dk.netarkivet.harvester.heritrix3.Constants.FRONTIER_REPORT_GROOVY_SCRIPT);
        } catch (UnsupportedEncodingException e) {
            // BUGFIX: previously only printStackTrace() was called here and execution fell
            // through to a guaranteed NullPointerException on postEntity below.
            throw new IOFailure("Unable to build frontier report script request", e);
        }
        postEntity.setContentType("application/x-www-form-urlencoded");
        postRequest.addHeader("Accept", "application/xml");
        postRequest.setEntity(postEntity);
        ScriptResult result = h3wrapper.scriptResult(postRequest);
        return FullFrontierReport.parseContentsAsXML(
                jobName, result.response, dk.netarkivet.harvester.heritrix3.Constants.XML_RAWOUT_TAG);
    }

    @Override
    public boolean atFinish() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public void beginCrawlStop() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public void cleanup() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public boolean crawlIsEnded() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public int getActiveToeCount() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public int getCurrentProcessedKBPerSec() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public String getHarvestInformation() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public String getProgressStats() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public long getQueuedUriCount() {
        throw new NotImplementedException("Not implemented");
    }

    @Override
    public boolean isPaused() {
        throw new NotImplementedException("Not implemented");
    }

}