package dk.netarkivet.harvester.webinterface.servlet;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import javax.servlet.ServletContext;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.netarchivesuite.heritrix3wrapper.ScriptResult;
import org.netarchivesuite.heritrix3wrapper.StreamResult;
import org.netarchivesuite.heritrix3wrapper.jaxb.Job;
import org.netarchivesuite.heritrix3wrapper.jaxb.Report;

import com.antiaction.common.filter.Caching;
import com.antiaction.common.templateengine.TemplateBuilderFactory;
import com.antiaction.common.templateengine.TemplateBuilderPlaceHolder;
import com.antiaction.common.templateengine.TemplatePlaceHolder;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.Constants;
import dk.netarkivet.common.utils.Settings;

/**
 * Web resource that renders the pages for a single running Heritrix3 job:
 * the job overview, the cached crawl log, the frontier queue, the scripting
 * console and the report viewer.
 *
 * <p>Every page is rendered by filling the placeholders of a template and
 * writing it to the servlet output stream. Values that originate from the
 * request or from the crawler are HTML-escaped before they are embedded in the
 * page.</p>
 */
public class JobResource implements ResourceAbstract {

    /** Classpath location of the Groovy script that is prepended to every frontier request. */
    private static final String NAS_GROOVY_RESOURCE_PATH = "dk/netarkivet/harvester/webinterface/servlet/nas.groovy";

    /** Markup used to open an error notification box. */
    private static final String NOTIFY_ERROR_OPEN = "<div class=\"notify notify-red\"><span class=\"symbol icon-error\"></span> ";

    private NASEnvironment environment;

    /** Resource id of <code>/job/&lt;numeric&gt;/</code>; -1 until registered. */
    protected int R_JOB = -1;

    /** Resource id of <code>/job/&lt;numeric&gt;/crawllog/</code>; -1 until registered. */
    protected int R_CRAWLLOG = -1;

    /** Resource id of <code>/job/&lt;numeric&gt;/frontier/</code>; -1 until registered. */
    protected int R_FRONTIER = -1;

    /** Resource id of <code>/job/&lt;numeric&gt;/script/</code>; -1 until registered. */
    protected int R_SCRIPT = -1;

    /** Resource id of <code>/job/&lt;numeric&gt;/report/</code>; -1 until registered. */
    protected int R_REPORT = -1;

    /**
     * Stores the environment that provides the templates and the job monitor.
     *
     * @param environment the shared environment used by all pages of this resource
     */
    @Override
    public void resources_init(NASEnvironment environment) {
        this.environment = environment;
    }

    /**
     * Registers the five job paths and remembers the ids handed back by the manager.
     *
     * @param resourceManager the manager the paths are registered with
     */
    @Override
    public void resources_add(ResourceManagerAbstract resourceManager) {
        R_JOB = resourceManager.resource_add(this, "/job/<numeric>/", false);
        R_CRAWLLOG = resourceManager.resource_add(this, "/job/<numeric>/crawllog/", false);
        R_FRONTIER = resourceManager.resource_add(this, "/job/<numeric>/frontier/", false);
        R_SCRIPT = resourceManager.resource_add(this, "/job/<numeric>/script/", false);
        R_REPORT = resourceManager.resource_add(this, "/job/<numeric>/report/", false);
    }

    /**
     * Dispatches a request to the page matching <code>resource_id</code>.
     *
     * <p>The job and report pages answer GET only; the crawl log, frontier and
     * script pages answer GET and POST. Any other combination is left unanswered
     * by this method.</p>
     *
     * @param servletContext the servlet context (unused here)
     * @param nas_user the authenticated user (unused here)
     * @param req the current request
     * @param resp the current response
     * @param resource_id the id returned when the path was registered
     * @param numerics the numeric path elements; element 0 is the job id
     * @param pathInfo the request path info (unused here)
     * @throws IOException if writing the page fails
     */
    @Override
    public void resource_service(ServletContext servletContext, NASUser nas_user, HttpServletRequest req,
            HttpServletResponse resp, int resource_id, List<Integer> numerics, String pathInfo) throws IOException {
        // The paths are captured from the first request seen and then reused for link building.
        if (NASEnvironment.contextPath == null) {
            NASEnvironment.contextPath = req.getContextPath();
        }
        if (NASEnvironment.servicePath == null) {
            NASEnvironment.servicePath = req.getContextPath() + req.getServletPath() + "/";
        }
        String method = req.getMethod().toUpperCase();
        boolean isGet = "GET".equals(method);
        boolean isGetOrPost = isGet || "POST".equals(method);
        if (resource_id == R_JOB) {
            if (isGet) {
                job(req, resp, numerics);
            }
        } else if (resource_id == R_CRAWLLOG) {
            if (isGetOrPost) {
                crawllog_list(req, resp, numerics);
            }
        } else if (resource_id == R_FRONTIER) {
            if (isGetOrPost) {
                frontier_list(req, resp, numerics);
            }
        } else if (resource_id == R_SCRIPT) {
            if (isGetOrPost) {
                script(req, resp, numerics);
            }
        } else if (resource_id == R_REPORT) {
            if (isGet) {
                report(req, resp, numerics);
            }
        }
    }

    /**
     * Renders the overview page of a running job and, when an <code>action</code>
     * parameter is present, first forwards that action to Heritrix3.
     *
     * @param req the current request; reads the optional <code>action</code> parameter
     * @param resp the response the page is written to
     * @param numerics the numeric path elements; element 0 is the job id
     * @throws IOException if writing the page fails
     */
    public void job(HttpServletRequest req, HttpServletResponse resp, List<Integer> numerics) throws IOException {
        resp.setContentType("text/html; charset=UTF-8");
        ServletOutputStream out = resp.getOutputStream();

        Caching.caching_disable_headers(resp);

        MasterTemplateBuilder masterTplBuilder = newMasterTemplate();

        StringBuilder sb = new StringBuilder();
        StringBuilder menuSb = new StringBuilder();

        Heritrix3JobMonitor h3Job = environment.h3JobMonitorThread.getRunningH3Job(numerics.get(0));

        if (h3Job != null && !h3Job.bInitialized) {
            h3Job.init();
        }

        if (h3Job != null && h3Job.isReady()) {
            h3Job.update();
            // NOTE(review): actions change crawler state but are triggered by GET links.
            applyJobAction(h3Job, req.getParameter("action"));

            appendJobMenu(menuSb, h3Job);
            appendJobSummary(sb, h3Job);

            if (h3Job.jobResult != null && h3Job.jobResult.job != null) {
                appendJobStatus(sb, h3Job.jobResult.job);
            }
        } else {
            appendNotRunning(sb, numerics);
        }

        writePage(masterTplBuilder, out, "Running job", menuSb.toString(), sb.toString());
    }

    /**
     * Renders one page of the cached crawl log, optionally filtered by the
     * <code>q</code> parameter.
     *
     * @param req the current request; reads <code>page</code>, <code>itemsperpage</code>,
     *        <code>q</code> and <code>action</code>
     * @param resp the response the page is written to
     * @param numerics the numeric path elements; element 0 is the job id
     * @throws IOException if reading the cache or writing the page fails
     */
    public void crawllog_list(HttpServletRequest req, HttpServletResponse resp, List<Integer> numerics) throws IOException {
        resp.setContentType("text/html; charset=UTF-8");
        ServletOutputStream out = resp.getOutputStream();

        MasterTemplateBuilder masterTplBuilder = newMasterTemplate();

        long page = parseLong(req.getParameter("page"), 1);
        long linesPerPage = parseLong(req.getParameter("itemsperpage"), 100);
        if (linesPerPage < 25) {
            linesPerPage = 25;
        }
        if (linesPerPage > 1000) {
            linesPerPage = 1000;
        }

        // An empty filter and the match-everything filter both mean "no search".
        String q = null;
        String qParam = req.getParameter("q");
        if (qParam != null && qParam.length() > 0 && !qParam.equalsIgnoreCase(".*")) {
            q = qParam;
        }

        StringBuilder sb = new StringBuilder();
        StringBuilder menuSb = new StringBuilder();

        Heritrix3JobMonitor h3Job = environment.h3JobMonitorThread.getRunningH3Job(numerics.get(0));
        Pageable pageable = h3Job;

        if (h3Job != null && h3Job.isReady()) {
            appendJobMenu(menuSb, h3Job);

            if ("update".equalsIgnoreCase(req.getParameter("action"))) {
                byte[] tmpBuf = new byte[1024 * 1024];
                h3Job.updateCrawlLog(tmpBuf);
            }

            if (q != null) {
                SearchResult searchResult = h3Job.getSearchResult(q);
                searchResult.update();
                pageable = searchResult;
            }

            long pages = 0;
            long lines = pageable.getIndexSize();
            if (lines > 0) {
                // presumably the index holds one 8-byte entry per line plus one extra entry — TODO confirm
                lines = (lines / 8) - 1;
                pages = Pagination.getPages(lines, linesPerPage);
            } else {
                lines = 0;
            }
            // Reject zero/negative page numbers from the request; the upper clamp
            // still yields 0 when there are no pages at all.
            if (page < 1) {
                page = 1;
            }
            if (page > pages) {
                page = pages;
            }

            sb.append("Cached lines: ");
            sb.append(lines);
            sb.append("<br />\n");
            sb.append("Cached size: ");
            sb.append(pageable.getLastIndexed());
            sb.append("<br />\n");

            sb.append("<a href=\"");
            sb.append("?action=update");
            sb.append("\" class=\"btn btn-default\">");
            sb.append("Update cache");
            sb.append("</a>");
            sb.append("the cache manually ");
            sb.append("<br />\n");

            if (q == null) {
                q = ".*";
            }
            sb.append("<form class=\"form-horizontal\" action=\"?\" name=\"insert_form\" method=\"post\" enctype=\"application/x-www-form-urlencoded\" accept-charset=\"utf-8\">");
            sb.append("<input type=\"text\" id=\"q\" name=\"q\" value=\"" + escapeHtml(q) + "\" placeholder=\"content-type\">\n");
            sb.append("<button type=\"submit\" name=\"search\" value=\"1\" class=\"btn btn-success\"><i class=\"icon-white icon-thumbs-up\"></i> Search</button>\n");

            sb.append("<br />\n");
            sb.append("<br />\n");
            sb.append(Pagination.getPagination(page, linesPerPage, pages, false));
            sb.append("<div>\n");
            sb.append("<pre>\n");
            if (lines > 0) {
                // NOTE(review): the page bytes are embedded as-is; confirm whether readPage already HTML-escapes them.
                byte[] pageBytes = pageable.readPage(page, linesPerPage, true);
                sb.append(new String(pageBytes, "UTF-8"));
            }
            sb.append("</pre>\n");
            sb.append("</div>\n");
            sb.append(Pagination.getPagination(page, linesPerPage, pages, false));
            sb.append("</form>");
        } else {
            appendNotRunning(sb, numerics);
        }

        writePage(masterTplBuilder, out, "Crawllog", menuSb.toString(), sb.toString());
    }

    /**
     * Renders the frontier page: lists the queued URIs matching a regex or, when
     * deletion is requested together with the deleter's initials, removes them.
     *
     * <p>The work is done by running the bundled Groovy script in the job with one
     * extra call appended. The regex and the initials are escaped before they are
     * placed inside the Groovy string literals, so what is typed in the form is
     * passed on verbatim.</p>
     *
     * @param req the current request; reads <code>regex</code>, <code>limit</code>,
     *        <code>initials</code> and <code>delete</code>
     * @param resp the response the page is written to
     * @param numerics the numeric path elements; element 0 is the job id
     * @throws IOException if the bundled script cannot be read or writing the page fails
     */
    public void frontier_list(HttpServletRequest req, HttpServletResponse resp, List<Integer> numerics) throws IOException {
        resp.setContentType("text/html; charset=UTF-8");
        ServletOutputStream out = resp.getOutputStream();

        MasterTemplateBuilder masterTplBuilder = newMasterTemplate();

        StringBuilder sb = new StringBuilder();
        StringBuilder menuSb = new StringBuilder();

        String regex = req.getParameter("regex");
        if (regex == null || regex.length() == 0) {
            regex = ".*";
        }
        long limit = parseLong(req.getParameter("limit"), 1000);
        String initials = req.getParameter("initials");
        if (initials == null) {
            initials = "";
        }
        boolean deleteRequested = "1".equals(req.getParameter("delete"));
        boolean hasInitials = initials.length() > 0;

        Heritrix3JobMonitor h3Job = environment.h3JobMonitorThread.getRunningH3Job(numerics.get(0));

        if (h3Job != null && h3Job.isReady()) {
            appendJobMenu(menuSb, h3Job);

            if (deleteRequested && !hasInitials) {
                sb.append("<div class=\"notify notify-red\"><span class=\"symbol icon-error\"></span> Initials required to delete from the frontier queue!</div>");
            }

            sb.append("<form class=\"form-horizontal\" action=\"?\" name=\"insert_form\" method=\"post\" enctype=\"application/x-www-form-urlencoded\" accept-charset=\"utf-8\">\n");
            sb.append("<label for=\"limit\">Limit:</label>");
            sb.append("<input type=\"text\" id=\"limit\" name=\"limit\" value=\"" + limit + "\" placeholder=\"return limit\">\n");
            sb.append("<label for=\"regex\">Filter regex:</label>");
            sb.append("<input type=\"text\" id=\"regex\" name=\"regex\" value=\"" + escapeHtml(regex) + "\" placeholder=\"regex\">\n");
            sb.append("<button type=\"submit\" name=\"show\" value=\"1\" class=\"btn btn-success\"><i class=\"icon-white icon-thumbs-up\"></i> Show</button>\n");
            sb.append(" ");
            sb.append("<label for=\"initials\">Deleter initials:</label>");
            sb.append("<input type=\"text\" id=\"initials\" name=\"initials\" value=\"" + escapeHtml(initials) + "\" placeholder=\"initials\">\n");
            sb.append("<button type=\"submit\" name=\"delete\" value=\"1\" class=\"btn btn-success\"><i class=\"icon-white icon-thumbs-up\"></i> Delete</button>\n");
            sb.append("</form>\n");

            if (h3Job.jobResult == null || h3Job.jobResult.job == null) {
                appendJobStatusUnavailable(sb);
            } else {
                String script = buildFrontierScript(regex, limit, initials, deleteRequested && hasInitials);
                ScriptResult scriptResult = h3Job.h3wrapper.ExecuteShellScriptInJob(h3Job.jobResult.job.shortName, "groovy", script);
                if (scriptResult != null && scriptResult.script != null) {
                    if (scriptResult.script.htmlOutput != null) {
                        // htmlOutput is embedded unescaped because it is markup produced by the script.
                        sb.append("<fieldset><legend>htmlOut</legend>");
                        sb.append(scriptResult.script.htmlOutput);
                        sb.append("</fieldset><br />\n");
                    }
                    if (scriptResult.script.rawOutput != null) {
                        sb.append("<fieldset><legend>rawOut</legend>");
                        sb.append("<pre>");
                        sb.append(escapeHtml(scriptResult.script.rawOutput));
                        sb.append("</pre>");
                        sb.append("</fieldset><br />\n");
                    }
                }
            }
        } else {
            appendNotRunning(sb, numerics);
        }

        writePage(masterTplBuilder, out, "Frontier queue", menuSb.toString(), sb.toString());
    }

    /** Template builder for the scripting console, which has one extra placeholder for the script text. */
    public static class ScriptTemplateBuilder extends MasterTemplateBuilder {

        @TemplateBuilderPlaceHolder("script")
        public TemplatePlaceHolder scriptPlace;

    }

    /**
     * Renders the scripting console and, when both an engine and a script are
     * submitted, runs the script in the job and shows its output.
     *
     * @param req the current request; reads <code>engine</code> and <code>script</code>
     * @param resp the response the page is written to
     * @param numerics the numeric path elements; element 0 is the job id
     * @throws IOException if writing the page fails
     */
    public void script(HttpServletRequest req, HttpServletResponse resp, List<Integer> numerics) throws IOException {
        resp.setContentType("text/html; charset=UTF-8");
        ServletOutputStream out = resp.getOutputStream();

        TemplateBuilderFactory<ScriptTemplateBuilder> tplBuilder = TemplateBuilderFactory.getInstance(
                environment.templateMaster, "h3script.tpl", "UTF-8", ScriptTemplateBuilder.class);
        ScriptTemplateBuilder masterTplBuilder = tplBuilder.getTemplateBuilder();

        String engineStr = req.getParameter("engine");
        String scriptStr = req.getParameter("script");
        if (scriptStr == null) {
            scriptStr = "";
        }

        StringBuilder sb = new StringBuilder();
        StringBuilder menuSb = new StringBuilder();

        Heritrix3JobMonitor h3Job = environment.h3JobMonitorThread.getRunningH3Job(numerics.get(0));

        if (h3Job != null && h3Job.isReady()) {
            appendJobMenu(menuSb, h3Job);

            if (engineStr != null && engineStr.length() > 0 && scriptStr.length() > 0) {
                if (h3Job.jobResult == null || h3Job.jobResult.job == null) {
                    appendJobStatusUnavailable(sb);
                } else {
                    ScriptResult scriptResult = h3Job.h3wrapper.ExecuteShellScriptInJob(h3Job.jobResult.job.shortName, engineStr, scriptStr);
                    if (scriptResult != null && scriptResult.script != null) {
                        if (scriptResult.script.htmlOutput != null) {
                            // htmlOutput is embedded unescaped because it is markup produced by the script.
                            sb.append(scriptResult.script.htmlOutput);
                        }
                        if (scriptResult.script.rawOutput != null) {
                            sb.append("<pre>");
                            sb.append(escapeHtml(scriptResult.script.rawOutput));
                            sb.append("</pre>");
                        }
                        if (scriptResult.response != null) {
                            // Escaped so that the raw response is shown as text rather than parsed as markup.
                            sb.append("<pre>");
                            sb.append(escapeHtml(new String(scriptResult.response, "UTF-8")));
                            sb.append("</pre>");
                        }
                    }
                }
            }
        } else {
            appendNotRunning(sb, numerics);
        }

        if (masterTplBuilder.scriptPlace != null) {
            // NOTE(review): echoed unescaped; how it must be escaped depends on where
            // h3script.tpl places this placeholder (textarea vs. script block) — confirm.
            masterTplBuilder.scriptPlace.setText(scriptStr);
        }

        writePage(masterTplBuilder, out, "Scripting console", menuSb.toString(), sb.toString());
    }

    /**
     * Renders one button per report offered by the job and, when the
     * <code>report</code> parameter names one of them, the text of that report.
     *
     * <p>The parameter is only used to build the Heritrix3 request path when it
     * equals the class name of an offered report.</p>
     *
     * @param req the current request; reads the optional <code>report</code> parameter
     * @param resp the response the page is written to
     * @param numerics the numeric path elements; element 0 is the job id
     * @throws IOException if writing the page fails
     */
    public void report(HttpServletRequest req, HttpServletResponse resp, List<Integer> numerics) throws IOException {
        resp.setContentType("text/html; charset=UTF-8");
        ServletOutputStream out = resp.getOutputStream();

        MasterTemplateBuilder masterTplBuilder = newMasterTemplate();

        StringBuilder sb = new StringBuilder();
        StringBuilder menuSb = new StringBuilder();

        String reportStr = req.getParameter("report");

        Heritrix3JobMonitor h3Job = environment.h3JobMonitorThread.getRunningH3Job(numerics.get(0));

        if (h3Job != null && h3Job.isReady()) {
            appendJobMenu(menuSb, h3Job);

            if (h3Job.jobResult != null && h3Job.jobResult.job != null) {
                Job job = h3Job.jobResult.job;
                boolean knownReport = false;
                if (job.reports != null) {
                    for (int i = 0; i < job.reports.size(); ++i) {
                        Report report = job.reports.get(i);
                        if (i > 0) {
                            sb.append(" ");
                        }
                        sb.append("<a href=\"");
                        sb.append(NASEnvironment.servicePath);
                        sb.append("job/");
                        sb.append(h3Job.jobId);
                        sb.append("/report/?report=");
                        sb.append(escapeHtml(report.className));
                        sb.append("\" class=\"btn btn-default\">");
                        sb.append(escapeHtml(report.shortName));
                        sb.append("</a>");
                        if (reportStr != null && reportStr.equals(report.className)) {
                            knownReport = true;
                        }
                    }
                }
                if (reportStr != null && reportStr.length() > 0) {
                    sb.append("<br />\n");
                    sb.append("<h5>");
                    sb.append(escapeHtml(reportStr));
                    sb.append("</h5>");
                    if (knownReport) {
                        sb.append("<pre>");
                        appendReportText(sb, h3Job, reportStr);
                        sb.append("</pre>");
                    } else {
                        sb.append(NOTIFY_ERROR_OPEN);
                        sb.append("Unknown report.</div>");
                    }
                }
            }
        } else {
            appendNotRunning(sb, numerics);
        }

        writePage(masterTplBuilder, out, "Report", menuSb.toString(), sb.toString());
    }

    /** Creates a fresh builder for the shared master template. */
    private MasterTemplateBuilder newMasterTemplate() {
        TemplateBuilderFactory<MasterTemplateBuilder> tplBuilder = TemplateBuilderFactory.getInstance(
                environment.templateMaster, "master.tpl", "UTF-8", MasterTemplateBuilder.class);
        return tplBuilder.getTemplateBuilder();
    }

    /** Fills the placeholders the template defines, writes the page and closes the stream. */
    private static void writePage(MasterTemplateBuilder tpl, ServletOutputStream out, String title, String menu,
            String content) throws IOException {
        if (tpl.titlePlace != null) {
            tpl.titlePlace.setText(title);
        }
        if (tpl.menuPlace != null) {
            tpl.menuPlace.setText(menu);
        }
        if (tpl.headingPlace != null) {
            tpl.headingPlace.setText(title);
        }
        if (tpl.contentPlace != null) {
            tpl.contentPlace.setText(content);
        }
        if (tpl.versionPlace != null) {
            tpl.versionPlace.setText(Constants.getVersionString());
        }
        if (tpl.environmentPlace != null) {
            tpl.environmentPlace.setText(Settings.get(CommonSettings.ENVIRONMENT_NAME));
        }

        tpl.write(out);

        out.flush();
        out.close();
    }

    /** Forwards a recognised action name (case-insensitive) to Heritrix3; anything else is ignored. */
    private static void applyJobAction(Heritrix3JobMonitor h3Job, String action) {
        if (action == null || action.length() == 0) {
            return;
        }
        if ("build".equalsIgnoreCase(action)) {
            h3Job.h3wrapper.buildJobConfiguration(h3Job.jobname);
        } else if ("launch".equalsIgnoreCase(action)) {
            h3Job.h3wrapper.launchJob(h3Job.jobname);
        } else if ("pause".equalsIgnoreCase(action)) {
            h3Job.h3wrapper.pauseJob(h3Job.jobname);
        } else if ("unpause".equalsIgnoreCase(action)) {
            h3Job.h3wrapper.unpauseJob(h3Job.jobname);
        } else if ("checkpoint".equalsIgnoreCase(action)) {
            h3Job.h3wrapper.checkpointJob(h3Job.jobname);
        } else if ("terminate".equalsIgnoreCase(action)) {
            h3Job.h3wrapper.terminateJob(h3Job.jobname);
        } else if ("teardown".equalsIgnoreCase(action)) {
            h3Job.h3wrapper.teardownJob(h3Job.jobname);
        }
    }

    /** Appends the menu row that links back to the overview page of the job. */
    private static void appendJobMenu(StringBuilder menuSb, Heritrix3JobMonitor h3Job) {
        menuSb.append("<tr><td> <a href=\"");
        menuSb.append(NASEnvironment.servicePath);
        menuSb.append("job/");
        menuSb.append(h3Job.jobId);
        menuSb.append("/");
        menuSb.append("\"> ");
        menuSb.append(h3Job.jobId);
        menuSb.append("</a></td></tr>");
    }

    /** Appends the harvest-side job properties and the navigation buttons of the overview page. */
    private static void appendJobSummary(StringBuilder sb, Heritrix3JobMonitor h3Job) {
        appendField(sb, "JobId", h3Job.jobId);
        appendField(sb, "HarvestNum", h3Job.job.getHarvestNum());
        appendField(sb, "Snapshot", h3Job.job.isSnapshot());
        appendField(sb, "Channel", h3Job.job.getChannel());
        appendField(sb, "OrderXMLName", h3Job.job.getOrderXMLName());
        appendField(sb, "CountDomains", h3Job.job.getCountDomains());
        appendField(sb, "MaxBytesPerDomain", h3Job.job.getMaxBytesPerDomain());
        appendField(sb, "MaxObjectsPerDomain", h3Job.job.getMaxObjectsPerDomain());
        sb.append("MaxJobRunningTime: ");
        sb.append(h3Job.job.getMaxJobRunningTime());
        sb.append(" ms.<br />\n");

        sb.append("<br />\n");

        appendJobButton(sb, h3Job, "/crawllog/", "Show/filter crawllog");
        sb.append(" ");
        appendJobButton(sb, h3Job, "/frontier/", "Show/delete frontier queue");
        sb.append(" ");
        appendJobButton(sb, h3Job, "/script/", "Open scripting console");
        sb.append(" ");
        appendJobButton(sb, h3Job, "/report/", "Show report");
        sb.append(" ");

        sb.append("<a href=\"");
        sb.append(escapeHtml(h3Job.hostUrl));
        sb.append("\" class=\"btn btn-default\">");
        sb.append("Heritrix3 WebUI");
        sb.append("</a>");

        sb.append("<br />\n");
    }

    /** Appends a button linking to a sub page (for example <code>/crawllog/</code>) of the job. */
    private static void appendJobButton(StringBuilder sb, Heritrix3JobMonitor h3Job, String subPath, String label) {
        sb.append("<a href=\"");
        sb.append(NASEnvironment.servicePath);
        sb.append("job/");
        sb.append(h3Job.jobId);
        sb.append(subPath);
        sb.append("\" class=\"btn btn-default\">");
        sb.append(label);
        sb.append("</a>");
    }

    /** Appends the action buttons and every status section Heritrix3 reported for the job. */
    private static void appendJobStatus(StringBuilder sb, Job job) {
        sb.append("<br />\n");
        if (job.availableActions != null) {
            for (int i = 0; i < job.availableActions.size(); ++i) {
                if (i > 0) {
                    sb.append(" ");
                }
                sb.append("<a href=\"?action=");
                sb.append(escapeHtml(job.availableActions.get(i)));
                sb.append("\" class=\"btn btn-default\">");
                sb.append(escapeHtml(job.availableActions.get(i)));
                sb.append("</a>");
            }
        }
        sb.append("<br />\n");
        sb.append("<br />\n");

        appendField(sb, "shortName", job.shortName);
        appendField(sb, "crawlControllerState", job.crawlControllerState);
        appendField(sb, "crawlExitStatus", job.crawlExitStatus);
        appendField(sb, "statusDescription", job.statusDescription);
        sb.append("url: ");
        sb.append("<a href=\"");
        sb.append(escapeHtml(job.url));
        sb.append("/");
        sb.append("\">");
        sb.append(escapeHtml(job.url));
        sb.append("</a>");
        sb.append("<br />\n");
        if (job.jobLogTail != null) {
            for (int i = 0; i < job.jobLogTail.size(); ++i) {
                appendIndexedField(sb, "jobLogTail", i, job.jobLogTail.get(i));
            }
        }
        if (job.uriTotalsReport != null) {
            appendField(sb, "uriTotalsReport.downloadedUriCount", job.uriTotalsReport.downloadedUriCount);
            appendField(sb, "uriTotalsReport.queuedUriCount", job.uriTotalsReport.queuedUriCount);
            appendField(sb, "uriTotalsReport.totalUriCount", job.uriTotalsReport.totalUriCount);
            appendField(sb, "uriTotalsReport.futureUriCount", job.uriTotalsReport.futureUriCount);
        }
        if (job.sizeTotalsReport != null) {
            appendField(sb, "sizeTotalsReport.dupByHash", job.sizeTotalsReport.dupByHash);
            appendField(sb, "sizeTotalsReport.dupByHashCount", job.sizeTotalsReport.dupByHashCount);
            appendField(sb, "sizeTotalsReport.novel", job.sizeTotalsReport.novel);
            appendField(sb, "sizeTotalsReport.novelCount", job.sizeTotalsReport.novelCount);
            appendField(sb, "sizeTotalsReport.notModified", job.sizeTotalsReport.notModified);
            appendField(sb, "sizeTotalsReport.notModifiedCount", job.sizeTotalsReport.notModifiedCount);
            appendField(sb, "sizeTotalsReport.total", job.sizeTotalsReport.total);
            appendField(sb, "sizeTotalsReport.totalCount", job.sizeTotalsReport.totalCount);
        }
        if (job.rateReport != null) {
            appendField(sb, "rateReport.currentDocsPerSecond", job.rateReport.currentDocsPerSecond);
            appendField(sb, "rateReport.averageDocsPerSecond", job.rateReport.averageDocsPerSecond);
            appendField(sb, "rateReport.currentKiBPerSec", job.rateReport.currentKiBPerSec);
            appendField(sb, "rateReport.averageKiBPerSec", job.rateReport.averageKiBPerSec);
        }
        if (job.loadReport != null) {
            appendField(sb, "loadReport.busyThreads", job.loadReport.busyThreads);
            appendField(sb, "loadReport.totalThreads", job.loadReport.totalThreads);
            appendField(sb, "loadReport.congestionRatio", job.loadReport.congestionRatio);
            appendField(sb, "loadReport.averageQueueDepth", job.loadReport.averageQueueDepth);
            appendField(sb, "loadReport.deepestQueueDepth", job.loadReport.deepestQueueDepth);
        }
        if (job.elapsedReport != null) {
            sb.append("elapsedReport.elapsed: ");
            sb.append(escapeHtml(job.elapsedReport.elapsedPretty));
            sb.append(" (");
            sb.append(job.elapsedReport.elapsedMilliseconds);
            sb.append("ms)");
            sb.append("<br />\n");
        }
        if (job.threadReport != null) {
            appendField(sb, "threadReport.toeCount", job.threadReport.toeCount);
            if (job.threadReport.steps != null) {
                for (int i = 0; i < job.threadReport.steps.size(); ++i) {
                    appendIndexedField(sb, "threadReport.steps", i, job.threadReport.steps.get(i));
                }
            }
            if (job.threadReport.processors != null) {
                for (int i = 0; i < job.threadReport.processors.size(); ++i) {
                    appendIndexedField(sb, "threadReport.processors", i, job.threadReport.processors.get(i));
                }
            }
        }
        if (job.frontierReport != null) {
            appendField(sb, "frontierReport.totalQueues", job.frontierReport.totalQueues);
            appendField(sb, "frontierReport.inProcessQueues", job.frontierReport.inProcessQueues);
            appendField(sb, "frontierReport.readyQueues", job.frontierReport.readyQueues);
            appendField(sb, "frontierReport.snoozedQueues", job.frontierReport.snoozedQueues);
            appendField(sb, "frontierReport.activeQueues", job.frontierReport.activeQueues);
            appendField(sb, "frontierReport.inactiveQueues", job.frontierReport.inactiveQueues);
            appendField(sb, "frontierReport.ineligibleQueues", job.frontierReport.ineligibleQueues);
            appendField(sb, "frontierReport.retiredQueues", job.frontierReport.retiredQueues);
            appendField(sb, "frontierReport.exhaustedQueues", job.frontierReport.exhaustedQueues);
            appendField(sb, "frontierReport.lastReachedState", job.frontierReport.lastReachedState);
        }
        if (job.crawlLogTail != null) {
            for (int i = 0; i < job.crawlLogTail.size(); ++i) {
                appendIndexedField(sb, "crawlLogTail", i, job.crawlLogTail.get(i));
            }
        }
        appendField(sb, "isRunning", job.isRunning);
        appendField(sb, "isLaunchable", job.isLaunchable);
        appendField(sb, "alertCount", job.alertCount);
        appendField(sb, "alertLogFilePath", job.alertLogFilePath);
        appendField(sb, "crawlLogFilePath", job.crawlLogFilePath);
        if (job.heapReport != null) {
            appendField(sb, "heapReport.usedBytes", job.heapReport.usedBytes);
            appendField(sb, "heapReport.totalBytes", job.heapReport.totalBytes);
            appendField(sb, "heapReport.maxBytes", job.heapReport.maxBytes);
        }
    }

    /** Appends <code>label: value</code> followed by a line break; the value is HTML-escaped. */
    private static void appendField(StringBuilder sb, String label, Object value) {
        sb.append(label);
        sb.append(": ");
        sb.append(escapeHtml(value));
        sb.append("<br />\n");
    }

    /** Appends <code>label[index]: value</code> followed by a line break; the value is HTML-escaped. */
    private static void appendIndexedField(StringBuilder sb, String label, int index, Object value) {
        sb.append(label);
        sb.append("[");
        sb.append(index);
        sb.append("]: ");
        sb.append(escapeHtml(value));
        sb.append("<br />\n");
    }

    /** Appends the message shown when the requested job is unknown or not ready. */
    private static void appendNotRunning(StringBuilder sb, List<Integer> numerics) {
        sb.append("Job ");
        sb.append(numerics.get(0));
        sb.append(" is not running.");
    }

    /** Appends the notice shown when the job has no Heritrix3 status to take its short name from. */
    private static void appendJobStatusUnavailable(StringBuilder sb) {
        sb.append(NOTIFY_ERROR_OPEN);
        sb.append("Job status is not available yet; the script was not executed.</div>");
    }

    /**
     * Fetches the named report from Heritrix3 and appends its HTML-escaped text.
     * A read failure is reported on the page instead of aborting the request.
     */
    private static void appendReportText(StringBuilder sb, Heritrix3JobMonitor h3Job, String reportName) {
        StreamResult anypathResult = h3Job.h3wrapper.path("job/" + h3Job.jobname + "/report/" + reportName, null, null);
        if (anypathResult == null || anypathResult.in == null) {
            return;
        }
        try {
            byte[] reportBytes;
            try {
                reportBytes = readFully(anypathResult.in);
            } finally {
                anypathResult.close();
            }
            // Decoded in one go so that multi-byte characters spanning two reads stay intact.
            sb.append(escapeHtml(new String(reportBytes, "UTF-8")));
        } catch (IOException e) {
            sb.append(escapeHtml("Could not read report: " + e.getMessage()));
        }
    }

    /**
     * Builds the Groovy source for a frontier request: the bundled script followed
     * by either a <code>deleteFromFrontier</code> or a <code>listFrontier</code> call.
     * Both functions are presumably defined by the bundled script — verify against nas.groovy.
     */
    private static String buildFrontierScript(String regex, long limit, String initials, boolean delete)
            throws IOException {
        StringBuilder script = new StringBuilder(loadNasGroovyScript());
        script.append("\n");
        if (delete) {
            script.append("\ninitials = \"").append(escapeGroovy(initials, '"')).append("\"");
            script.append("\ndeleteFromFrontier '").append(escapeGroovy(regex, '\'')).append("'\n");
        } else {
            script.append("\nlistFrontier '").append(escapeGroovy(regex, '\'')).append("', ").append(limit).append("\n");
        }
        return script.toString();
    }

    /** Reads the bundled Groovy script from the classpath as UTF-8 text. */
    private static String loadNasGroovyScript() throws IOException {
        InputStream in = JobResource.class.getClassLoader().getResourceAsStream(NAS_GROOVY_RESOURCE_PATH);
        if (in == null) {
            throw new IOException("Classpath resource not found: " + NAS_GROOVY_RESOURCE_PATH);
        }
        try {
            return new String(readFully(in), "UTF-8");
        } finally {
            in.close();
        }
    }

    /** Reads the stream to its end and returns the bytes; the caller closes the stream. */
    private static byte[] readFully(InputStream in) throws IOException {
        ByteArrayOutputStream bOut = new ByteArrayOutputStream();
        byte[] tmpArr = new byte[8192];
        int read;
        while ((read = in.read(tmpArr)) != -1) {
            bOut.write(tmpArr, 0, read);
        }
        return bOut.toByteArray();
    }

    /** Parses a decimal long, returning <code>defaultValue</code> for null, empty or malformed text. */
    private static long parseLong(String text, long defaultValue) {
        if (text == null || text.length() == 0) {
            return defaultValue;
        }
        try {
            return Long.parseLong(text);
        } catch (NumberFormatException ignored) {
            // A malformed number in the query string falls back to the default on purpose.
            return defaultValue;
        }
    }

    /**
     * Escapes the characters that are significant in HTML text and attribute
     * values. A null value is rendered as the text <code>null</code>, matching
     * what appending it to a StringBuilder would produce.
     */
    private static String escapeHtml(Object value) {
        String text = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
                break;
            }
        }
        return escaped.toString();
    }

    /**
     * Escapes text for use inside a Groovy string literal delimited by
     * <code>quote</code>: backslashes, the delimiter itself and line breaks are
     * escaped, and for double-quoted literals also the dollar sign that would
     * otherwise start an interpolation.
     */
    private static String escapeGroovy(String text, char quote) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            if (c == '\\' || c == quote || (c == '$' && quote == '"')) {
                escaped.append('\\').append(c);
            } else if (c == '\n') {
                escaped.append("\\n");
            } else if (c == '\r') {
                escaped.append("\\r");
            } else {
                escaped.append(c);
            }
        }
        return escaped.toString();
    }

}