/*
 * #%L
 * Netarchivesuite - heritrix 3 monitor
 * %%
 * Copyright (C) 2005 - 2018 The Royal Danish Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.heritrix3.monitor.resources;

import java.io.IOException;
import java.net.URLEncoder;
import java.util.List;
import java.util.Locale;

import javax.servlet.ServletContext;
import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import com.antiaction.common.filter.Caching;
import com.antiaction.common.templateengine.TemplateBuilderFactory;

import dk.netarkivet.heritrix3.monitor.Heritrix3JobMonitor;
import dk.netarkivet.heritrix3.monitor.NASEnvironment;
import dk.netarkivet.heritrix3.monitor.NASUser;
import dk.netarkivet.heritrix3.monitor.Pageable;
import dk.netarkivet.heritrix3.monitor.Pagination;
import dk.netarkivet.heritrix3.monitor.ResourceAbstract;
import dk.netarkivet.heritrix3.monitor.ResourceManagerAbstract;
import dk.netarkivet.heritrix3.monitor.SearchResult;
import dk.netarkivet.heritrix3.monitor.HttpLocaleHandler.HttpLocale;

/**
 * Web resource that renders the cached crawl log of a running Heritrix 3 job
 * as a paginated, optionally regex-filtered HTML page.
 * <p>
 * The resource is registered under the path pattern
 * {@code /job/<numeric>/crawllog/} and answers GET and POST requests.
 */
public class H3CrawlLogCachedResource implements ResourceAbstract {

    /** Page number used when the request carries no usable {@code page} parameter. */
    private static final long DEFAULT_PAGE = 1;

    /** Lines per page used when the request carries no usable {@code itemsperpage} parameter. */
    private static final long DEFAULT_LINES_PER_PAGE = 100;

    /** Smallest accepted number of lines per page; lower requests are raised to this value. */
    private static final long MIN_LINES_PER_PAGE = 25;

    /** Environment handed over in {@link #resources_init(NASEnvironment)}. */
    private NASEnvironment environment;

    /** Resource id returned by the resource manager; {@code -1} until {@link #resources_add} has run. */
    protected int R_CRAWLLOG = -1;

    /**
     * Stores the environment this resource reads templates and running jobs from.
     *
     * @param environment the environment to use for subsequent requests
     */
    @Override
    public void resources_init(NASEnvironment environment) {
        this.environment = environment;
    }

    /**
     * Registers this resource with the given manager under
     * {@code /job/<numeric>/crawllog/} and remembers the id it was assigned.
     *
     * @param resourceManager the manager to register with
     */
    @Override
    public void resources_add(ResourceManagerAbstract resourceManager) {
        R_CRAWLLOG = resourceManager.resource_add(this, "/job/<numeric>/crawllog/", false);
    }

    /**
     * Dispatches a request to {@link #crawllog_list} when it targets the crawl
     * log resource with a GET or POST method; any other request is ignored.
     * <p>
     * As a side effect the static {@code NASEnvironment.contextPath} and
     * {@code NASEnvironment.servicePath} are initialised from the request if
     * they are still {@code null}.
     *
     * @param servletContext servlet context (unused here)
     * @param nas_user       authenticated user (unused here)
     * @param req            incoming request
     * @param resp           response to write to
     * @param httpLocale     locale information for the request
     * @param resource_id    id of the resource the request path matched
     * @param numerics       numeric path components; the first one is the job id
     * @param pathInfo       request path info (unused here)
     * @throws IOException if writing the response fails
     */
    @Override
    public void resource_service(ServletContext servletContext, NASUser nas_user, HttpServletRequest req, HttpServletResponse resp, HttpLocale httpLocale, int resource_id, List<Integer> numerics, String pathInfo) throws IOException {
        if (NASEnvironment.contextPath == null) {
            NASEnvironment.contextPath = req.getContextPath();
        }
        if (NASEnvironment.servicePath == null) {
            NASEnvironment.servicePath = req.getContextPath() + req.getServletPath() + "/";
        }
        if (resource_id != R_CRAWLLOG) {
            return;
        }
        // Case-insensitive comparison without depending on the default locale.
        String method = req.getMethod();
        if ("GET".equalsIgnoreCase(method) || "POST".equalsIgnoreCase(method)) {
            crawllog_list(req, resp, httpLocale, numerics);
        }
    }

    /**
     * Writes the crawl log page for the job whose id is the first entry of
     * {@code numerics}.
     * <p>
     * Request parameters read:
     * <ul>
     * <li>{@code page} - page to show; defaults to 1, values below 1 are raised to 1
     *     and values beyond the last page are lowered to the last page</li>
     * <li>{@code itemsperpage} - lines per page; defaults to 100, minimum 25</li>
     * <li>{@code q} - filter regex; empty or {@code .*} means no filtering</li>
     * <li>{@code action} - when equal to {@code update} (ignoring case) the job's
     *     crawl log cache is updated before rendering</li>
     * </ul>
     * If the job is not running or not ready, a short "not running" message is
     * rendered instead.
     *
     * @param req        incoming request
     * @param resp       response to write the HTML page to
     * @param httpLocale locale information used for the menu and language links
     * @param numerics   numeric path components; index 0 is the job id
     * @throws IOException if writing the response fails
     */
    public void crawllog_list(HttpServletRequest req, HttpServletResponse resp, HttpLocale httpLocale, List<Integer> numerics) throws IOException {
        Locale locale = httpLocale.locale;
        resp.setContentType("text/html; charset=UTF-8");
        ServletOutputStream out = resp.getOutputStream();
        Caching.caching_disable_headers(resp);

        TemplateBuilderFactory<MasterTemplateBuilder> masterTplBuilderFactory = TemplateBuilderFactory.getInstance(environment.templateMaster, "master.tpl", "UTF-8", MasterTemplateBuilder.class);
        MasterTemplateBuilder masterTplBuilder = masterTplBuilderFactory.getTemplateBuilder();

        long lines;
        long pages = 0;

        long page = parseLongParameter(req, "page", DEFAULT_PAGE);
        // A zero or negative page number is never valid input; start at the first page.
        if (page < 1) {
            page = 1;
        }

        long linesPerPage = parseLongParameter(req, "itemsperpage", DEFAULT_LINES_PER_PAGE);
        // NOTE(review): the form placeholder advertises an upper limit of 1000,
        // but only the lower limit is enforced - confirm whether a cap is wanted.
        if (linesPerPage < MIN_LINES_PER_PAGE) {
            linesPerPage = MIN_LINES_PER_PAGE;
        }

        String q = null;
        String additionalParams = "";
        String qParam = req.getParameter("q");
        if (qParam != null && qParam.length() > 0 && !qParam.equalsIgnoreCase(".*")) {
            q = qParam;
            // Carried along by the pagination links so the filter survives paging.
            additionalParams = "&q=" + URLEncoder.encode(q, "UTF-8");
        }

        StringBuilder sb = new StringBuilder();

        long jobId = numerics.get(0);
        Heritrix3JobMonitor h3Job = environment.h3JobMonitorThread.getRunningH3Job(jobId);
        Pageable pageable = h3Job;

        if (h3Job != null && h3Job.isReady()) {
            String actionStr = req.getParameter("action");

            if ("update".equalsIgnoreCase(actionStr)) {
                byte[] tmpBuf = new byte[1024 * 1024];
                h3Job.updateCrawlLog(tmpBuf);
            }

            long totalCachedLines = h3Job.getTotalCachedLines();
            long totalCachedSize = h3Job.getLastIndexed();

            if (q != null) {
                // Page through the filtered result instead of the full log.
                SearchResult searchResult = h3Job.getSearchResult(q);
                searchResult.update();
                pageable = searchResult;
            } else {
                // Shown in the filter field as the "match everything" default.
                q = ".*";
            }

            lines = pageable.getIndexSize();

            if (lines > 0) {
                // Presumably the index stores 8 bytes per entry plus one extra
                // entry - TODO confirm against the Pageable implementations.
                lines = (lines / 8) - 1;
                pages = Pagination.getPages(lines, linesPerPage);
            } else {
                lines = 0;
            }
            if (page > pages) {
                page = pages;
            }

            sb.append("<div style=\"margin-bottom:20px;\">\n");
            sb.append("<div style=\"float:left;min-width:180px;\">\n");
            sb.append("Total cached lines: ");
            sb.append(totalCachedLines);
            sb.append(" URIs<br />\n");
            sb.append("Total cached size: ");
            sb.append(totalCachedSize);
            sb.append(" bytes\n");
            sb.append("</div>\n");

            sb.append("<div style=\"float:left;\">\n");
            sb.append("<a href=\"");
            sb.append("?action=update");
            sb.append("\" class=\"btn btn-default\">");
            sb.append("Update cache");
            sb.append("</a>");
            sb.append("</div>\n");

            sb.append("<div style=\"clear:both;\"></div>\n");
            sb.append("</div>\n");

            sb.append("<div style=\"margin-bottom:20px;\">\n");

            sb.append("<form class=\"form-horizontal\" action=\"?\" name=\"insert_form\" method=\"post\" enctype=\"application/x-www-form-urlencoded\" accept-charset=\"utf-8\">");
            sb.append("<label for=\"itemsperpage\">Lines per page:</label>");
            sb.append("<input type=\"text\" id=\"itemsperpage\" name=\"itemsperpage\" value=\"" + linesPerPage + "\" placeholder=\"must be > 25 and < 1000 \">\n");
            sb.append("<label for=\"q\">Filter regex:</label>");
            // q comes straight from the request; escape it so it cannot break
            // out of the attribute value and inject markup.
            sb.append("<input type=\"text\" id=\"q\" name=\"q\" value=\"" + escapeHtml(q) + "\" placeholder=\"content-type\" style=\"display:inline;width:350px;\">\n");
            sb.append("<button type=\"submit\" name=\"search\" value=\"1\" class=\"btn btn-success\"><i class=\"icon-white icon-thumbs-up\"></i> Search</button>\n");

            sb.append("</div>\n");

            sb.append("<div style=\"float:left;margin: 20px 0px;\">\n");
            sb.append("<span>Matching lines: ");
            sb.append(lines);
            sb.append(" URIs</span>\n");
            sb.append("</div>\n");
            sb.append(Pagination.getPagination(page, linesPerPage, pages, false, additionalParams));
            sb.append("<div style=\"clear:both;\"></div>");
            sb.append("<div>\n");
            sb.append("<pre>\n");
            if (lines > 0) {
                // NOTE(review): the page content is emitted as-is; verify that
                // readPage already returns HTML-safe text.
                byte[] pageBytes = pageable.readPage(page, linesPerPage, true);
                sb.append(new String(pageBytes, "UTF-8"));
            }
            sb.append("</pre>\n");
            sb.append("</div>\n");
            sb.append(Pagination.getPagination(page, linesPerPage, pages, false, additionalParams));
            sb.append("</form>");
        } else {
            sb.append("Job ");
            sb.append(jobId);
            sb.append(" is not running.");
        }

        StringBuilder menuSb = masterTplBuilder.buildMenu(new StringBuilder(), req, locale, h3Job);

        masterTplBuilder.insertContent("Job " + jobId + " Crawllog", menuSb.toString(), httpLocale.generateLanguageLinks(),
                "Job " + jobId + " Crawllog", sb.toString(), "").write(out);

        out.flush();
        out.close();
    }

    /**
     * Reads a request parameter as a {@code long}.
     *
     * @param req          request to read from
     * @param name         parameter name
     * @param defaultValue value returned when the parameter is missing, empty
     *                     or not a valid {@code long}
     * @return the parsed value, or {@code defaultValue}
     */
    private static long parseLongParameter(HttpServletRequest req, String name, long defaultValue) {
        String value = req.getParameter(name);
        if (value != null && value.length() > 0) {
            try {
                return Long.parseLong(value);
            } catch (NumberFormatException ignored) {
                // Best effort: malformed user input falls back to the default.
            }
        }
        return defaultValue;
    }

    /**
     * Escapes the characters that are significant in HTML text and quoted
     * attribute values ({@code & < > " '}).
     *
     * @param text text to escape; must not be {@code null}
     * @return the escaped text
     */
    private static String escapeHtml(String text) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); ++i) {
            char c = text.charAt(i);
            switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&#39;");
                break;
            default:
                escaped.append(c);
                break;
            }
        }
        return escaped.toString();
    }

}