001/*
002 * #%L
003 * Netarchivesuite - heritrix 3 monitor
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.heritrix3.monitor.resources;
025
026import java.io.IOException;
027import java.net.URLEncoder;
028import java.util.List;
029import java.util.Locale;
030
031import javax.servlet.ServletContext;
032import javax.servlet.ServletOutputStream;
033import javax.servlet.http.HttpServletRequest;
034import javax.servlet.http.HttpServletResponse;
035
036import com.antiaction.common.filter.Caching;
037import com.antiaction.common.templateengine.TemplateBuilderFactory;
038
039import dk.netarkivet.heritrix3.monitor.Heritrix3JobMonitor;
040import dk.netarkivet.heritrix3.monitor.NASEnvironment;
041import dk.netarkivet.heritrix3.monitor.NASUser;
042import dk.netarkivet.heritrix3.monitor.Pageable;
043import dk.netarkivet.heritrix3.monitor.Pagination;
044import dk.netarkivet.heritrix3.monitor.ResourceAbstract;
045import dk.netarkivet.heritrix3.monitor.ResourceManagerAbstract;
046import dk.netarkivet.heritrix3.monitor.SearchResult;
047import dk.netarkivet.heritrix3.monitor.HttpLocaleHandler.HttpLocale;
048
049public class H3CrawlLogCachedResource implements ResourceAbstract {
050
051    private NASEnvironment environment;
052
053    protected int R_CRAWLLOG = -1;
054
055    @Override
056    public void resources_init(NASEnvironment environment) {
057        this.environment = environment;
058    }
059
060    @Override
061    public void resources_add(ResourceManagerAbstract resourceManager) {
062        R_CRAWLLOG = resourceManager.resource_add(this, "/job/<numeric>/crawllog/", false);
063    }
064
065    @Override
066    public void resource_service(ServletContext servletContext, NASUser nas_user, HttpServletRequest req, HttpServletResponse resp, HttpLocale httpLocale, int resource_id, List<Integer> numerics, String pathInfo) throws IOException {
067        if (NASEnvironment.contextPath == null) {
068            NASEnvironment.contextPath = req.getContextPath();
069        }
070        if (NASEnvironment.servicePath == null) {
071            NASEnvironment.servicePath = req.getContextPath() + req.getServletPath() + "/";
072        }
073        String method = req.getMethod().toUpperCase();
074        if (resource_id == R_CRAWLLOG) {
075            if ("GET".equals(method) || "POST".equals(method)) {
076                crawllog_list(req, resp, httpLocale, numerics);
077            }
078        }
079    }
080
081    public void crawllog_list(HttpServletRequest req, HttpServletResponse resp, HttpLocale httpLocale, List<Integer> numerics) throws IOException {
082        Locale locale = httpLocale.locale;
083        resp.setContentType("text/html; charset=UTF-8");
084        ServletOutputStream out = resp.getOutputStream();
085        Caching.caching_disable_headers(resp);
086
087        TemplateBuilderFactory<MasterTemplateBuilder> masterTplBuilderFactory = TemplateBuilderFactory.getInstance(environment.templateMaster, "master.tpl", "UTF-8", MasterTemplateBuilder.class);
088        MasterTemplateBuilder masterTplBuilder = masterTplBuilderFactory.getTemplateBuilder();
089
090        long lines;
091        long linesPerPage = 100;
092        long page = 1;
093        long pages = 0;
094        String q = null;
095
096        String tmpStr;
097        tmpStr = req.getParameter("page");
098        if (tmpStr != null && tmpStr.length() > 0) {
099            try {
100                page = Long.parseLong(tmpStr);
101            } catch (NumberFormatException e) {
102            }
103        }
104        tmpStr = req.getParameter("itemsperpage");
105        if (tmpStr != null && tmpStr.length() > 0) {
106            try {
107                linesPerPage = Long.parseLong(tmpStr);
108            } catch (NumberFormatException e) {
109            }
110        }
111
112        if (linesPerPage < 25) {
113            linesPerPage = 25;
114        }
115
116        String additionalParams;
117
118        tmpStr = req.getParameter("q");
119        if (tmpStr != null && tmpStr.length() > 0 && !tmpStr.equalsIgnoreCase(".*")) {
120            q = tmpStr;
121            additionalParams = "&q=" + URLEncoder.encode(q, "UTF-8");
122        } else {
123                additionalParams = "";
124        }
125
126        StringBuilder sb = new StringBuilder();
127
128        long jobId = numerics.get(0);
129        Heritrix3JobMonitor h3Job = environment.h3JobMonitorThread.getRunningH3Job(jobId);
130        Pageable pageable = h3Job;
131
132        if (h3Job != null && h3Job.isReady()) {
133            String actionStr = req.getParameter("action");
134            
135            if ("update".equalsIgnoreCase(actionStr)) {
136                byte[] tmpBuf = new byte[1024 * 1024];
137                h3Job.updateCrawlLog(tmpBuf);
138            }
139            
140            long totalCachedLines = h3Job.getTotalCachedLines();
141            long totalCachedSize = h3Job.getLastIndexed();
142
143            SearchResult searchResult = null;
144            
145            if (q != null) {
146                searchResult = h3Job.getSearchResult(q);
147                searchResult.update();
148                pageable = searchResult;
149            } else  {
150                q = ".*";
151            }
152
153            lines = pageable.getIndexSize();
154
155            if (lines > 0) {
156                lines = (lines / 8) - 1;
157                pages = Pagination.getPages(lines, linesPerPage);
158            } else {
159                lines = 0;
160            }
161            if (page > pages) {
162                page = pages;
163            }
164
165            sb.append("<div style=\"margin-bottom:20px;\">\n");
166            sb.append("<div style=\"float:left;min-width:180px;\">\n");
167            sb.append("Total cached lines: ");
168            sb.append(totalCachedLines);
169            sb.append(" URIs<br />\n");
170            sb.append("Total cached size: ");
171            sb.append(totalCachedSize);
172            sb.append(" bytes\n");
173            sb.append("</div>\n");
174            
175            sb.append("<div style=\"float:left;\">\n");
176            sb.append("<a href=\"");
177            sb.append("?action=update");
178            sb.append("\" class=\"btn btn-default\">");
179            sb.append("Update cache");
180            sb.append("</a>");
181            //sb.append("the cache manually ");
182            sb.append("</div>\n");
183
184            sb.append("<div style=\"clear:both;\"></div>\n");
185            sb.append("</div>\n");
186
187            sb.append("<div style=\"margin-bottom:20px;\">\n");
188
189            sb.append("<form class=\"form-horizontal\" action=\"?\" name=\"insert_form\" method=\"post\" enctype=\"application/x-www-form-urlencoded\" accept-charset=\"utf-8\">");
190            sb.append("<label for=\"itemsperpage\">Lines per page:</label>");
191            sb.append("<input type=\"text\" id=\"itemsperpage\" name=\"itemsperpage\" value=\"" + linesPerPage + "\" placeholder=\"must be &gt; 25 and &lt; 1000 \">\n");
192            sb.append("<label for=\"q\">Filter regex:</label>");
193            sb.append("<input type=\"text\" id=\"q\" name=\"q\" value=\"" + q + "\" placeholder=\"content-type\" style=\"display:inline;width:350px;\">\n");
194            sb.append("<button type=\"submit\" name=\"search\" value=\"1\" class=\"btn btn-success\"><i class=\"icon-white icon-thumbs-up\"></i> Search</button>\n");
195
196            sb.append("</div>\n");
197            
198            sb.append("<div style=\"float:left;margin: 20px 0px;\">\n");
199            sb.append("<span>Matching lines: ");
200            sb.append(lines);
201            sb.append(" URIs</span>\n");
202            sb.append("</div>\n");
203            sb.append(Pagination.getPagination(page, linesPerPage, pages, false, additionalParams));
204            sb.append("<div style=\"clear:both;\"></div>");
205            sb.append("<div>\n");
206            sb.append("<pre>\n");
207            if (lines > 0) {
208                byte[] pageBytes = pageable.readPage(page, linesPerPage, true);
209                sb.append(new String(pageBytes, "UTF-8"));
210            }
211            sb.append("</pre>\n");
212            sb.append("</div>\n");
213            sb.append(Pagination.getPagination(page, linesPerPage, pages, false, additionalParams));
214            sb.append("</form>");
215        } else {
216            sb.append("Job ");
217            sb.append(jobId);
218            sb.append(" is not running.");
219        }
220
221        StringBuilder menuSb = masterTplBuilder.buildMenu(new StringBuilder(), req, locale, h3Job);
222
223        masterTplBuilder.insertContent("Job " + jobId + " Crawllog", menuSb.toString(), httpLocale.generateLanguageLinks(),
224                        "Job " + jobId + " Crawllog", sb.toString(), "").write(out);
225
226        out.flush();
227        out.close();
228    }
229
230}