/*
 * #%L
 * Netarchivesuite - heritrix 3 monitor
 * %%
 * Copyright (C) 2005 - 2018 The Royal Danish Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.heritrix3.monitor;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;

/**
 * Regex-filtered copy of a job's log file, backed by two temporary files.
 * <p>
 * The ".log" file receives every line of {@code h3Job.logFile} that matches the
 * query pattern in full. The ".idx" file is a sequence of {@code long} values:
 * it starts with {@code 0} and, for every matching line written, gets the new
 * end offset of the ".log" file appended.
 * <p>
 * {@link #update()}, {@link #readPage(long, long, boolean)} and {@link #cleanup()}
 * are synchronized on this instance because they share the two open files and
 * the reusable matcher.
 */
public class SearchResult implements Pageable {

    /** Initial size in bytes of the buffer used to scan the source log file. */
    private static final int READ_BUFFER_SIZE = 1024 * 1024;

    /** Job whose log file is being searched. */
    protected Heritrix3JobMonitor h3Job;

    /** Compiled, case-insensitive query pattern. */
    protected Pattern p;

    /** Reusable matcher for {@link #p}; re-targeted with {@code reset} for every line. */
    protected Matcher m;

    /** File holding the matching lines. */
    protected File srLogFile;

    /** Open handle on {@link #srLogFile}. */
    protected RandomAccessFile srLogRaf;

    /** File holding the offsets of the lines stored in {@link #srLogFile}. */
    protected File srIdxFile;

    /** Open handle on {@link #srIdxFile}. */
    protected RandomAccessFile srIdxRaf;

    /** Offset in the source log file up to which complete lines have been processed. */
    protected long lastIndexed;

    /**
     * Creates (or truncates) the two backing files in {@code environment.tempPath}
     * and writes the initial {@code 0} offset to the index file.
     *
     * @param environment environment supplying the temporary directory
     * @param h3Job job whose log file will be searched
     * @param q regular expression a whole line must match, compiled case-insensitively
     * @param searchResultNr number used to make the backing file names unique
     * @throws IOException if a backing file cannot be opened or initialised
     * @throws java.util.regex.PatternSyntaxException if {@code q} is not a valid regular expression
     */
    public SearchResult(NASEnvironment environment, Heritrix3JobMonitor h3Job, String q, int searchResultNr) throws IOException {
        this.h3Job = h3Job;
        p = Pattern.compile(q, Pattern.CASE_INSENSITIVE);
        // Create a reusable pattern matcher object for use with the reset method.
        m = p.matcher("42");
        String baseName = "crawllog-" + h3Job.jobId + "-" + searchResultNr;
        srLogFile = new File(environment.tempPath, baseName + ".log");
        srIdxFile = new File(environment.tempPath, baseName + ".idx");
        boolean initialised = false;
        try {
            srLogRaf = new RandomAccessFile(srLogFile, "rw");
            srLogRaf.setLength(0);
            srIdxRaf = new RandomAccessFile(srIdxFile, "rw");
            srIdxRaf.setLength(0);
            srIdxRaf.writeLong(0);
            initialised = true;
        } finally {
            if (!initialised) {
                // Do not leak a handle that was opened before the failure.
                IOUtils.closeQuietly(srIdxRaf);
                IOUtils.closeQuietly(srLogRaf);
            }
        }
        lastIndexed = 0;
    }

    /**
     * Scans the source log file from {@link #lastIndexed} to its current end and
     * appends every complete line that matches the pattern to the result files.
     * <p>
     * A trailing line without a terminating {@code '\n'} is not processed and is
     * not counted in {@link #lastIndexed}, so it is read again by the next call.
     * A terminating {@code "\r\n"} or {@code "\n"} is stripped before matching,
     * but the line is copied to the result file with its original terminator.
     *
     * @throws IOException if reading the source log or writing the result files fails
     */
    public synchronized void update() throws IOException {
        // try-with-resources: the source log handle is released even when an I/O error occurs.
        try (RandomAccessFile logRaf = new RandomAccessFile(h3Job.logFile, "r")) {
            logRaf.seek(lastIndexed);
            srLogRaf.seek(srLogRaf.length());
            srIdxRaf.seek(srIdxRaf.length());
            FileChannel logChannel = logRaf.getChannel();
            byte[] bytes = new byte[READ_BUFFER_SIZE];
            ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
            // Running end offset of the result log; written to the index after each match.
            long index = srLogRaf.length();
            while (logChannel.read(byteBuffer) != -1) {
                byteBuffer.flip();
                int limit = byteBuffer.limit();
                int pos = byteBuffer.position();
                // Start of the line currently being scanned.
                int mark = pos;
                while (pos < limit) {
                    if (bytes[pos++] == '\n') {
                        int to = pos - 1;
                        // Only look back inside the current line; an empty line at the
                        // start of the buffer would otherwise read bytes[-1].
                        if (to > mark && bytes[to - 1] == '\r') {
                            --to;
                        }
                        String line = new String(bytes, mark, to - mark, "UTF-8");
                        m.reset(line);
                        if (m.matches()) {
                            srLogRaf.write(bytes, mark, pos - mark);
                            index += pos - mark;
                            srIdxRaf.writeLong(index);
                        }
                        lastIndexed += pos - mark;
                        // next
                        mark = pos;
                    }
                }
                if (mark == 0 && limit == bytes.length) {
                    // The buffer is full and holds no line terminator. Compacting would
                    // leave no room, read() would return 0 forever, so grow instead.
                    if (bytes.length > Integer.MAX_VALUE / 2) {
                        throw new IOException("Log line too long to index: more than " + bytes.length + " bytes");
                    }
                    byte[] grown = new byte[bytes.length * 2];
                    System.arraycopy(bytes, 0, grown, 0, limit);
                    bytes = grown;
                    byteBuffer = ByteBuffer.wrap(bytes);
                    byteBuffer.position(limit);
                } else {
                    // Keep the unterminated tail and move it to the front for the next read.
                    byteBuffer.position(mark);
                    byteBuffer.compact();
                }
            }
        }
    }

    /**
     * @return the current length in bytes of the index file
     */
    @Override
    public long getIndexSize() {
        return srIdxFile.length();
    }

    /**
     * @return the current length in bytes of the file holding the matching lines
     */
    @Override
    public long getLastIndexed() {
        return srLogFile.length();
    }

    /**
     * Reads one page of matching lines by delegating to
     * {@code StringIndexFile.readPage} with the index and result log handles.
     *
     * @param page page to read
     * @param itemsPerPage number of lines per page
     * @param descending passed through to {@code StringIndexFile.readPage}
     * @return the bytes returned by {@code StringIndexFile.readPage}
     * @throws IOException if reading the backing files fails
     */
    @Override
    public synchronized byte[] readPage(long page, long itemsPerPage, boolean descending) throws IOException {
        return StringIndexFile.readPage(srIdxRaf, srLogRaf, page, itemsPerPage, descending);
    }

    /**
     * Closes both backing file handles, ignoring any error raised while closing.
     * The files themselves are left on disk.
     */
    public synchronized void cleanup() {
        IOUtils.closeQuietly(srIdxRaf);
        IOUtils.closeQuietly(srLogRaf);
    }

}