001/*
002 * #%L
003 * Netarchivesuite - heritrix 3 monitor
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.heritrix3.monitor;
025
026import java.io.File;
027import java.io.IOException;
028import java.io.RandomAccessFile;
029import java.nio.ByteBuffer;
030import java.nio.channels.FileChannel;
031import java.util.regex.Matcher;
032import java.util.regex.Pattern;
033
034import org.apache.commons.io.IOUtils;
035
036public class SearchResult implements Pageable {
037
038    protected Heritrix3JobMonitor h3Job;
039
040    protected Pattern p;
041    protected Matcher m;
042
043    protected File srLogFile;
044
045    protected RandomAccessFile srLogRaf;
046
047    protected File srIdxFile;
048
049    protected RandomAccessFile srIdxRaf;
050
051    protected long lastIndexed;
052
053    public SearchResult(NASEnvironment environment, Heritrix3JobMonitor h3Job, String q, int searchResultNr) throws IOException {
054        this.h3Job = h3Job;
055        p = Pattern.compile(q, Pattern.CASE_INSENSITIVE);
056        // Create a reusable pattern matcher object for use with the reset method.
057        m = p.matcher("42");
058        srLogFile = new File(environment.tempPath, "crawllog-" + h3Job.jobId + "-" + searchResultNr + ".log");
059        srLogRaf = new RandomAccessFile(srLogFile, "rw");
060        srLogRaf.setLength(0);
061        srIdxFile = new File(environment.tempPath, "crawllog-" + h3Job.jobId + "-" + searchResultNr + ".idx");
062        srIdxRaf = new RandomAccessFile(srIdxFile, "rw");
063        srIdxRaf.setLength(0);
064        srIdxRaf.writeLong(0);
065        lastIndexed = 0;
066    }
067
068    public synchronized void update() throws IOException {
069        RandomAccessFile logRaf = new RandomAccessFile(h3Job.logFile, "r");
070        logRaf.seek(lastIndexed);
071        srLogRaf.seek(srLogRaf.length());
072        srIdxRaf.seek(srIdxRaf.length());
073        FileChannel logChannel = logRaf.getChannel();
074        byte[] bytes = new byte[1024*1024];
075        ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
076        String tmpStr;
077        //long index = lastIndex;
078        long index = srLogRaf.length();
079        int pos;
080        int to;
081        int mark;
082        int limit;
083        boolean b;
084        while (logChannel.read(byteBuffer) != -1) {
085            byteBuffer.flip();
086            pos = byteBuffer.position();
087            mark = pos;
088            limit = byteBuffer.limit();
089            b = true;
090            while (b) {
091                if (pos < limit) {
092                    if (bytes[pos++] == '\n') {
093                        to = pos - 1;
094                        if (bytes[to - 1] == '\r') {
095                            --to;
096                        }
097                        tmpStr = new String(bytes, mark, to - mark, "UTF-8");
098                        m.reset(tmpStr);
099                        if (m.matches()) {
100                            srLogRaf.write(bytes, mark, pos - mark);
101                            index += pos - mark;
102                            srIdxRaf.writeLong(index);
103                        }
104                        lastIndexed += pos - mark;
105                        // next
106                        mark = pos;
107                        //index += pos - mark;
108                        //lastIndex = index;
109                    }
110                } else {
111                    b = false;
112                }
113            }
114            byteBuffer.position(mark);
115            byteBuffer.compact();
116        }
117        logRaf.close();
118    }
119
120    @Override
121    public long getIndexSize() {
122        return srIdxFile.length();
123    }
124
125    @Override
126    public long getLastIndexed() {
127        return srLogFile.length();
128    }
129
130    @Override
131    public synchronized byte[] readPage(long page, long itemsPerPage, boolean descending) throws IOException {
132        return StringIndexFile.readPage(srIdxRaf, srLogRaf, page, itemsPerPage, descending);
133    }
134
135    public synchronized void cleanup() {
136        IOUtils.closeQuietly(srIdxRaf);
137    }
138
139}