001package dk.netarkivet.harvester.webinterface.servlet;
002
003import java.io.File;
004import java.io.IOException;
005import java.io.RandomAccessFile;
006import java.util.HashMap;
007import java.util.Iterator;
008import java.util.List;
009import java.util.Map;
010
011import org.apache.commons.io.IOUtils;
012import org.netarchivesuite.heritrix3wrapper.ByteRange;
013import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper;
014import org.netarchivesuite.heritrix3wrapper.JobResult;
015import org.netarchivesuite.heritrix3wrapper.StreamResult;
016
017import dk.netarkivet.harvester.datamodel.Job;
018import dk.netarkivet.harvester.harvesting.monitor.StartedJobInfo;
019
020public class Heritrix3JobMonitor implements Pageable {
021
022    protected NASEnvironment environment;
023
024    public boolean bActive = true;
025
026    public boolean bPull = false;
027
028    public boolean bInitialized;
029
030    public long jobId;
031
032    public Job job;
033
034    public Heritrix3Wrapper h3wrapper;
035
036    public String h3HostnamePort;
037
038    public String hostUrl;
039
040    public String jobname;
041
042    public JobResult jobResult;
043
044    public String crawlLogFilePath;
045
046    public File logFile;
047
048    public RandomAccessFile logRaf;
049
050    public File idxFile;
051
052    public RandomAccessFile idxRaf;
053
054    public long lastIndexed = 0;
055
056    protected Heritrix3JobMonitor() {
057    }
058
059    public static Heritrix3JobMonitor getInstance(Long jobId, NASEnvironment environment) throws IOException {
060        Heritrix3JobMonitor jobmonitor = new Heritrix3JobMonitor();
061        jobmonitor.environment = environment;
062        jobmonitor.jobId = jobId;
063        jobmonitor.logFile = new File(environment.tempPath, "crawllog-" + jobId + ".log");
064        jobmonitor.idxFile = new File(environment.tempPath, "crawllog-" + jobId + ".idx");
065        jobmonitor.init();
066        return jobmonitor;
067    }
068
069    public synchronized void init() throws IOException {
070        if (bActive && !bInitialized) {
071            if (job == null) {
072                job = Heritrix3JobMonitorThread.jobDAO.read(jobId);
073            }
074            if (h3wrapper == null) {
075                StartedJobInfo startedInfo = Heritrix3JobMonitorThread.runningJobsInfoDAO.getMostRecentByJobId(jobId);
076                if (startedInfo != null) {
077                    hostUrl = startedInfo.getHostUrl();
078                    if (hostUrl != null && hostUrl.length() > 0) {
079                        h3wrapper = Heritrix3WrapperManager.getHeritrix3Wrapper(hostUrl, environment.h3AdminName, environment.h3AdminPassword);
080                    }
081                }
082            }
083            if (jobname == null && h3wrapper != null) {
084                jobname = Heritrix3WrapperManager.getJobname(h3wrapper, jobId);
085            }
086            if ((jobResult == null || jobResult.job == null) && jobname != null) {
087                jobResult = h3wrapper.job(jobname);
088            }
089            if (jobResult != null && jobResult.job != null) {
090                crawlLogFilePath = jobResult.job.crawlLogFilePath;
091            }
092            if (crawlLogFilePath != null) {
093                logRaf = new RandomAccessFile(logFile, "rw");
094                idxRaf = new RandomAccessFile(idxFile, "rw");
095                idxRaf.writeLong(0);
096                bInitialized = true;
097            }
098        }
099    }
100
101    public synchronized void update() throws IOException {
102        if (job != null) {
103            Job tmpJob = job = Heritrix3JobMonitorThread.jobDAO.read(jobId);
104            if (tmpJob != null) {
105                job = tmpJob;
106            }
107        }
108        if (jobResult != null && jobResult.job != null && jobname != null) {
109            JobResult tmpJobResult = h3wrapper.job(jobname);
110            if (tmpJobResult != null) {
111                jobResult = tmpJobResult;
112            }
113        }
114    }
115
116    public synchronized void updateCrawlLog(byte[] tmpBuf) throws IOException {
117        long pos;
118        long to;
119        int idx;
120        boolean bLoop;
121        ByteRange byteRange;
122        if (bActive && !bInitialized) {
123            init();
124        }
125        if (bActive && bInitialized) {
126            bLoop = true;
127            while (bLoop) {
128                idxRaf.seek(idxRaf.length());
129                pos = logRaf.length();
130                to = pos;
131                StreamResult anypathResult = h3wrapper.anypath(jobResult.job.crawlLogFilePath, pos, pos + tmpBuf.length - 1);
132                if (anypathResult != null && anypathResult.byteRange != null && anypathResult.in != null) {
133                    byteRange = anypathResult.byteRange;
134                    if (byteRange.contentLength > 0) {
135                        logRaf.seek(pos);
136                        int read;
137                        try {
138                            while ((read = anypathResult.in.read(tmpBuf)) != -1) {
139                                logRaf.write(tmpBuf, 0, read);
140                                to += read;
141                                idx = 0;
142                                while (read > 0) {
143                                    ++pos;
144                                    --read;
145                                    if (tmpBuf[idx++] == '\n') {
146                                        idxRaf.writeLong(pos);
147                                        lastIndexed = pos;
148                                    }
149                                }
150                            }
151                        }
152                        catch (IOException e) {
153                            e.printStackTrace();
154                        }
155                        IOUtils.closeQuietly(anypathResult);
156                        if (byteRange.contentLength == to) {
157                            bLoop = false;
158                        }
159                    } else {
160                        bLoop = false;
161                    }
162                } else {
163                    bLoop = false;
164                }
165            }
166        }
167    }
168
169    @Override
170    public synchronized long getIndexSize() {
171        return idxFile.length();
172    }
173
174    @Override
175    public long getLastIndexed() {
176        return lastIndexed;
177    }
178
179    @Override
180    public synchronized byte[] readPage(long page, long itemsPerPage, boolean descending) throws IOException {
181        return StringIndexFile.readPage(idxRaf, logRaf, page, itemsPerPage, descending);
182    }
183
184    public synchronized boolean isReady() {
185        return (bActive && bInitialized);
186    }
187
188    protected Map<String, SearchResult> qSearchResultMap = new HashMap<String, SearchResult>();
189
190    protected int searchResultNr = 1;
191
192    public synchronized SearchResult getSearchResult(String q) throws IOException {
193        SearchResult searchResult = qSearchResultMap.get(q);
194        if (searchResult == null) {
195            searchResult = new SearchResult(environment, this, q, searchResultNr++);
196            qSearchResultMap.put(q, searchResult);
197        }
198        return searchResult;
199    }
200
201    public synchronized void cleanup(List<File> oldFilesList) {
202        bActive = false;
203        bInitialized = false;
204        hostUrl = null;
205        h3wrapper = null;
206        jobname = null;
207        jobResult = null;
208        crawlLogFilePath = null;
209        IOUtils.closeQuietly(logRaf);
210        IOUtils.closeQuietly(idxRaf);
211        oldFilesList.add(logFile);
212        oldFilesList.add(idxFile);
213        Iterator<SearchResult> srIter = qSearchResultMap.values().iterator();
214        SearchResult sr;
215        while (srIter.hasNext()) {
216            sr = srIter.next();
217            oldFilesList.add(sr.srIdxFile);
218            oldFilesList.add(sr.srLogFile);
219            sr.cleanup();
220        }
221        qSearchResultMap.clear();
222    }
223
224}