package dk.netarkivet.harvester.webinterface.servlet;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.netarchivesuite.heritrix3wrapper.ByteRange;
import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper;
import org.netarchivesuite.heritrix3wrapper.JobResult;
import org.netarchivesuite.heritrix3wrapper.StreamResult;

import dk.netarkivet.harvester.datamodel.Job;
import dk.netarkivet.harvester.harvesting.monitor.StartedJobInfo;

/**
 * Monitors a single harvest job through a {@link Heritrix3Wrapper} and keeps a local copy of the
 * job's crawl log ({@code crawllog-<jobId>.log}) together with an index file
 * ({@code crawllog-<jobId>.idx}) holding the byte offset at which each log line starts.
 *
 * <p>All state-changing methods are {@code synchronized} on the monitor instance.
 */
public class Heritrix3JobMonitor implements Pageable {

    /** Environment supplying the temp directory and the Heritrix 3 admin credentials. */
    protected NASEnvironment environment;

    /** {@code true} until {@link #cleanup(List)} has been called. */
    public boolean bActive = true;

    /** Not read or written by this class. */
    public boolean bPull = false;

    /** {@code true} once {@link #init()} has opened the local log and index files. */
    public boolean bInitialized;

    /** Id of the monitored job. */
    public long jobId;

    /** Job as most recently read from the job DAO. */
    public Job job;

    /** Wrapper used to talk to the Heritrix 3 instance; {@code null} until resolved by {@link #init()}. */
    public Heritrix3Wrapper h3wrapper;

    /** Not read or written by this class. */
    public String h3HostnamePort;

    /** Host URL taken from the most recent started-job info for this job. */
    public String hostUrl;

    /** Heritrix 3 job name resolved for {@link #jobId}. */
    public String jobname;

    /** Most recent job result fetched from Heritrix 3. */
    public JobResult jobResult;

    /** Remote crawl log path as reported in {@link #jobResult}. */
    public String crawlLogFilePath;

    /** Local copy of the crawl log. */
    public File logFile;

    /** Random access handle on {@link #logFile}; opened by {@link #init()}. */
    public RandomAccessFile logRaf;

    /** Local index file holding one {@code long} offset per log line start. */
    public File idxFile;

    /** Random access handle on {@link #idxFile}; opened by {@link #init()}. */
    public RandomAccessFile idxRaf;

    /** Last offset written to the index by {@link #updateCrawlLog(byte[])}. */
    public long lastIndexed = 0;

    /** Cached search results keyed by query string. */
    protected Map<String, SearchResult> qSearchResultMap = new HashMap<String, SearchResult>();

    /** Sequence number handed to the next {@link SearchResult} created. */
    protected int searchResultNr = 1;

    /** Use {@link #getInstance(Long, NASEnvironment)}. */
    protected Heritrix3JobMonitor() {
    }

    /**
     * Creates a monitor for the given job and attempts to initialize it.
     *
     * @param jobId id of the job to monitor
     * @param environment environment providing {@code tempPath} and the H3 admin credentials
     * @return the new monitor; it may not be initialized yet, see {@link #isReady()}
     * @throws IOException if {@link #init()} fails
     */
    public static Heritrix3JobMonitor getInstance(Long jobId, NASEnvironment environment) throws IOException {
        Heritrix3JobMonitor jobmonitor = new Heritrix3JobMonitor();
        jobmonitor.environment = environment;
        jobmonitor.jobId = jobId;
        jobmonitor.logFile = new File(environment.tempPath, "crawllog-" + jobId + ".log");
        jobmonitor.idxFile = new File(environment.tempPath, "crawllog-" + jobId + ".idx");
        jobmonitor.init();
        return jobmonitor;
    }

    /**
     * Resolves, step by step, the job, the H3 wrapper, the job name, the job result and the remote
     * crawl log path. Once the crawl log path is known, the local log and index files are opened
     * and the monitor is marked initialized. Steps that cannot be completed yet are retried on the
     * next call. Does nothing if the monitor is inactive or already initialized.
     *
     * @throws IOException if the local log or index file cannot be opened or written
     */
    public synchronized void init() throws IOException {
        if (!bActive || bInitialized) {
            return;
        }
        if (job == null) {
            job = Heritrix3JobMonitorThread.jobDAO.read(jobId);
        }
        if (h3wrapper == null) {
            StartedJobInfo startedInfo = Heritrix3JobMonitorThread.runningJobsInfoDAO.getMostRecentByJobId(jobId);
            if (startedInfo != null) {
                hostUrl = startedInfo.getHostUrl();
                if (hostUrl != null && hostUrl.length() > 0) {
                    h3wrapper = Heritrix3WrapperManager.getHeritrix3Wrapper(hostUrl, environment.h3AdminName,
                            environment.h3AdminPassword);
                }
            }
        }
        if (jobname == null && h3wrapper != null) {
            jobname = Heritrix3WrapperManager.getJobname(h3wrapper, jobId);
        }
        if ((jobResult == null || jobResult.job == null) && jobname != null) {
            jobResult = h3wrapper.job(jobname);
        }
        if (jobResult != null && jobResult.job != null) {
            crawlLogFilePath = jobResult.job.crawlLogFilePath;
        }
        if (crawlLogFilePath != null) {
            // Open into locals first so a failure on the index file does not leave an
            // orphaned, still-open log handle behind (init() is retried until it succeeds).
            RandomAccessFile newLogRaf = new RandomAccessFile(logFile, "rw");
            RandomAccessFile newIdxRaf = null;
            try {
                newIdxRaf = new RandomAccessFile(idxFile, "rw");
                // The first index entry is always offset 0: the start of the first log line.
                newIdxRaf.writeLong(0);
            } catch (IOException e) {
                IOUtils.closeQuietly(newIdxRaf);
                IOUtils.closeQuietly(newLogRaf);
                throw e;
            }
            logRaf = newLogRaf;
            idxRaf = newIdxRaf;
            bInitialized = true;
        }
    }

    /**
     * Refreshes the cached {@link #job} and {@link #jobResult}. A cached value is only replaced
     * when the refreshed value is non-null.
     *
     * @throws IOException declared for callers; not thrown by the code visible in this method
     */
    public synchronized void update() throws IOException {
        if (job != null) {
            // Read into a temporary so a null result does not wipe the cached job.
            Job tmpJob = Heritrix3JobMonitorThread.jobDAO.read(jobId);
            if (tmpJob != null) {
                job = tmpJob;
            }
        }
        if (jobResult != null && jobResult.job != null && jobname != null) {
            JobResult tmpJobResult = h3wrapper.job(jobname);
            if (tmpJobResult != null) {
                jobResult = tmpJobResult;
            }
        }
    }

    /**
     * Appends newly available remote crawl log bytes to the local log file and records the offset
     * following every {@code '\n'} in the index file. Requests are issued in chunks of
     * {@code tmpBuf.length} bytes, starting at the current local log length, until the local copy
     * has reached the content length reported by the server, no range is returned, no progress is
     * made, or a transfer fails. Initializes the monitor first if needed; does nothing if the
     * monitor is inactive or still cannot be initialized.
     *
     * @param tmpBuf scratch buffer used for the transfer; its length is the chunk size
     * @throws IOException if seeking or sizing the local files fails, or if {@link #init()} fails
     * @throws IllegalArgumentException if the monitor is ready and {@code tmpBuf} is empty
     */
    public synchronized void updateCrawlLog(byte[] tmpBuf) throws IOException {
        if (bActive && !bInitialized) {
            init();
        }
        if (!bActive || !bInitialized) {
            return;
        }
        if (tmpBuf.length == 0) {
            // A zero-length buffer would make InputStream.read(byte[]) return 0 forever.
            throw new IllegalArgumentException("tmpBuf must not be empty");
        }
        boolean bLoop = true;
        while (bLoop) {
            idxRaf.seek(idxRaf.length());
            long pos = logRaf.length();
            final long start = pos;
            long to = pos;
            StreamResult anypathResult = h3wrapper.anypath(jobResult.job.crawlLogFilePath, pos,
                    pos + tmpBuf.length - 1);
            try {
                if (anypathResult != null && anypathResult.byteRange != null && anypathResult.in != null
                        && anypathResult.byteRange.contentLength > 0) {
                    ByteRange byteRange = anypathResult.byteRange;
                    boolean failed = false;
                    logRaf.seek(pos);
                    try {
                        int read;
                        while ((read = anypathResult.in.read(tmpBuf)) != -1) {
                            logRaf.write(tmpBuf, 0, read);
                            to += read;
                            for (int idx = 0; idx < read; ++idx) {
                                ++pos;
                                if (tmpBuf[idx] == '\n') {
                                    // pos is now the offset of the byte following the newline.
                                    idxRaf.writeLong(pos);
                                    lastIndexed = pos;
                                }
                            }
                        }
                    } catch (IOException e) {
                        // Best effort: report and give up for this call; the next call resumes
                        // from the current local log length.
                        e.printStackTrace();
                        failed = true;
                    }
                    // Stop when caught up, and also when retrying immediately cannot help
                    // (transfer error or zero bytes received), to avoid spinning under the lock.
                    if (failed || to == start || byteRange.contentLength == to) {
                        bLoop = false;
                    }
                } else {
                    bLoop = false;
                }
            } finally {
                // Close on every path, not only after a successful transfer.
                IOUtils.closeQuietly(anypathResult);
            }
        }
    }

    /**
     * @return the current length in bytes of the index file
     */
    @Override
    public synchronized long getIndexSize() {
        return idxFile.length();
    }

    /**
     * @return the last offset written to the index by {@link #updateCrawlLog(byte[])}
     */
    @Override
    public long getLastIndexed() {
        return lastIndexed;
    }

    /**
     * Reads one page of log lines by delegating to {@code StringIndexFile.readPage} with this
     * monitor's index and log handles.
     *
     * @param page page number
     * @param itemsPerPage number of items per page
     * @param descending passed through to {@code StringIndexFile.readPage}
     * @return the bytes returned by {@code StringIndexFile.readPage}
     * @throws IOException if reading the index or log fails
     */
    @Override
    public synchronized byte[] readPage(long page, long itemsPerPage, boolean descending) throws IOException {
        return StringIndexFile.readPage(idxRaf, logRaf, page, itemsPerPage, descending);
    }

    /**
     * @return {@code true} if the monitor is active and initialized
     */
    public synchronized boolean isReady() {
        return (bActive && bInitialized);
    }

    /**
     * Returns the search result cached for the given query, creating and caching a new one if
     * none exists yet.
     *
     * @param q query string used as cache key
     * @return the cached or newly created search result
     * @throws IOException if creating the search result fails
     */
    public synchronized SearchResult getSearchResult(String q) throws IOException {
        SearchResult searchResult = qSearchResultMap.get(q);
        if (searchResult == null) {
            searchResult = new SearchResult(environment, this, q, searchResultNr++);
            qSearchResultMap.put(q, searchResult);
        }
        return searchResult;
    }

    /**
     * Deactivates the monitor: clears the resolved H3 state, closes the local log and index
     * handles, cleans up all cached search results, and appends every local file owned by this
     * monitor and its search results to {@code oldFilesList}.
     *
     * @param oldFilesList list that receives the files this monitor no longer uses
     */
    public synchronized void cleanup(List<File> oldFilesList) {
        bActive = false;
        bInitialized = false;
        hostUrl = null;
        h3wrapper = null;
        jobname = null;
        jobResult = null;
        crawlLogFilePath = null;
        IOUtils.closeQuietly(logRaf);
        IOUtils.closeQuietly(idxRaf);
        oldFilesList.add(logFile);
        oldFilesList.add(idxFile);
        for (SearchResult sr : qSearchResultMap.values()) {
            oldFilesList.add(sr.srIdxFile);
            oldFilesList.add(sr.srLogFile);
            sr.cleanup();
        }
        qSearchResultMap.clear();
    }

}