/*
 * #%L
 * Netarchivesuite - heritrix 3 monitor
 * %%
 * Copyright (C) 2005 - 2018 The Royal Danish Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.heritrix3.monitor;

import java.io.IOException;
import java.io.RandomAccessFile;

/**
 * Static helper for reading one page of lines from a log file by means of an index file.
 *
 * <p>The index file is read as a sequence of 8-byte big-endian {@code long} values. Each value
 * is a byte offset into the log file, and two consecutive values delimit one line
 * ({@code [entry[i], entry[i + 1])}). An index with {@code n + 1} entries therefore describes
 * {@code n} lines.
 */
public class StringIndexFile {

    /** Size in bytes of one index entry (a big-endian {@code long} offset into the log file). */
    private static final int ENTRY_SIZE = 8;

    /**
     * Reads the raw bytes of one page of log lines.
     *
     * <p>In ascending mode page 1 starts at the first line of the log. In descending mode page 1
     * starts at the last line of the log and the lines within the returned page are in reverse
     * order (newest first). The last page may contain fewer than {@code itemsPerPage} lines.
     *
     * @param idxRaf index file; its file pointer is moved by this call
     * @param logRaf log file the index refers to; its file pointer is moved by this call
     * @param page 1-based page number
     * @param itemsPerPage number of lines per page, at least 25
     * @param descending {@code true} to page backwards from the end of the log
     * @return the bytes of the requested lines, an empty array if {@code page} lies beyond the
     *         last page, or {@code null} if the index does not describe any line yet
     * @throws IllegalArgumentException if {@code page < 1}, {@code itemsPerPage < 25}, or the
     *         page is too large to be held in a single array
     * @throws IOException if reading fails or the index yields an invalid log range
     */
    public static byte[] readPage(RandomAccessFile idxRaf, RandomAccessFile logRaf, long page, long itemsPerPage, boolean descending) throws IOException {
        if (page < 1) {
            throw new IllegalArgumentException("page must be >= 1 but was " + page);
        }
        if (itemsPerPage < 25) {
            throw new IllegalArgumentException("itemsPerPage must be >= 25 but was " + itemsPerPage);
        }
        // Integer division drops a trailing, partially written entry so reads stay entry-aligned.
        long entries = idxRaf.length() / ENTRY_SIZE;
        if (entries < 2) {
            // Fewer than two entries cannot delimit a line.
            return null;
        }
        long lines = entries - 1;
        // Equivalent to (page - 1) * itemsPerPage >= lines, written so it cannot overflow.
        if (page - 1 > (lines - 1) / itemsPerPage) {
            return new byte[0];
        }
        long skipped = (page - 1) * itemsPerPage;
        if (!descending) {
            // Forwards: only the two entries bounding the page are needed.
            long startLine = skipped;
            long endLine = startLine + Math.min(itemsPerPage, lines - startLine);
            long from = readEntry(idxRaf, startLine);
            // endLine never exceeds the index of the last entry, so this read cannot hit EOF.
            long to = readEntry(idxRaf, endLine);
            return readLogRange(logRaf, from, to);
        }
        // Backwards: every line boundary is needed to reverse the line order within the page.
        long endLine = lines - skipped;
        long startLine = endLine - Math.min(itemsPerPage, endLine);
        long[] offsets = readEntries(idxRaf, startLine, endLine - startLine + 1);
        byte[] ascending = readLogRange(logRaf, offsets[0], offsets[offsets.length - 1]);
        return reverseLines(ascending, offsets);
    }

    /** Reads the single index entry with the given 0-based entry number. */
    private static long readEntry(RandomAccessFile idxRaf, long entry) throws IOException {
        idxRaf.seek(entry * ENTRY_SIZE);
        return idxRaf.readLong();
    }

    /** Reads {@code count} consecutive index entries, starting at entry {@code firstEntry}, in one bulk read. */
    private static long[] readEntries(RandomAccessFile idxRaf, long firstEntry, long count) throws IOException {
        if (count > Integer.MAX_VALUE / ENTRY_SIZE) {
            throw new IllegalArgumentException("Page of " + count + " index entries is too large");
        }
        byte[] raw = new byte[(int) count * ENTRY_SIZE];
        idxRaf.seek(firstEntry * ENTRY_SIZE);
        idxRaf.readFully(raw);
        long[] offsets = new long[(int) count];
        for (int i = 0; i < offsets.length; i++) {
            offsets[i] = decodeLong(raw, i * ENTRY_SIZE);
        }
        return offsets;
    }

    /** Decodes a big-endian {@code long} from {@code buf} starting at {@code off}. */
    private static long decodeLong(byte[] buf, int off) {
        long value = 0L;
        for (int i = 0; i < ENTRY_SIZE; i++) {
            // The long mask keeps the whole computation 64-bit; an int shift by more than 31
            // bits would silently wrap and corrupt offsets of 2 GiB and above.
            value = (value << 8) | (buf[off + i] & 0xFFL);
        }
        return value;
    }

    /** Reads the log bytes in the half-open range {@code [from, to)}. */
    private static byte[] readLogRange(RandomAccessFile logRaf, long from, long to) throws IOException {
        long size = to - from;
        if (size < 0 || size > Integer.MAX_VALUE) {
            throw new IOException("Invalid log range [" + from + ", " + to + ") read from index");
        }
        byte[] bytes = new byte[(int) size];
        logRaf.seek(from);
        logRaf.readFully(bytes);
        return bytes;
    }

    /**
     * Returns a copy of {@code ascending} with its lines in reverse order.
     *
     * @param ascending log bytes starting at log offset {@code offsets[0]}
     * @param offsets log offsets of the line boundaries, in ascending order
     */
    private static byte[] reverseLines(byte[] ascending, long[] offsets) {
        byte[] reversed = new byte[ascending.length];
        long base = offsets[0];
        int dst = reversed.length;
        for (int i = 1; i < offsets.length; i++) {
            int src = (int) (offsets[i - 1] - base);
            int len = (int) (offsets[i] - offsets[i - 1]);
            // Earlier lines are placed further towards the end of the result.
            dst -= len;
            System.arraycopy(ascending, src, reversed, dst, len);
        }
        return reversed;
    }

}