Source code

001/*
002 * #%L
003 * Netarchivesuite - heritrix 3 monitor
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.heritrix3.monitor;
025
026import java.io.IOException;
027import java.io.RandomAccessFile;
028
/**
 * Reads pages of lines from a log file by way of a companion index file.
 * <p>
 * As interpreted by {@link #readPage}, the index file is a sequence of
 * big-endian 8-byte offsets into the log file. Entry {@code i} is the offset
 * at which line {@code i} starts and entry {@code i + 1} is the offset at
 * which it ends, so an index describing {@code n} lines holds {@code n + 1}
 * entries.
 */
public class StringIndexFile {

    /** Size in bytes of one offset entry in the index file. */
    private static final int ENTRY_SIZE = 8;

    /** Smallest page size accepted by {@link #readPage}. */
    private static final long MIN_ITEMS_PER_PAGE = 25;

    /**
     * Reads one page of lines from the log file.
     *
     * @param idxRaf index file holding the 8-byte line offsets
     * @param logRaf log file the offsets point into
     * @param page 1-based page number
     * @param itemsPerPage maximum number of lines on a page, at least 25
     * @param descending {@code false} to count pages from the start of the log
     *        and return the lines in file order; {@code true} to count pages
     *        from the end of the log and return the lines in reverse order
     * @return the raw bytes of the lines on the page; an empty array if the
     *         page lies beyond the indexed lines; {@code null} if the index
     *         does not describe at least one complete line
     * @throws IllegalArgumentException if {@code page < 1} or
     *         {@code itemsPerPage < 25}
     * @throws IOException if reading either file fails or the index contains
     *         offsets that do not describe a loadable range
     */
    public static byte[] readPage(RandomAccessFile idxRaf, RandomAccessFile logRaf, long page, long itemsPerPage, boolean descending) throws IOException {
        if (page < 1) {
            throw new IllegalArgumentException("page must be >= 1 but was " + page);
        }
        if (itemsPerPage < MIN_ITEMS_PER_PAGE) {
            throw new IllegalArgumentException("itemsPerPage must be >= " + MIN_ITEMS_PER_PAGE + " but was " + itemsPerPage);
        }
        // Integer division ignores a trailing, partially written entry.
        long lines = idxRaf.length() / ENTRY_SIZE - 1;
        if (lines < 1) {
            return null;
        }
        // Same as (page - 1) * itemsPerPage >= lines, but cannot overflow.
        if (page - 1 > (lines - 1) / itemsPerPage) {
            return new byte[0];
        }
        long skipped = (page - 1) * itemsPerPage;
        long count = Math.min(itemsPerPage, lines - skipped);
        if (descending) {
            return readDescending(idxRaf, logRaf, lines - skipped - count, count);
        }
        return readAscending(idxRaf, logRaf, skipped, count);
    }

    /** Loads {@code count} lines starting at line {@code firstLine}, in file order. */
    private static byte[] readAscending(RandomAccessFile idxRaf, RandomAccessFile logRaf, long firstLine, long count) throws IOException {
        idxRaf.seek(firstLine * ENTRY_SIZE);
        long start = idxRaf.readLong();
        // The entry following the last line of the page is the page's end offset.
        idxRaf.seek((firstLine + count) * ENTRY_SIZE);
        long end = idxRaf.readLong();
        byte[] bytes = new byte[toArrayLength(end - start)];
        logRaf.seek(start);
        logRaf.readFully(bytes);
        return bytes;
    }

    /** Loads {@code count} lines starting at line {@code firstLine}, last line first. */
    private static byte[] readDescending(RandomAccessFile idxRaf, RandomAccessFile logRaf, long firstLine, long count) throws IOException {
        // Every line boundary is needed to reverse the lines, so fetch all
        // count + 1 entries with a single read.
        byte[] raw = new byte[toArrayLength((count + 1) * ENTRY_SIZE)];
        idxRaf.seek(firstLine * ENTRY_SIZE);
        idxRaf.readFully(raw);
        long[] offsets = new long[raw.length / ENTRY_SIZE];
        for (int i = 0; i < offsets.length; i++) {
            offsets[i] = decodeLong(raw, i * ENTRY_SIZE);
        }
        // Load the lines of the page in file order.
        long base = offsets[0];
        byte[] forward = new byte[toArrayLength(offsets[offsets.length - 1] - base)];
        logRaf.seek(base);
        logRaf.readFully(forward);
        // Copy line by line, filling the result from its end towards its start.
        byte[] reversed = new byte[forward.length];
        int dst = reversed.length;
        for (int i = 0; i + 1 < offsets.length; i++) {
            int len = (int) (offsets[i + 1] - offsets[i]);
            dst -= len;
            System.arraycopy(forward, (int) (offsets[i] - base), reversed, dst, len);
        }
        return reversed;
    }

    /** Decodes the big-endian 8-byte value stored at {@code off} in {@code buf}. */
    private static long decodeLong(byte[] buf, int off) {
        long value = 0;
        for (int i = 0; i < ENTRY_SIZE; i++) {
            // Widen to long before shifting; an int shift would wrap at 32 bits.
            value = (value << 8) | (buf[off + i] & 0xFFL);
        }
        return value;
    }

    /** Narrows a byte count to an array length, rejecting values that do not fit. */
    private static int toArrayLength(long size) throws IOException {
        if (size < 0 || size > Integer.MAX_VALUE) {
            throw new IOException("Index describes an unloadable range of " + size + " bytes");
        }
        return (int) size;
    }

}