001/* 002 * #%L 003 * Netarchivesuite - wayback 004 * %% 005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.wayback.indexer; 024 025import java.io.BufferedReader; 026import java.io.IOException; 027import java.io.InputStream; 028import java.io.InputStreamReader; 029import java.util.Date; 030 031import org.apache.commons.io.IOUtils; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import dk.netarkivet.common.distribute.RemoteFile; 036import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory; 037import dk.netarkivet.common.distribute.arcrepository.BatchStatus; 038import dk.netarkivet.common.distribute.arcrepository.PreservationArcRepositoryClient; 039import dk.netarkivet.common.exceptions.IOFailure; 040import dk.netarkivet.common.utils.Settings; 041import dk.netarkivet.common.utils.batch.DatedFileListJob; 042import dk.netarkivet.common.utils.batch.FileListJob; 043import dk.netarkivet.wayback.WaybackSettings; 044 045public class FileNameHarvester { 046 047 /** Logger for this class. */ 048 private static final Logger log = LoggerFactory.getLogger(FileNameHarvester.class); 049 050 /** 051 * This method harvests a list of all the files currently in the arcrepository and appends any new ones found to the 052 * ArchiveFile object store. 053 */ 054 public static synchronized void harvestAllFilenames() { 055 ArchiveFileDAO dao = new ArchiveFileDAO(); 056 PreservationArcRepositoryClient client = ArcRepositoryClientFactory.getPreservationInstance(); 057 BatchStatus status = client.batch(new FileListJob(), Settings.get(WaybackSettings.WAYBACK_REPLICA)); 058 RemoteFile results = status.getResultFile(); 059 InputStream is = results.getInputStream(); 060 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 061 String line; 062 try { 063 while ((line = reader.readLine()) != null) { 064 if (!dao.exists(line.trim())) { 065 ArchiveFile file = new ArchiveFile(); 066 file.setFilename(line.trim()); 067 file.setIndexed(false); 068 log.info("Creating object store entry for '{}'", file.getFilename()); 069 dao.create(file); 070 } // If the file is already known in the persistent store, no 071 // action needs to be taken. 072 } 073 } catch (IOException e) { 074 throw new IOFailure("Error reading remote file", e); 075 } finally { 076 IOUtils.closeQuietly(reader); 077 } 078 } 079 080 /** 081 * This method harvests a list of all the recently added files in the archive. 082 */ 083 public static synchronized void harvestRecentFilenames() { 084 ArchiveFileDAO dao = new ArchiveFileDAO(); 085 PreservationArcRepositoryClient client = ArcRepositoryClientFactory.getPreservationInstance(); 086 long timeAgo = Settings.getLong(WaybackSettings.WAYBACK_INDEXER_RECENT_PRODUCER_SINCE); 087 Date since = new Date(System.currentTimeMillis() - timeAgo); 088 BatchStatus status = client.batch(new DatedFileListJob(since), Settings.get(WaybackSettings.WAYBACK_REPLICA)); 089 RemoteFile results = status.getResultFile(); 090 InputStream is = results.getInputStream(); 091 BufferedReader reader = new BufferedReader(new InputStreamReader(is)); 092 String line; 093 try { 094 while ((line = reader.readLine()) != null) { 095 if (!dao.exists(line.trim())) { 096 ArchiveFile file = new ArchiveFile(); 097 file.setFilename(line.trim()); 098 file.setIndexed(false); 099 log.info("Creating object store entry for '{}'", file.getFilename()); 100 dao.create(file); 101 } // If the file is already known in the persistent store, no 102 // action needs to be taken. 103 } 104 } catch (IOException e) { 105 throw new IOFailure("Error reading remote file", e); 106 } finally { 107 IOUtils.closeQuietly(reader); 108 } 109 } 110}