001/*
002 * #%L
003 * Netarchivesuite - wayback
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.wayback.indexer;
024
025import java.io.BufferedReader;
026import java.io.IOException;
027import java.io.InputStream;
028import java.io.InputStreamReader;
029import java.util.Date;
030
031import org.apache.commons.io.IOUtils;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035import dk.netarkivet.common.distribute.RemoteFile;
036import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
037import dk.netarkivet.common.distribute.arcrepository.BatchStatus;
038import dk.netarkivet.common.distribute.arcrepository.PreservationArcRepositoryClient;
039import dk.netarkivet.common.exceptions.IOFailure;
040import dk.netarkivet.common.utils.Settings;
041import dk.netarkivet.common.utils.batch.DatedFileListJob;
042import dk.netarkivet.common.utils.batch.FileListJob;
043import dk.netarkivet.wayback.WaybackSettings;
044
045public class FileNameHarvester {
046
047    /** Logger for this class. */
048    private static final Logger log = LoggerFactory.getLogger(FileNameHarvester.class);
049
050    /**
051     * This method harvests a list of all the files currently in the arcrepository and appends any new ones found to the
052     * ArchiveFile object store.
053     */
054    public static synchronized void harvestAllFilenames() {
055        ArchiveFileDAO dao = new ArchiveFileDAO();
056        PreservationArcRepositoryClient client = ArcRepositoryClientFactory.getPreservationInstance();
057        BatchStatus status = client.batch(new FileListJob(), Settings.get(WaybackSettings.WAYBACK_REPLICA));
058        RemoteFile results = status.getResultFile();
059        InputStream is = results.getInputStream();
060        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
061        String line;
062        try {
063            while ((line = reader.readLine()) != null) {
064                if (!dao.exists(line.trim())) {
065                    ArchiveFile file = new ArchiveFile();
066                    file.setFilename(line.trim());
067                    file.setIndexed(false);
068                    log.info("Creating object store entry for '{}'", file.getFilename());
069                    dao.create(file);
070                } // If the file is already known in the persistent store, no
071                  // action needs to be taken.
072            }
073        } catch (IOException e) {
074            throw new IOFailure("Error reading remote file", e);
075        } finally {
076            IOUtils.closeQuietly(reader);
077        }
078    }
079
080    /**
081     * This method harvests a list of all the recently added files in the archive.
082     */
083    public static synchronized void harvestRecentFilenames() {
084        ArchiveFileDAO dao = new ArchiveFileDAO();
085        PreservationArcRepositoryClient client = ArcRepositoryClientFactory.getPreservationInstance();
086        long timeAgo = Settings.getLong(WaybackSettings.WAYBACK_INDEXER_RECENT_PRODUCER_SINCE);
087        Date since = new Date(System.currentTimeMillis() - timeAgo);
088        BatchStatus status = client.batch(new DatedFileListJob(since), Settings.get(WaybackSettings.WAYBACK_REPLICA));
089        RemoteFile results = status.getResultFile();
090        InputStream is = results.getInputStream();
091        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
092        String line;
093        try {
094            while ((line = reader.readLine()) != null) {
095                if (!dao.exists(line.trim())) {
096                    ArchiveFile file = new ArchiveFile();
097                    file.setFilename(line.trim());
098                    file.setIndexed(false);
099                    log.info("Creating object store entry for '{}'", file.getFilename());
100                    dao.create(file);
101                } // If the file is already known in the persistent store, no
102                  // action needs to be taken.
103            }
104        } catch (IOException e) {
105            throw new IOFailure("Error reading remote file", e);
106        } finally {
107            IOUtils.closeQuietly(reader);
108        }
109    }
110}