001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.distribute;
024
025import java.io.ByteArrayInputStream;
026import java.io.File;
027import java.io.FileInputStream;
028import java.io.FileNotFoundException;
029import java.io.FilterInputStream;
030import java.io.IOException;
031import java.io.InputStream;
032import java.io.OutputStream;
033import java.security.DigestInputStream;
034import java.security.DigestOutputStream;
035import java.util.Date;
036import java.util.Random;
037
038import org.apache.commons.io.IOUtils;
039import org.apache.commons.net.io.CopyStreamException;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043import dk.netarkivet.common.CommonSettings;
044import dk.netarkivet.common.exceptions.ArgumentNotValid;
045import dk.netarkivet.common.exceptions.IOFailure;
046import dk.netarkivet.common.utils.ChecksumCalculator;
047import dk.netarkivet.common.utils.FileUtils;
048import dk.netarkivet.common.utils.Settings;
049
/**
 * Class encapsulating upload to and download from an ftp-server.
 * <p>
 * Transfers are done using binary type and passive mode, if available.
 */
055@SuppressWarnings({"serial"})
056public final class FTPRemoteFile extends AbstractRemoteFile {
057
058    /** A named logger for this class. */
059    private static final transient Logger log = LoggerFactory.getLogger(FTPRemoteFile.class);
060
061    /**
062     * How many times we will retry upload, download, and logon.
063     */
064    public static int FTP_RETRIES = Settings.getInt(CommonSettings.FTP_RETRIES_SETTINGS);
065    /**
066     * How large a data timeout on our FTP connections.
067     */
068    public static int FTP_DATATIMEOUT = Settings.getInt(CommonSettings.FTP_DATATIMEOUT_SETTINGS);
069
070    /** The default place in classpath where the settings file can be found. */
071    private static final String DEFAULT_SETTINGS_CLASSPATH = "dk/netarkivet/common/distribute/FTPRemoteFileSettings.xml";
072
073    /*
074     * The static initialiser is called when the class is loaded. It will add default values for all settings defined in
075     * this class, by loading them from a settings.xml file in classpath.
076     */
077    static {
078        Settings.addDefaultClasspathSettings(DEFAULT_SETTINGS_CLASSPATH);
079    }
080
081    /**
082     * Ftp-connection information. FTP-related settings are by default read from settings, unless connectionParameters
083     * are given in the constructor.
084     */
085    private String ftpServerName;
086
087    /** The ftp-server port. */
088    private final int ftpServerPort;
089    /** The username used to connect to the ftp-server. */
090    private final String ftpUserName;
091    /** The password used to connect to the ftp-server. */
092    private final String ftpUserPassword;
093
094    /** The name that we use for the file on the FTP server. This is only for internal use. */
095    private final String ftpFileName;
096
097    /** If useChecksums is true, contains the file checksum. */
098    protected final String checksum;
099    
100    private FTPConnectionManager cm;
101
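    // NOTE(review): this note appears stale -- no constants defining setting names
    // are declared below; the setting names used by this class are referenced via
    // CommonSettings. The original note read: the constants defining setting names
    // are left non-final on purpose, as otherwise the static initialiser that loads
    // default values will not run.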
105
106    /**
107     * Private constructor used by getInstance() static-method Tries to generate unique name on ftp-server.
108     *
109     * @param localFile File used to create new file on ftp-server.
110     * @param useChecksums If true, checksums will be used to check transfers.
111     * @param fileDeletable If true, this file will be deleted after upload to FTP.
112     * @param multipleDownloads If true, the file will not be removed from FTP server automatically after first
113     * download.
114     * @param connectionParams If not null, contains connection parameters to the FTP-server desired by the user
115     * @throws IOFailure if MD5 checksum fails, or ftp fails
116     * @throws ArgumentNotValid if the local file cannot be read.
117     */
118    private FTPRemoteFile(File localFile, boolean useChecksums, boolean fileDeletable, boolean multipleDownloads,
119            RemoteFileSettings connectionParams) throws IOFailure {
120        super(localFile, useChecksums, fileDeletable, multipleDownloads);
121        if (connectionParams != null) {
122            // use the connection parameters desired by the user.
123            this.ftpServerName = connectionParams.getServerName();
124            this.ftpServerPort = connectionParams.getServerPort();
125            this.ftpUserName = connectionParams.getUserName();
126            this.ftpUserPassword = connectionParams.getUserPassword();
127        } else {
128            // use the connection parameters specified by the settings.
129            this.ftpServerName = Settings.get(CommonSettings.FTP_SERVER_NAME);
130            this.ftpServerPort = Settings.getInt(CommonSettings.FTP_SERVER_PORT);
131            this.ftpUserName = Settings.get(CommonSettings.FTP_USER_NAME);
132            this.ftpUserPassword = Settings.get(CommonSettings.FTP_USER_PASSWORD);
133        }
134        this.cm = new FTPConnectionManager(ftpUserName, ftpUserPassword, ftpServerName, ftpServerPort, 
135                        Settings.getInt(CommonSettings.FTP_RETRIES_SETTINGS), Settings.getInt(CommonSettings.FTP_DATATIMEOUT_SETTINGS));
136
137        if (filesize == 0) {
138            if (useChecksums) {
139                checksum = ChecksumCalculator.calculateMd5(file);
140            } else {
141                checksum = null;
142            }
143            ftpFileName = "-";
144        } else { 
145                // A large enough number to make it unlikely that two files are
146            // created with the same FTP server name. Already the millisecond
147            // datestamp reduces the likelihood, with this even if two
148            // processes/threads try to upload the same file in the same
149            // millisecond (very unlikely) they have only .01% chance of
150            // clashing.
151            final int aMagicNumber = 100000;
152            ftpFileName = file.getName() + "-" + new Random().nextInt(aMagicNumber) + "-" + new Date().getTime();
153            InputStream in;
154
155            try {
156                in = new FileInputStream(localFile);
157            } catch (FileNotFoundException e) {
158                final String message = "Couldn't prepare file '" + localFile + "' for remote access. File not found.";
159                log.debug(message, e);
160                throw new IOFailure(message, e);
161            }
162            log.debug("Writing '{}' as '{}' on ftp-server {}", file.getName(), ftpFileName, cm.getFtpServer());
163
164            // Writing inlined in constructor to allow the checksum field to
165            // be final (and thus must be set in constructor).
166            try {
167                cm.logOn();
168                if (useChecksums) {
169                    in = new DigestInputStream(in, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
170                }
171                boolean success = false;
172                int tried = 0;
173                while (!success && tried < FTP_RETRIES) {
174                    tried++;
175                    try {
176                        success = cm.getFTPClient().storeFile(ftpFileName, in);
177                        if (!success) {
178                            log.debug("FTP store failed attempt '{}' of {}: {}", tried, FTP_RETRIES,
179                                    cm.getFtpErrorMessage());
180                        }
181                    } catch (IOException e) {
182                        String message = "Write operation to '" + ftpFileName + "' failed on attempt " + tried + " of "
183                                + FTP_RETRIES;
184                        if (e instanceof CopyStreamException) {
185                            CopyStreamException realException = (CopyStreamException) e;
186                            message += "(real cause = " + realException.getIOException() + ")";
187                        }
188                        log.debug(message, e);
189                    }
190                }
191                if (!success) {
192                    final String msg = "Failed to upload '" + localFile + "' after " + tried + " attempts";
193                    log.warn(msg);
194                    throw new IOFailure(msg);
195                }
196                log.debug("Completed writing the file '{}'", ftpFileName);
197
198                if (useChecksums) {
199                    checksum = ChecksumCalculator.toHex(((DigestInputStream) in).getMessageDigest().digest());
200                    log.debug("Checksum of '{}' is:{}", ftpFileName, checksum);
201                } else {
202                    checksum = null;
203                }
204            } finally {
205                IOUtils.closeQuietly(in);
206                cm.logOut();
207                log.debug("Ftp logout");
208            }
209        }
210        if (fileDeletable) {
211            try {
212                FileUtils.removeRecursively(localFile);
213            } catch (IOFailure e) {
214                // Not fatal
215                log.warn("Couldn't remove tmp file {}", localFile, e);
216            }
217        }
218    }
219
220    /**
221     * Create a remote file that handles the transport of the remote file data. This method is used by the sender to
222     * prepare the transport.
223     *
224     * @param localFile File object for the remote file
225     * @param useChecksums If true, checksums will be used to check transfers.
226     * @param fileDeletable If true, this file will be deleted after upload to FTP.
227     * @param multipleDownloads If true, the file will not be removed from FTP server automatically after first
228     * download.
229     * @return FTPRemoteFile object
230     * @throws IOFailure if FTPRemoteFile creation fails
231     */
232    public static RemoteFile getInstance(File localFile, Boolean useChecksums, Boolean fileDeletable,
233            Boolean multipleDownloads) throws IOFailure {
234        ArgumentNotValid.checkNotNull(localFile, "File remoteFile");
235        return new FTPRemoteFile(localFile, useChecksums, fileDeletable, multipleDownloads, null);
236    }
237
238    public static RemoteFile getInstance(File localFile, Boolean useChecksums, Boolean fileDeletable,
239            Boolean multipleDownloads, RemoteFileSettings connectionParams) throws IOFailure {
240        ArgumentNotValid.checkNotNull(localFile, "File remoteFile");
241        return new FTPRemoteFile(localFile, useChecksums, fileDeletable, multipleDownloads, connectionParams);
242    }
243
244    /**
245     * An implementation of the getInputStream operation that works with FTP. Notice that most of the special work
246     * (logging out and checking MD5) happens in the close() method of the returned InputStream, since that is the only
247     * place where we can know we're done.
248     *
249     * @return An InputStream that will deliver the data transferred by FTP. Holding on to this for long periods without
250     * reading any data might cause a timeout.
251     */
252    @Override
253    public InputStream getInputStream() {
254        if (filesize == 0) {
255            return new ByteArrayInputStream(new byte[] {});
256        }
257        try {
258            cm.logOn();
259
260            InputStream in = cm.getFTPClient().retrieveFileStream(ftpFileName);
261            if (in == null) {
262                throw new IOFailure("Unable to retrieve input stream:" + cm.getFtpErrorMessage());
263            }
264            if (useChecksums) {
265                in = new DigestInputStream(in, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
266            }
267            return new FilterInputStream(in) {
268                public void close() throws IOException {
269                    try {
270                        super.close();
271                        if (useChecksums) {
272                            String newChecksum = ChecksumCalculator.toHex(((DigestInputStream) in).getMessageDigest()
273                                    .digest());
274                            if (!newChecksum.equals(checksum)) {
275                                final String msg = "Checksums of '" + ftpFileName + "' do not match! " + "Should be "
276                                        + checksum + " but was " + newChecksum;
277                                log.warn(msg);
278                                throw new IOFailure(msg);
279                            }
280                        }
281                    } finally {
282                        cm.logOut();
283                        if (!multipleDownloads) {
284                            cleanup();
285                        }
286                    }
287                }
288            };
289        } catch (IOException e) {
290            String msg = "Creating inputstream from '" + ftpFileName + "' failed ";
291            if (e instanceof CopyStreamException) {
292                CopyStreamException realException = (CopyStreamException) e;
293                msg += "(real cause = " + realException.getIOException() + ")";
294            }
295            log.warn(msg, e);
296            throw new IOFailure(msg, e);
297        }
298    }
299
300    /**
301     * Write the contents of this ftp remote file to an output stream. Notice that while the checksum of the transferred
302     * data is checked, no retries are performed, and in case of failure, there is no guarantee that any data have been
303     * transferred.
304     *
305     * @param out OutputStream that the data will be written to. This stream will not be closed by this operation.
306     * @throws IOFailure If append operation fails
307     */
308    @Override
309    public void appendTo(OutputStream out) {
310        ArgumentNotValid.checkNotNull(out, "OutputStream out");
311
312        if (filesize == 0) {
313            return;
314        }
315
316        try {
317            cm.logOn();
318
319            if (useChecksums) {
320                out = new DigestOutputStream(out, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
321            }
322            if (!cm.getFTPClient().retrieveFile(ftpFileName, out)) {
323                final String msg = "Append operation from '" + ftpFileName + "' failed: " + cm.getFtpErrorMessage();
324                log.warn(msg);
325                throw new IOFailure(msg);
326            }
327            out.flush();
328            if (useChecksums) {
329                String newChecksum = ChecksumCalculator.toHex(((DigestOutputStream) out).getMessageDigest().digest());
330                if (checksum != null && !checksum.equals(newChecksum)) {
331                    final String msg = "Checksums of '" + ftpFileName + "' do not match! Should be " + checksum
332                            + " but was " + newChecksum;
333                    log.warn(msg);
334                    throw new IOFailure(msg);
335                }
336            }
337        } catch (IOException e) {
338            String msg = "Append operation from '" + ftpFileName + "' failed ";
339            if (e instanceof CopyStreamException) {
340                CopyStreamException realException = (CopyStreamException) e;
341                msg += "(real cause = " + realException.getIOException() + ")";
342            }
343            log.warn(msg, e);
344            throw new IOFailure(msg, e);
345        } finally {
346            cm.logOut();
347            if (!multipleDownloads) {
348                cleanup();
349            }
350        }
351    }
352
353    /**
354     * Cleanup will delete the file on the FTP server. This method should never throw exceptions. It is idempotent,
355     * meaning it can be called twice without trouble.
356     */
357    @Override
358    public void cleanup() {
359        if (filesize == 0) {
360            return;
361        }
362        log.debug("Deleting file '{}' from ftp server", ftpFileName);
363        try {
364            cm.logOn();
365            cm.getFTPClient().deleteFile(ftpFileName);
366        } catch (Exception e) {
367            log.warn("Error while deleting ftp file '{}' for file '{}'", ftpFileName, file.getName(), e);
368        } finally {
369            // try to disconnect before returning from method
370            try {
371                cm.logOut();
372            } catch (Exception e) {
373                log.warn("Unexpected error while logging out ", e);
374            }
375        }
376        log.debug("File '{}' deleted from ftp server. Cleanup finished.", ftpFileName);
377    }
378
379    /**
380     * Return a human-readable description of the object.
381     *
382     * @return description of object -- not machine readable
383     */
384    public String toString() {
385        return "RemoteFile '" + file.getName() + "' (#" + checksum + ")";
386    }
387
388    /**
389     * Get checksum for file, or null if checksums were not requested.
390     *
391     * @return checksum for file, or null if checksums were not requested.
392     */
393    public String getChecksum() {
394        return checksum;
395    }
396
397    /**
398     * Retrieval of the number of retries for retrieving a file from a FTP server. Returns the setting for number of
399     * retries.
400     *
401     * @return The number of retries for the FTP connection, defined in settings.
402     */
403    @Override
404    public int getNumberOfRetries() {
405        return FTP_RETRIES;
406    }
407
408    public static RemoteFileSettings getRemoteFileSettings() {
409        return new RemoteFileSettings(Settings.get(CommonSettings.FTP_SERVER_NAME),
410                Settings.getInt(CommonSettings.FTP_SERVER_PORT), Settings.get(CommonSettings.FTP_USER_NAME),
411                Settings.get(CommonSettings.FTP_USER_PASSWORD));
412    }
413
414}