001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.distribute;
024
025import java.io.ByteArrayInputStream;
026import java.io.File;
027import java.io.FileInputStream;
028import java.io.FileNotFoundException;
029import java.io.FilterInputStream;
030import java.io.IOException;
031import java.io.InputStream;
032import java.io.OutputStream;
033import java.security.DigestInputStream;
034import java.security.DigestOutputStream;
035import java.util.Date;
036import java.util.Random;
037
038import org.apache.commons.io.IOUtils;
039import org.apache.commons.net.io.CopyStreamException;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043import dk.netarkivet.common.CommonSettings;
044import dk.netarkivet.common.exceptions.ArgumentNotValid;
045import dk.netarkivet.common.exceptions.IOFailure;
046import dk.netarkivet.common.utils.ChecksumCalculator;
047import dk.netarkivet.common.utils.FileUtils;
048import dk.netarkivet.common.utils.NotificationType;
049import dk.netarkivet.common.utils.NotificationsFactory;
050import dk.netarkivet.common.utils.Settings;
051
/**
 * Class encapsulating upload to and download from an FTP server.
 * <p>
 * Transfers are done using binary type and passive mode, if available.
 */
057@SuppressWarnings({"serial"})
058public final class FTPRemoteFile extends AbstractRemoteFile {
059
060    /** A named logger for this class. */
061    private static final transient Logger log = LoggerFactory.getLogger(FTPRemoteFile.class);
062
063    /**
064     * How many times we will retry upload, download, and logon.
065     */
066    public static int FTP_RETRIES = Settings.getInt(CommonSettings.FTP_RETRIES_SETTINGS);
067    /**
068     * How large a data timeout on our FTP connections.
069     */
070    public static int FTP_DATATIMEOUT = Settings.getInt(CommonSettings.FTP_DATATIMEOUT_SETTINGS);
071
072    /** The default place in classpath where the settings file can be found. */
073    private static final String DEFAULT_SETTINGS_CLASSPATH = "dk/netarkivet/common/distribute/FTPRemoteFileSettings.xml";
074
075    /*
076     * The static initialiser is called when the class is loaded. It will add default values for all settings defined in
077     * this class, by loading them from a settings.xml file in classpath.
078     */
079    static {
080        Settings.addDefaultClasspathSettings(DEFAULT_SETTINGS_CLASSPATH);
081    }
082
083    /**
084     * Ftp-connection information. FTP-related settings are by default read from settings, unless connectionParameters
085     * are given in the constructor.
086     */
087    private String ftpServerName;
088
089    /** The ftp-server port. */
090    private final int ftpServerPort;
091    /** The username used to connect to the ftp-server. */
092    private final String ftpUserName;
093    /** The password used to connect to the ftp-server. */
094    private final String ftpUserPassword;
095
096    /** The name that we use for the file on the FTP server. This is only for internal use. */
097    private final String ftpFileName;
098
099    /** If useChecksums is true, contains the file checksum. */
100    protected final String checksum;
101    
102    private FTPConnectionManager cm;
103
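    // NOTE: The constants defining setting names below are left non-final on
    // purpose! Otherwise, the static initialiser that loads default values
    // will not run.
    // NOTE(review): this class declares no setting-name constants below this point; the setting names used
    // here are referenced from CommonSettings. The remark above therefore looks stale -- confirm before
    // relying on it.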
107
108    /**
109     * Private constructor used by getInstance() static-method Tries to generate unique name on ftp-server.
110     *
111     * @param localFile File used to create new file on ftp-server.
112     * @param useChecksums If true, checksums will be used to check transfers.
113     * @param fileDeletable If true, this file will be deleted after upload to FTP.
114     * @param multipleDownloads If true, the file will not be removed from FTP server automatically after first
115     * download.
116     * @param connectionParams If not null, contains connection parameters to the FTP-server desired by the user
117     * @throws IOFailure if MD5 checksum fails, or ftp fails
118     * @throws ArgumentNotValid if the local file cannot be read.
119     */
120    private FTPRemoteFile(File localFile, boolean useChecksums, boolean fileDeletable, boolean multipleDownloads,
121            RemoteFileSettings connectionParams) throws IOFailure {
122        super(localFile, useChecksums, fileDeletable, multipleDownloads);
123        if (connectionParams != null) {
124            // use the connection parameters desired by the user.
125            this.ftpServerName = connectionParams.getServerName();
126            this.ftpServerPort = connectionParams.getServerPort();
127            this.ftpUserName = connectionParams.getUserName();
128            this.ftpUserPassword = connectionParams.getUserPassword();
129        } else {
130            // use the connection parameters specified by the settings.
131            this.ftpServerName = Settings.get(CommonSettings.FTP_SERVER_NAME);
132            this.ftpServerPort = Settings.getInt(CommonSettings.FTP_SERVER_PORT);
133            this.ftpUserName = Settings.get(CommonSettings.FTP_USER_NAME);
134            this.ftpUserPassword = Settings.get(CommonSettings.FTP_USER_PASSWORD);
135        }
136        this.cm = new FTPConnectionManager(ftpUserName, ftpUserPassword, ftpServerName, ftpServerPort, 
137                        Settings.getInt(CommonSettings.FTP_RETRIES_SETTINGS), Settings.getInt(CommonSettings.FTP_DATATIMEOUT_SETTINGS));
138
139        if (filesize == 0) {
140            if (useChecksums) {
141                checksum = ChecksumCalculator.calculateMd5(file);
142            } else {
143                checksum = null;
144            }
145            ftpFileName = "-";
146        } else { 
147                // A large enough number to make it unlikely that two files are
148            // created with the same FTP server name. Already the millisecond
149            // datestamp reduces the likelihood, with this even if two
150            // processes/threads try to upload the same file in the same
151            // millisecond (very unlikely) they have only .01% chance of
152            // clashing.
153            final int aMagicNumber = 100000;
154            ftpFileName = file.getName() + "-" + new Random().nextInt(aMagicNumber) + "-" + new Date().getTime();
155            InputStream in;
156
157            try {
158                in = new FileInputStream(localFile);
159            } catch (FileNotFoundException e) {
160                final String message = "Couldn't prepare file '" + localFile + "' for remote access. File not found.";
161                log.debug(message, e);
162                throw new IOFailure(message, e);
163            }
164            log.debug("Writing '{}' as '{}' on ftp-server {}", file.getName(), ftpFileName, cm.getFtpServer());
165
166            // Writing inlined in constructor to allow the checksum field to
167            // be final (and thus must be set in constructor).
168            try {
169                cm.logOn();
170                if (useChecksums) {
171                    in = new DigestInputStream(in, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
172                }
173                boolean success = false;
174                int tried = 0;
175                String message = null;
176                while (!success && tried < FTP_RETRIES) {
177                    tried++;
178                    try {
179                        success = cm.getFTPClient().storeFile(ftpFileName, in);
180                        if (!success) {
181                            log.debug("FTP store failed attempt '{}' of {}: {}", tried, FTP_RETRIES,
182                                    cm.getFtpErrorMessage());
183                        }
184                    } catch (IOException e) {
185                        message = "Write operation to '" + ftpFileName + "' failed on attempt " + tried + " of "
186                                + FTP_RETRIES;
187                        if (e instanceof CopyStreamException) {
188                            CopyStreamException realException = (CopyStreamException) e;
189                            message += "(real cause = " + realException.getIOException() + ")";
190                        }
191                        log.debug(message, e);
192                    }
193                }
194                if (!success) {
195                    final String msg = "Failed to upload '" + localFile + "' after " + tried 
196                            + " attempts. Reason for last failure: " +  message;
197                    log.warn(msg);
198                    // Send an Notification because of this
199                    NotificationsFactory.getInstance().notify(msg, NotificationType.ERROR);
200                    throw new IOFailure(msg);
201                }
202                log.debug("Completed writing the file '{}'", ftpFileName);
203
204                if (useChecksums) {
205                    checksum = ChecksumCalculator.toHex(((DigestInputStream) in).getMessageDigest().digest());
206                    log.debug("Checksum of '{}' is:{}", ftpFileName, checksum);
207                } else {
208                    checksum = null;
209                }
210            } finally {
211                IOUtils.closeQuietly(in);
212                cm.logOut();
213                log.debug("Ftp logout");
214            }
215        }
216        if (fileDeletable) {
217            try {
218                FileUtils.removeRecursively(localFile);
219            } catch (IOFailure e) {
220                // Not fatal
221                log.warn("Couldn't remove tmp file {}", localFile, e);
222            }
223        }
224    }
225
226    /**
227     * Create a remote file that handles the transport of the remote file data. This method is used by the sender to
228     * prepare the transport.
229     *
230     * @param localFile File object for the remote file
231     * @param useChecksums If true, checksums will be used to check transfers.
232     * @param fileDeletable If true, this file will be deleted after upload to FTP.
233     * @param multipleDownloads If true, the file will not be removed from FTP server automatically after first
234     * download.
235     * @return FTPRemoteFile object
236     * @throws IOFailure if FTPRemoteFile creation fails
237     */
238    public static RemoteFile getInstance(File localFile, Boolean useChecksums, Boolean fileDeletable,
239            Boolean multipleDownloads) throws IOFailure {
240        ArgumentNotValid.checkNotNull(localFile, "File remoteFile");
241        return new FTPRemoteFile(localFile, useChecksums, fileDeletable, multipleDownloads, null);
242    }
243
244    public static RemoteFile getInstance(File localFile, Boolean useChecksums, Boolean fileDeletable,
245            Boolean multipleDownloads, RemoteFileSettings connectionParams) throws IOFailure {
246        ArgumentNotValid.checkNotNull(localFile, "File remoteFile");
247        return new FTPRemoteFile(localFile, useChecksums, fileDeletable, multipleDownloads, connectionParams);
248    }
249
250    /**
251     * An implementation of the getInputStream operation that works with FTP. Notice that most of the special work
252     * (logging out and checking MD5) happens in the close() method of the returned InputStream, since that is the only
253     * place where we can know we're done.
254     *
255     * @return An InputStream that will deliver the data transferred by FTP. Holding on to this for long periods without
256     * reading any data might cause a timeout.
257     */
258    @Override
259    public InputStream getInputStream() {
260        if (filesize == 0) {
261            return new ByteArrayInputStream(new byte[] {});
262        }
263        try {
264            cm.logOn();
265
266            InputStream in = cm.getFTPClient().retrieveFileStream(ftpFileName);
267            if (in == null) {
268                throw new IOFailure("Unable to retrieve input stream:" + cm.getFtpErrorMessage());
269            }
270            if (useChecksums) {
271                in = new DigestInputStream(in, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
272            }
273            return new FilterInputStream(in) {
274                public void close() throws IOException {
275                    try {
276                        super.close();
277                        if (useChecksums) {
278                            String newChecksum = ChecksumCalculator.toHex(((DigestInputStream) in).getMessageDigest()
279                                    .digest());
280                            if (!newChecksum.equals(checksum)) {
281                                final String msg = "Checksums of '" + ftpFileName + "' do not match! " + "Should be "
282                                        + checksum + " but was " + newChecksum;
283                                log.warn(msg);
284                                throw new IOFailure(msg);
285                            }
286                        }
287                    } finally {
288                        cm.logOut();
289                        if (!multipleDownloads) {
290                            cleanup();
291                        }
292                    }
293                }
294            };
295        } catch (IOException e) {
296            String msg = "Creating inputstream from '" + ftpFileName + "' failed ";
297            if (e instanceof CopyStreamException) {
298                CopyStreamException realException = (CopyStreamException) e;
299                msg += "(real cause = " + realException.getIOException() + ")";
300            }
301            log.warn(msg, e);
302            throw new IOFailure(msg, e);
303        }
304    }
305
306    /**
307     * Write the contents of this ftp remote file to an output stream. Notice that while the checksum of the transferred
308     * data is checked, no retries are performed, and in case of failure, there is no guarantee that any data have been
309     * transferred.
310     *
311     * @param out OutputStream that the data will be written to. This stream will not be closed by this operation.
312     * @throws IOFailure If append operation fails
313     */
314    @Override
315    public void appendTo(OutputStream out) {
316        ArgumentNotValid.checkNotNull(out, "OutputStream out");
317
318        if (filesize == 0) {
319            return;
320        }
321
322        try {
323            cm.logOn();
324
325            if (useChecksums) {
326                out = new DigestOutputStream(out, ChecksumCalculator.getMessageDigest(ChecksumCalculator.MD5));
327            }
328            if (!cm.getFTPClient().retrieveFile(ftpFileName, out)) {
329                final String msg = "Append operation from '" + ftpFileName + "' failed: " + cm.getFtpErrorMessage();
330                log.warn(msg);
331                throw new IOFailure(msg);
332            }
333            out.flush();
334            if (useChecksums) {
335                String newChecksum = ChecksumCalculator.toHex(((DigestOutputStream) out).getMessageDigest().digest());
336                if (checksum != null && !checksum.equals(newChecksum)) {
337                    final String msg = "Checksums of '" + ftpFileName + "' do not match! Should be " + checksum
338                            + " but was " + newChecksum;
339                    log.warn(msg);
340                    throw new IOFailure(msg);
341                }
342            }
343        } catch (IOException e) {
344            String msg = "Append operation from '" + ftpFileName + "' failed ";
345            if (e instanceof CopyStreamException) {
346                CopyStreamException realException = (CopyStreamException) e;
347                msg += "(real cause = " + realException.getIOException() + ")";
348            }
349            log.warn(msg, e);
350            throw new IOFailure(msg, e);
351        } finally {
352            cm.logOut();
353            if (!multipleDownloads) {
354                cleanup();
355            }
356        }
357    }
358
359    /**
360     * Cleanup will delete the file on the FTP server. This method should never throw exceptions. It is idempotent,
361     * meaning it can be called twice without trouble.
362     */
363    @Override
364    public void cleanup() {
365        if (filesize == 0) {
366            return;
367        }
368        log.debug("Deleting file '{}' from ftp server", ftpFileName);
369        try {
370            cm.logOn();
371            cm.getFTPClient().deleteFile(ftpFileName);
372        } catch (Exception e) {
373            log.warn("Error while deleting ftp file '{}' for file '{}'", ftpFileName, file.getName(), e);
374        } finally {
375            // try to disconnect before returning from method
376            try {
377                cm.logOut();
378            } catch (Exception e) {
379                log.warn("Unexpected error while logging out ", e);
380            }
381        }
382        log.debug("File '{}' deleted from ftp server. Cleanup finished.", ftpFileName);
383    }
384
385    /**
386     * Return a human-readable description of the object.
387     *
388     * @return description of object -- not machine readable
389     */
390    public String toString() {
391        return "RemoteFile '" + file.getName() + "' (#" + checksum + ")";
392    }
393
394    /**
395     * Get checksum for file, or null if checksums were not requested.
396     *
397     * @return checksum for file, or null if checksums were not requested.
398     */
399    public String getChecksum() {
400        return checksum;
401    }
402
403    /**
404     * Retrieval of the number of retries for retrieving a file from a FTP server. Returns the setting for number of
405     * retries.
406     *
407     * @return The number of retries for the FTP connection, defined in settings.
408     */
409    @Override
410    public int getNumberOfRetries() {
411        return FTP_RETRIES;
412    }
413
414    public static RemoteFileSettings getRemoteFileSettings() {
415        return new RemoteFileSettings(Settings.get(CommonSettings.FTP_SERVER_NAME),
416                Settings.getInt(CommonSettings.FTP_SERVER_PORT), Settings.get(CommonSettings.FTP_USER_NAME),
417                Settings.get(CommonSettings.FTP_USER_PASSWORD));
418    }
419
420}