package is.hi.bok.deduplicator;

import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.TimeZone;

/**
 * Supplies per-thread <code>DateFormat</code> instances for the timestamp layouts used in ARC files, in WARC files
 * and by Heritrix (17 digits).
 * <p>
 * Every format handed out is non-lenient, works in UTC and is independent of the JVM's default locale. The instances
 * are cached per thread, so repeated calls from one thread return the same objects; they must not be passed on to
 * other threads because <code>DateFormat</code> is not thread safe.
 *
 * TODO merge with dk.netarkivet.common.utils.archive.ArchiveDateConverter
 */
public final class ArchiveDateConverter {

    /** ARC date format string as specified in the ARC documentation (14 digits). */
    public static final String ARC_DATE_FORMAT = "yyyyMMddHHmmss";

    /** WARC date format string as specified by the WARC ISO standard. */
    public static final String WARC_DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss'Z'";

    /** Date format string used by Heritrix (17 digits, i.e. including milliseconds). */
    public static final String HERITRIX_DATE_FORMAT = "yyyyMMddHHmmssSSS";

    /**
     * <code>DateFormat</code> is not thread safe, so each thread lazily gets its own converter (and thereby its own
     * set of formats) through this <code>ThreadLocal</code>.
     */
    private static final ThreadLocal<ArchiveDateConverter> PER_THREAD_CONVERTER =
            new ThreadLocal<ArchiveDateConverter>() {
                @Override
                protected ArchiveDateConverter initialValue() {
                    return new ArchiveDateConverter();
                }
            };

    /** ARC <code>DateFormat</code> as specified in the ARC documentation. */
    private final DateFormat arcDateFormat;

    /** WARC <code>DateFormat</code> as specified in the WARC ISO standard. */
    private final DateFormat warcDateFormat;

    /** <code>DateFormat</code> for the 17-digit timestamps used by Heritrix. */
    private final DateFormat heritrixDateFormat;

    /**
     * Creates a new <code>ArchiveDateConverter</code> holding one strict UTC format per supported layout.
     * Only invoked through the <code>ThreadLocal</code>, once per thread.
     */
    private ArchiveDateConverter() {
        arcDateFormat = newStrictUtcFormat(ARC_DATE_FORMAT);
        warcDateFormat = newStrictUtcFormat(WARC_DATE_FORMAT);
        heritrixDateFormat = newStrictUtcFormat(HERITRIX_DATE_FORMAT);
    }

    /**
     * Builds a non-lenient, UTC based <code>DateFormat</code> for the given pattern.
     *
     * @param pattern a <code>SimpleDateFormat</code> pattern
     * @return a freshly created format for <code>pattern</code>
     */
    private static DateFormat newStrictUtcFormat(String pattern) {
        // Locale.ROOT instead of the default locale: archive timestamps are machine-readable, and a default
        // locale that selects a non-Gregorian calendar (e.g. th-TH) would otherwise change the year that is
        // written and expected.
        DateFormat format = new SimpleDateFormat(pattern, Locale.ROOT);
        format.setLenient(false);
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        return format;
    }

    /**
     * Returns the calling thread's <code>DateFormat</code> for ARC date conversion.
     *
     * @return a <code>DateFormat</code> object for ARC date conversion
     */
    public static DateFormat getArcDateFormat() {
        return PER_THREAD_CONVERTER.get().arcDateFormat;
    }

    /**
     * Returns the calling thread's <code>DateFormat</code> for WARC date conversion.
     *
     * @return a <code>DateFormat</code> object for WARC date conversion
     */
    public static DateFormat getWarcDateFormat() {
        return PER_THREAD_CONVERTER.get().warcDateFormat;
    }

    /**
     * Returns the calling thread's <code>DateFormat</code> for Heritrix 17-digit date conversion.
     *
     * @return a <code>DateFormat</code> object for Heritrix 17-digit date conversion
     */
    public static DateFormat getHeritrixDateFormat() {
        return PER_THREAD_CONVERTER.get().heritrixDateFormat;
    }
}