001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.utils.archive;
024
025import java.io.File;
026import java.text.DateFormat;
027import java.text.ParseException;
028import java.util.Collections;
029import java.util.Date;
030import java.util.HashMap;
031import java.util.Iterator;
032import java.util.Map;
033import java.util.Set;
034
035import org.archive.io.ArchiveRecord;
036import org.archive.io.ArchiveRecordHeader;
037import org.archive.io.arc.ARCRecord;
038import org.archive.io.warc.WARCRecord;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import dk.netarkivet.common.exceptions.ArgumentNotValid;
043
044/**
045 * Heritrix wrapper implementation of the abstract archive header interface.
046 */
047@SuppressWarnings({"unchecked"})
048public class HeritrixArchiveHeaderWrapper extends ArchiveHeaderBase {
049
050    /** The logger for this class. */
051    private static final Logger log = LoggerFactory.getLogger(HeritrixArchiveHeaderWrapper.class);
052
053    /** Reuse the sme WARC <code>DateFormat</code> object. */
054    protected DateFormat warcDateFormat = ArchiveDateConverter.getWarcDateFormat();
055
056    /** Reuse the same ARC <code>DateFormat</code> object. */
057    protected DateFormat arcDateFormat = ArchiveDateConverter.getArcDateFormat();
058
059    /** Wrapper Heritrix header. */
060    protected HeritrixArchiveRecordWrapper recordWrapper;
061
062    /** Original Heritrix header object. */
063    protected ArchiveRecordHeader header;
064
065    /**
066     * Map of header fields extracted from the Heritrix header. Only difference is that the keys are normalized to lower
067     * case.
068     */
069    protected Map<String, Object> headerFields = new HashMap<String, Object>();
070
071    /**
072     * Construct a Heritrix record header wrapper object.
073     *
074     * @param recordWrapper wrapped Heritrix header
075     * @param record original Heritrix record
076     * @return wrapped Heritrix record header
077     */
078    public static HeritrixArchiveHeaderWrapper wrapArchiveHeader(HeritrixArchiveRecordWrapper recordWrapper,
079            ArchiveRecord record) {
080        // ArgumentNotValid.checkNotNull(recordWrapper, "recordWrapper");
081        ArgumentNotValid.checkNotNull(record, "record");
082        HeritrixArchiveHeaderWrapper headerWrapper = new HeritrixArchiveHeaderWrapper();
083        headerWrapper.recordWrapper = recordWrapper;
084        headerWrapper.header = record.getHeader();
085        Map<String, Object> heritrixHeaderFields = (Map<String, Object>) headerWrapper.header.getHeaderFields();
086        Iterator<Map.Entry<String, Object>> iter = heritrixHeaderFields.entrySet().iterator();
087        Map.Entry<String, Object> entry;
088        while (iter.hasNext()) {
089            entry = iter.next();
090            headerWrapper.headerFields.put(entry.getKey().toLowerCase(), entry.getValue());
091        }
092        if (record instanceof ARCRecord) {
093            headerWrapper.bIsArc = true;
094        } else if (record instanceof WARCRecord) {
095            headerWrapper.bIsWarc = true;
096        } else {
097            throw new ArgumentNotValid("Unsupported ArchiveRecord type: " + record.getClass().getName());
098        }
099        return headerWrapper;
100    }
101
102    @Override
103    public Object getHeaderValue(String key) {
104        return headerFields.get(key.toLowerCase());
105    }
106
107    @Override
108    public String getHeaderStringValue(String key) {
109        Object tmpObj = headerFields.get(key.toLowerCase());
110        String str;
111        if (tmpObj != null) {
112            str = tmpObj.toString();
113        } else {
114            str = null;
115        }
116        return str;
117    }
118
119    @Override
120    public Set<String> getHeaderFieldKeys() {
121        return Collections.unmodifiableSet(headerFields.keySet());
122    }
123
124    @Override
125    public Map<String, Object> getHeaderFields() {
126        return Collections.unmodifiableMap(headerFields);
127    }
128
129    /*
130     * The following fields do not need converting.
131     */
132
133    @Override
134    public String getVersion() {
135        return header.getVersion();
136    }
137
138    @Override
139    public String getReaderIdentifier() {
140        return header.getReaderIdentifier();
141    }
142
143    @Override
144    public String getRecordIdentifier() {
145        return header.getRecordIdentifier();
146    }
147
148    @Override
149    public String getUrl() {
150        return header.getUrl();
151    }
152
153    @Override
154    public String getIp() {
155        Object tmpObj = getHeaderValue("WARC-IP-Address");
156        String ip;
157        if (tmpObj != null) {
158            ip = tmpObj.toString();
159        } else {
160            ip = null;
161        }
162        return ip;
163    }
164
165    @Override
166    public long getOffset() {
167        return header.getOffset();
168    }
169
170    @Override
171    public long getLength() {
172        return header.getLength();
173    }
174
175    /*
176     * Conversion required.
177     */
178
179    @Override
180    public Date getDate() {
181        String dateStr = header.getDate();
182        Date date = null;
183        try {
184            if (bIsArc) {
185                date = arcDateFormat.parse(dateStr);
186            } else if (bIsWarc) {
187                date = warcDateFormat.parse(dateStr);
188            }
189        } catch (ParseException e) {
190            log.info("Archive date could not be parsed: '{}'.", dateStr);
191        }
192        return date;
193    }
194
195    @Override
196    public String getArcDateStr() {
197        String dateStr = header.getDate();
198        if (bIsWarc) {
199            try {
200                Date warcDate = warcDateFormat.parse(dateStr);
201                dateStr = arcDateFormat.format(warcDate);
202                return dateStr;
203            } catch (Exception e) {
204                log.info("Archive date could not be parsed: {}.", dateStr);
205            }
206        }
207        return dateStr;
208    }
209
210    @Override
211    public String getMimetype() {
212        return header.getMimetype();
213    }
214
215    @Override
216    public File getArchiveFile() {
217        return new File(header.getReaderIdentifier());
218    }
219
220}