001/*
002 * #%L
003 * Netarchivesuite - wayback
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.wayback;
024
025import java.io.IOException;
026import java.io.InputStream;
027import java.util.HashMap;
028import java.util.Map;
029
030import org.apache.commons.logging.Log;
031import org.apache.commons.logging.LogFactory;
032import org.archive.format.ArchiveFileConstants;
033import org.archive.io.ArchiveRecordHeader;
034import org.archive.io.arc.ARCRecord;
035import org.archive.io.arc.ARCRecordMetaData;
036import org.archive.wayback.ResourceStore;
037import org.archive.wayback.core.CaptureSearchResult;
038import org.archive.wayback.core.Resource;
039import org.archive.wayback.exception.ResourceNotAvailableException;
040import org.archive.wayback.resourcestore.resourcefile.ArcResource;
041
042import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
043import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
044import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient;
045
046/**
047 * This is the connector between netarchivesuite and wayback. And is based on PrototypeNetarchiveResourceStore.java
048 * which was made as a prototype connector.
049 */
050public class NetarchiveResourceStore implements ResourceStore {
051
052    /** JMS ArcRepositoryClient. */
053    protected ViewerArcRepositoryClient client;
054    
055    /** Logger. */
056    private Log logger = LogFactory.getLog(getClass().getName());
057
058    /**
059     * Constructor.
060     */
061    public NetarchiveResourceStore() {
062        client = ArcRepositoryClientFactory.getViewerInstance();
063    }
064
065    /**
066     * Transforms search result into a resource, according to the ResourceStore interface.
067     *
068     * @param captureSearchResult the search result.
069     * @return a valid resource containing metadata and a link to the ARC record.
070     * @throws ResourceNotAvailableException if something went wrong fetching record.
071     */
072    public Resource retrieveResource(CaptureSearchResult captureSearchResult) throws ResourceNotAvailableException {
073        long offset;
074        Map<String, Object> metadata = new HashMap<String, Object>();
075        ARCRecord arcRecord;
076        ArchiveRecordHeader arcRecordMetaData;
077
078        String filename = captureSearchResult.getFile();
079        try {
080            offset = captureSearchResult.getOffset();
081        } catch (NumberFormatException e) {
082            logger.error("Error looking for non existing resource", e);
083            throw new ResourceNotAvailableException("NetarchiveResourceStore "
084                    + "thows NumberFormatException when reading offset.");
085        } catch (NullPointerException e) {
086            logger.error("Error looking for non existing resource", e);
087            throw new ResourceNotAvailableException("NetarchiveResourceStore "
088                    + "throws NullPointerException when accessing " + "CaptureResult given from Wayback.");
089        }
090        logger.info("Received request for resource from file '" + filename + "' at offset '" + offset + "'");
091        BitarchiveRecord bitarchiveRecord = client.get(filename, offset);
092        if (bitarchiveRecord == null) {
093            throw new ResourceNotAvailableException("NetarchiveResourceStore: "
094                    + "Bitarchive didn't return the requested record.");
095        }
096        logger.info("Retrieved resource from file '" + filename + "' at offset '" + offset + "'");
097
098        // This InputStream is just the http-response, starting with the HTTP arcRecordMetaData.
099        InputStream is = bitarchiveRecord.getData();
100
101        metadata.put(ARCRecordMetaData.URL_FIELD_KEY, captureSearchResult.getOriginalUrl());
102        try {
103            metadata.put(ARCRecordMetaData.IP_HEADER_FIELD_KEY, captureSearchResult.getOriginalHost());
104        } catch (NullPointerException ex) {
105            metadata.put(ARCRecordMetaData.IP_HEADER_FIELD_KEY, "");
106        }
107        metadata.put(ARCRecordMetaData.DATE_FIELD_KEY, captureSearchResult.getCaptureDate().toString());
108        metadata.put(ARCRecordMetaData.MIMETYPE_FIELD_KEY, captureSearchResult.getMimeType());
109        metadata.put(ARCRecordMetaData.VERSION_FIELD_KEY, captureSearchResult.getHttpCode());
110        metadata.put(ARCRecordMetaData.ABSOLUTE_OFFSET_KEY, "" + offset);
111        metadata.put(ARCRecordMetaData.LENGTH_FIELD_KEY, "" + bitarchiveRecord.getLength());
112        metadata.put(ARCRecordMetaData.STATUSCODE_FIELD_KEY, captureSearchResult.getHttpCode());
113        metadata.put(ArchiveFileConstants.ORIGIN_FIELD_KEY, captureSearchResult.getOriginalUrl());
114        // create arcRecordMetaData.
115        try {
116            arcRecordMetaData = new ARCRecordMetaData(filename, metadata);
117        } catch (IOException e) {
118            logger.error("Could not create arcRecordMetaData", e);
119            throw new ResourceNotAvailableException(e.getMessage());
120        }
121
122        // create ARCRecord.
123        try {
124            arcRecord = new ARCRecord(is, arcRecordMetaData, 0, false, false, true);
125            //arcRecord.getHttpHeaders();
126            //arcRecord.skipHttpHeader();
127            logger.debug("ARCRecord created with code '" + arcRecord.getStatusCode() + "'");
128            logger.debug("Headers: " + arcRecord.getHeaderString());
129        } catch (NullPointerException e) {
130            logger.error("Could not create ARCRecord", e);
131            throw new ResourceNotAvailableException("ARC record doesn't contain" + " valid http URL");
132        } catch (IOException e) {
133            logger.error("Could not create ARCRecord", e);
134            throw new ResourceNotAvailableException(e.getMessage());
135        }
136        Resource resource = new ArcResource(arcRecord, null);
137        try {
138            //This call has the side-effect of queueing up the resource at the start of the response-body, after the http headers.
139            resource.parseHeaders();
140        } catch (IOException e) {
141            logger.debug(e);
142        }
143        logger.info("Returning resource '" + resource + "'");
144        return resource;
145    }
146
147    /**
148     * Shuts down this resource store, closing the arcrepository client.
149     *
150     * @throws IOException if an exception occurred while closing the client.
151     */
152    public void shutdown() throws IOException {
153        // Close JMS connection.
154        client.close();
155    }
156}