001/*
002 * #%L
003 * Netarchivesuite - wayback
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.wayback;
024
025import java.io.IOException;
026import java.io.InputStream;
027import java.util.HashMap;
028import java.util.Map;
029
030import org.apache.commons.logging.Log;
031import org.apache.commons.logging.LogFactory;
032import org.archive.format.ArchiveFileConstants;
033import org.archive.io.ArchiveRecordHeader;
034import org.archive.io.arc.ARCRecord;
035import org.archive.io.arc.ARCRecordMetaData;
036import org.archive.wayback.ResourceStore;
037import org.archive.wayback.core.CaptureSearchResult;
038import org.archive.wayback.core.Resource;
039import org.archive.wayback.exception.ResourceNotAvailableException;
040import org.archive.wayback.resourcestore.resourcefile.ArcResource;
041
042import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClient;
043import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
044import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
045import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient;
046
047/**
048 * This is the connector between netarchivesuite and wayback. And is based on PrototypeNetarchiveResourceStore.java
049 * which was made as a prototype connector.
050 */
051public class NetarchiveResourceStore implements ResourceStore {
052
053    /** JMS ArcRepositoryClient. */
054    protected ViewerArcRepositoryClient client;
055    
056    /** Logger. */
057    private Log logger = LogFactory.getLog(getClass().getName());
058
059    /**
060     * Constructor.
061     */
062    public NetarchiveResourceStore() {
063        client = ArcRepositoryClientFactory.getViewerInstance();
064    }
065
066    /**
067     * Transforms search result into a resource, according to the ResourceStore interface.
068     *
069     * @param captureSearchResult the search result.
070     * @return a valid resource containing metadata and a link to the ARC record.
071     * @throws ResourceNotAvailableException if something went wrong fetching record.
072     */
073    public Resource retrieveResource(CaptureSearchResult captureSearchResult) throws ResourceNotAvailableException {
074        long offset;
075        Map<String, Object> metadata = new HashMap<String, Object>();
076        ARCRecord arcRecord;
077        ArchiveRecordHeader arcRecordMetaData;
078
079        String filename = captureSearchResult.getFile();
080        try {
081            offset = captureSearchResult.getOffset();
082        } catch (NumberFormatException e) {
083            logger.error("Error looking for non existing resource", e);
084            throw new ResourceNotAvailableException("NetarchiveResourceStore "
085                    + "thows NumberFormatException when reading offset.");
086        } catch (NullPointerException e) {
087            logger.error("Error looking for non existing resource", e);
088            throw new ResourceNotAvailableException("NetarchiveResourceStore "
089                    + "throws NullPointerException when accessing " + "CaptureResult given from Wayback.");
090        }
091        logger.info("Received request for resource from file '" + filename + "' at offset '" + offset + "'");
092        BitarchiveRecord bitarchiveRecord = client.get(filename, offset);
093        if (bitarchiveRecord == null) {
094            logger.warn(client.getClass() + " instance returned null. Connection may have been closed. Trying to refresh instance.");
095            client.close();
096            client = ArcRepositoryClientFactory.getViewerInstance();
097            bitarchiveRecord = client.get(filename, offset);
098            if (bitarchiveRecord != null) {
099                logger.info("Success! Reconnecting " + client.getClass() + " worked.");
100            }
101            if (bitarchiveRecord == null) {
102                throw new ResourceNotAvailableException("NetarchiveResourceStore: "
103                        + "Bitarchive didn't return the requested record.");
104            }
105        }
106        logger.info("Retrieved resource from file '" + filename + "' at offset '" + offset + "'");
107
108        // This InputStream is just the http-response, starting with the HTTP arcRecordMetaData.
109        InputStream is = bitarchiveRecord.getData();
110
111        metadata.put(ARCRecordMetaData.URL_FIELD_KEY, captureSearchResult.getOriginalUrl());
112        try {
113            metadata.put(ARCRecordMetaData.IP_HEADER_FIELD_KEY, captureSearchResult.getOriginalHost());
114        } catch (NullPointerException ex) {
115            metadata.put(ARCRecordMetaData.IP_HEADER_FIELD_KEY, "");
116        }
117        metadata.put(ARCRecordMetaData.DATE_FIELD_KEY, captureSearchResult.getCaptureDate().toString());
118        metadata.put(ARCRecordMetaData.MIMETYPE_FIELD_KEY, captureSearchResult.getMimeType());
119        metadata.put(ARCRecordMetaData.VERSION_FIELD_KEY, captureSearchResult.getHttpCode());
120        metadata.put(ARCRecordMetaData.ABSOLUTE_OFFSET_KEY, "" + offset);
121        metadata.put(ARCRecordMetaData.LENGTH_FIELD_KEY, "" + bitarchiveRecord.getLength());
122        metadata.put(ARCRecordMetaData.STATUSCODE_FIELD_KEY, captureSearchResult.getHttpCode());
123        metadata.put(ArchiveFileConstants.ORIGIN_FIELD_KEY, captureSearchResult.getOriginalUrl());
124        // create arcRecordMetaData.
125        try {
126            arcRecordMetaData = new ARCRecordMetaData(filename, metadata);
127        } catch (IOException e) {
128            logger.error("Could not create arcRecordMetaData", e);
129            throw new ResourceNotAvailableException(e.getMessage());
130        }
131
132        // create ARCRecord.
133        try {
134            arcRecord = new ARCRecord(is, arcRecordMetaData, 0, false, false, true);
135            //arcRecord.getHttpHeaders();
136            //arcRecord.skipHttpHeader();
137            logger.debug("ARCRecord created with code '" + arcRecord.getStatusCode() + "'");
138            logger.debug("Headers: " + arcRecord.getHeaderString());
139        } catch (NullPointerException e) {
140            logger.error("Could not create ARCRecord", e);
141            throw new ResourceNotAvailableException("ARC record doesn't contain" + " valid http URL");
142        } catch (IOException e) {
143            logger.error("Could not create ARCRecord", e);
144            throw new ResourceNotAvailableException(e.getMessage());
145        }
146        Resource resource = new ArcResource(arcRecord, null);
147        try {
148            //This call has the side-effect of queueing up the resource at the start of the response-body, after the http headers.
149            resource.parseHeaders();
150        } catch (IOException e) {
151            logger.debug(e);
152        }
153        logger.info("Returning resource '" + resource + "'");
154        return resource;
155    }
156
157    /**
158     * Shuts down this resource store, closing the arcrepository client.
159     *
160     * @throws IOException if an exception occurred while closing the client.
161     */
162    public void shutdown() throws IOException {
163        // Close JMS connection.
164        logger.info("Closing JMSConnection for " + this.getClass());
165        client.close();
166    }
167}