/*
 * #%L
 * Netarchivesuite - wayback
 * %%
 * Copyright (C) 2005 - 2018 The Royal Danish Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.wayback;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.archive.format.ArchiveFileConstants;
import org.archive.io.ArchiveRecordHeader;
import org.archive.io.arc.ARCRecord;
import org.archive.io.arc.ARCRecordMetaData;
import org.archive.wayback.ResourceStore;
import org.archive.wayback.core.CaptureSearchResult;
import org.archive.wayback.core.Resource;
import org.archive.wayback.exception.ResourceNotAvailableException;
import org.archive.wayback.resourcestore.resourcefile.ArcResource;

import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClient;
import dk.netarkivet.common.distribute.arcrepository.ArcRepositoryClientFactory;
import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
import dk.netarkivet.common.distribute.arcrepository.ViewerArcRepositoryClient;

/**
 * This is the connector between netarchivesuite and wayback. It is based on PrototypeNetarchiveResourceStore.java,
 * which was made as a prototype connector.
 */
public class NetarchiveResourceStore implements ResourceStore {

    /** JMS ArcRepositoryClient. Replaced by a fresh instance when a lookup returns null (see fetchRecord). */
    protected ViewerArcRepositoryClient client;

    /** Logger, named after the runtime class so subclasses log under their own name. */
    private final Log logger = LogFactory.getLog(getClass().getName());

    /**
     * Constructor. Obtains the viewer client from {@link ArcRepositoryClientFactory}.
     */
    public NetarchiveResourceStore() {
        client = ArcRepositoryClientFactory.getViewerInstance();
    }

    /**
     * Transforms search result into a resource, according to the ResourceStore interface.
     * <p>
     * The record is fetched through the arcrepository client by file name and offset, wrapped in an
     * {@link ARCRecord} whose metadata is built from the search result, and returned as an {@link ArcResource}
     * on which {@code parseHeaders()} has already been called.
     *
     * @param captureSearchResult the search result.
     * @return a valid resource containing metadata and a link to the ARC record.
     * @throws ResourceNotAvailableException if the search result is null, its offset cannot be read, the record
     * cannot be fetched, or the ARC record cannot be constructed.
     */
    @Override
    public Resource retrieveResource(CaptureSearchResult captureSearchResult) throws ResourceNotAvailableException {
        // Guard explicitly: the file name is read before the offset, so a null search result would
        // otherwise escape as a raw NullPointerException instead of the declared exception.
        if (captureSearchResult == null) {
            logger.error("Error looking for non existing resource: CaptureSearchResult is null");
            throw new ResourceNotAvailableException("NetarchiveResourceStore "
                    + "received a null CaptureSearchResult from Wayback.");
        }

        String filename = captureSearchResult.getFile();
        long offset;
        try {
            offset = captureSearchResult.getOffset();
        } catch (NumberFormatException e) {
            logger.error("Error looking for non existing resource", e);
            throw new ResourceNotAvailableException("NetarchiveResourceStore "
                    + "throws NumberFormatException when reading offset.");
        } catch (NullPointerException e) {
            logger.error("Error looking for non existing resource", e);
            throw new ResourceNotAvailableException("NetarchiveResourceStore "
                    + "throws NullPointerException when accessing CaptureResult given from Wayback.");
        }

        logger.info("Received request for resource from file '" + filename + "' at offset '" + offset + "'");
        BitarchiveRecord bitarchiveRecord = fetchRecord(filename, offset);
        logger.info("Retrieved resource from file '" + filename + "' at offset '" + offset + "'");

        // This InputStream is just the http-response, starting with the HTTP header.
        InputStream is = bitarchiveRecord.getData();

        Map<String, Object> metadata = buildMetadata(captureSearchResult, bitarchiveRecord, offset);

        // Create the ARC record header from the collected metadata.
        ArchiveRecordHeader arcRecordMetaData;
        try {
            arcRecordMetaData = new ARCRecordMetaData(filename, metadata);
        } catch (IOException e) {
            logger.error("Could not create arcRecordMetaData", e);
            closeQuietly(is);
            throw new ResourceNotAvailableException(e.getMessage());
        }

        // Create the ARC record on top of the data stream.
        ARCRecord arcRecord;
        try {
            arcRecord = new ARCRecord(is, arcRecordMetaData, 0, false, false, true);
            logger.debug("ARCRecord created with code '" + arcRecord.getStatusCode() + "'");
            logger.debug("Headers: " + arcRecord.getHeaderString());
        } catch (NullPointerException e) {
            logger.error("Could not create ARCRecord", e);
            closeQuietly(is);
            throw new ResourceNotAvailableException("ARC record doesn't contain valid http URL");
        } catch (IOException e) {
            logger.error("Could not create ARCRecord", e);
            closeQuietly(is);
            throw new ResourceNotAvailableException(e.getMessage());
        }

        Resource resource = new ArcResource(arcRecord, null);
        try {
            // This call has the side-effect of queueing up the resource at the start of the response-body,
            // after the http headers.
            resource.parseHeaders();
        } catch (IOException e) {
            // Best effort: the resource is still returned, but the failure is made visible with context.
            logger.warn("Could not parse headers of resource from file '" + filename + "' at offset '" + offset
                    + "'", e);
        }
        logger.info("Returning resource '" + resource + "'");
        return resource;
    }

    /**
     * Fetches a record through the client, replacing the client with a fresh instance and retrying once if the
     * first lookup returns null.
     *
     * @param filename name of the archive file holding the record.
     * @param offset offset of the record within the file.
     * @return the record; never null.
     * @throws ResourceNotAvailableException if the record is still null after the retry.
     */
    private BitarchiveRecord fetchRecord(String filename, long offset) throws ResourceNotAvailableException {
        BitarchiveRecord bitarchiveRecord = client.get(filename, offset);
        if (bitarchiveRecord != null) {
            return bitarchiveRecord;
        }

        logger.warn(client.getClass()
                + " instance returned null. Connection may have been closed. Trying to refresh instance.");
        client.close();
        client = ArcRepositoryClientFactory.getViewerInstance();
        bitarchiveRecord = client.get(filename, offset);
        if (bitarchiveRecord == null) {
            throw new ResourceNotAvailableException("NetarchiveResourceStore: "
                    + "Bitarchive didn't return the requested record.");
        }
        logger.info("Success! Reconnecting " + client.getClass() + " worked.");
        return bitarchiveRecord;
    }

    /**
     * Collects the ARC header fields for a record from the search result and the fetched record.
     *
     * @param captureSearchResult the search result supplying url, host, date, mime type and http code.
     * @param bitarchiveRecord the fetched record supplying the length.
     * @param offset offset of the record within its file.
     * @return a new map of header field keys to values.
     */
    private Map<String, Object> buildMetadata(CaptureSearchResult captureSearchResult,
            BitarchiveRecord bitarchiveRecord, long offset) {
        Map<String, Object> metadata = new HashMap<String, Object>();
        metadata.put(ARCRecordMetaData.URL_FIELD_KEY, captureSearchResult.getOriginalUrl());
        try {
            metadata.put(ARCRecordMetaData.IP_HEADER_FIELD_KEY, captureSearchResult.getOriginalHost());
        } catch (NullPointerException ex) {
            // No host could be derived from the search result; fall back to an empty value.
            metadata.put(ARCRecordMetaData.IP_HEADER_FIELD_KEY, "");
        }
        metadata.put(ARCRecordMetaData.DATE_FIELD_KEY, captureSearchResult.getCaptureDate().toString());
        metadata.put(ARCRecordMetaData.MIMETYPE_FIELD_KEY, captureSearchResult.getMimeType());
        // NOTE(review): the version field is filled with the HTTP code, same as the status code field below
        // - confirm this is intentional.
        metadata.put(ARCRecordMetaData.VERSION_FIELD_KEY, captureSearchResult.getHttpCode());
        metadata.put(ARCRecordMetaData.ABSOLUTE_OFFSET_KEY, "" + offset);
        metadata.put(ARCRecordMetaData.LENGTH_FIELD_KEY, "" + bitarchiveRecord.getLength());
        metadata.put(ARCRecordMetaData.STATUSCODE_FIELD_KEY, captureSearchResult.getHttpCode());
        metadata.put(ArchiveFileConstants.ORIGIN_FIELD_KEY, captureSearchResult.getOriginalUrl());
        return metadata;
    }

    /**
     * Closes the record data stream on a failure path, logging instead of propagating any close error.
     *
     * @param stream the stream to close; may be null.
     */
    private void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            logger.debug("Could not close record data stream", e);
        }
    }

    /**
     * Shuts down this resource store, closing the arcrepository client.
     *
     * @throws IOException if an exception occurred while closing the client.
     */
    @Override
    public void shutdown() throws IOException {
        // Close JMS connection.
        logger.info("Closing JMSConnection for " + this.getClass());
        client.close();
    }
}