001package dk.netarkivet.common.utils.service; 002 003import java.io.IOException; 004import java.io.InputStream; 005import java.net.URI; 006import java.nio.file.Paths; 007 008import org.apache.http.HttpEntity; 009import org.apache.http.client.methods.CloseableHttpResponse; 010import org.apache.http.client.methods.HttpUriRequest; 011import org.apache.http.impl.client.CloseableHttpClient; 012import org.archive.io.ArchiveReader; 013import org.archive.io.ArchiveReaderFactory; 014import org.archive.io.ArchiveRecord; 015import org.slf4j.Logger; 016import org.slf4j.LoggerFactory; 017 018import dk.netarkivet.common.CommonSettings; 019import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord; 020import dk.netarkivet.common.exceptions.ArgumentNotValid; 021import dk.netarkivet.common.utils.HttpsClientBuilder; 022import dk.netarkivet.common.utils.Settings; 023 024public class WarcRecordClient { 025 private static final Logger log = LoggerFactory.getLogger(WarcRecordClient.class); 026 private static final HttpsClientBuilder clientBuilder; 027 private final URI baseUri; 028 private long offset; 029 030 static { 031 String privateKeyFile = Settings.get(CommonSettings.WRS_KEYFILE); 032 clientBuilder = new HttpsClientBuilder(privateKeyFile); 033 } 034 035 public WarcRecordClient(URI baseUri) { 036 this.baseUri = baseUri; 037 } 038 039 /** 040 * Retrieves a single BitarchiveRecord from the repository from a given file and offset. If the operation fails for 041 * any reason, this method returns null. 042 * 043 * @param arcfileName Name of the arcfile/warcfile to retrieve. 044 * @param index offset to fetch specific record from warc or arc file 045 */ 046 public BitarchiveRecord getBitarchiveRecord(String arcfileName, long index) { 047 BitarchiveRecord bitarchiveRecord = null; 048 try { 049 ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfile"); 050 ArgumentNotValid.checkNotNegative(index, "index"); 051 052 log.debug("Requesting get of record '{}:{}'", arcfileName, index); 053 054 String strUri = this.getBaseUri().toString() + "/" + arcfileName; 055 056 URI uri = new URI(strUri); 057 bitarchiveRecord = this.fetchBitarchiveRecord(uri, index); 058 } catch (Exception e) { 059 log.error("Failed to retrieve record at offset {} from file {}.", index, arcfileName, e); 060 } 061 return bitarchiveRecord; 062 } 063 064 /** 065 * Uses WarcRecordClient to call ApacheHttpClient 066 * 067 * @param uri Uniform Resource Identifier including base uri and name of file 068 * @param offset offset to fetch specific record from warc file index must be the same as the offset that ends up in 069 * the range header 070 * @throws ArgumentNotValid if arcfilename is null or empty, or if toFile is null 071 * @throws IOException if reading file fails 072 * @throws UnsupportedOperationException is used if method is not implemented 073 */ 074 private BitarchiveRecord fetchBitarchiveRecord(URI uri, long offset) throws Exception { 075 String fileName = Paths.get(uri.getPath()).getFileName().toString(); 076 log.debug("fileName: " + fileName); 077 CGIRequestBuilder requestBuilder = new CGIRequestBuilder(uri); 078 HttpUriRequest request = requestBuilder.buildWRSRequest(offset); 079 CloseableHttpClient closableHttpClient = clientBuilder.getHttpsClient(); 080 081 log.debug("Executing request " + request.getRequestLine()); 082 try (CloseableHttpResponse httpResponse = closableHttpClient.execute(request)) { 083 log.debug("httpResponse status: " + httpResponse.getStatusLine().toString()); 084 if (httpResponse.getStatusLine().getStatusCode() != 200) { 085 log.error("Http request error " + httpResponse.getStatusLine().getStatusCode()); 086 return null; 087 } 088 HttpEntity entity = httpResponse.getEntity(); 089 if (entity != null) { 090 //Note that data that comes back from WarcRecordService has been decompressed so to get the 091 //right arc/warc parser from the ArchiveReaderFactory we have to give it the name of the 092 //uncompressed file. 093 final String inflatedName = fileName.replace(".gz", ""); 094 InputStream iStr = entity.getContent(); 095 boolean atFirst = (offset == 0L); 096 ArchiveReader archiveReader = ArchiveReaderFactory.get(inflatedName, iStr, atFirst); 097 ArchiveRecord archiveRecord = archiveReader.get(); 098 BitarchiveRecord reply = new BitarchiveRecord(archiveRecord, fileName); 099 log.debug("reply: " + reply.toString()); 100 return reply; 101 } else { 102 log.warn("Received null response entity for request for {}, {}", uri, offset); 103 return null; 104 } 105 } 106 } 107 108 public URI getBaseUri() { 109 return baseUri; 110 } 111 112 public long getOffset() { 113 return offset; 114 } 115 116 public void setOffset(long offset) { 117 this.offset = offset; 118 } 119}