001package dk.netarkivet.common.utils.service;
002
003import java.io.IOException;
004import java.io.InputStream;
005import java.net.URI;
006import java.nio.file.Paths;
007
008import org.apache.http.HttpEntity;
009import org.apache.http.client.methods.CloseableHttpResponse;
010import org.apache.http.client.methods.HttpUriRequest;
011import org.apache.http.impl.client.CloseableHttpClient;
012import org.archive.io.ArchiveReader;
013import org.archive.io.ArchiveReaderFactory;
014import org.archive.io.ArchiveRecord;
015import org.slf4j.Logger;
016import org.slf4j.LoggerFactory;
017
018import dk.netarkivet.common.CommonSettings;
019import dk.netarkivet.common.distribute.arcrepository.BitarchiveRecord;
020import dk.netarkivet.common.exceptions.ArgumentNotValid;
021import dk.netarkivet.common.utils.HttpsClientBuilder;
022import dk.netarkivet.common.utils.Settings;
023
024public class WarcRecordClient {
025    private static final Logger log = LoggerFactory.getLogger(WarcRecordClient.class);
026    private static final HttpsClientBuilder clientBuilder;
027    private final URI baseUri;
028    private long offset;
029
030    static {
031        String privateKeyFile = Settings.get(CommonSettings.WRS_KEYFILE);
032        clientBuilder = new HttpsClientBuilder(privateKeyFile);
033    }
034
035    public WarcRecordClient(URI baseUri) {
036        this.baseUri = baseUri;
037    }
038
039    /**
040     * Retrieves a single BitarchiveRecord from the repository from a given file and offset. If the operation fails for
041     * any reason, this method returns null.
042     *
043     * @param arcfileName Name of the arcfile/warcfile to retrieve.
044     * @param index offset to fetch specific record from warc or arc file
045     */
046    public BitarchiveRecord getBitarchiveRecord(String arcfileName, long index) {
047        BitarchiveRecord bitarchiveRecord = null;
048        try {
049            ArgumentNotValid.checkNotNullOrEmpty(arcfileName, "arcfile");
050            ArgumentNotValid.checkNotNegative(index, "index");
051
052            log.debug("Requesting get of record '{}:{}'", arcfileName, index);
053
054            String strUri = this.getBaseUri().toString() + "/" + arcfileName;
055
056            URI uri = new URI(strUri);
057            bitarchiveRecord = this.fetchBitarchiveRecord(uri, index);
058        } catch (Exception e) {
059            log.error("Failed to retrieve record at offset {} from file {}.", index, arcfileName, e);
060        }
061        return bitarchiveRecord;
062    }
063
064    /**
065     * Uses WarcRecordClient to call ApacheHttpClient
066     *
067     * @param uri    Uniform Resource Identifier including base uri and name of file
068     * @param offset offset to fetch specific record from warc file index must be the same as the offset that ends up in
069     *               the range header
070     * @throws ArgumentNotValid              if arcfilename is null or empty, or if toFile is null
071     * @throws IOException                   if reading file fails
072     * @throws UnsupportedOperationException is used if method is not implemented
073     */
074    private BitarchiveRecord fetchBitarchiveRecord(URI uri, long offset) throws Exception {
075        String fileName = Paths.get(uri.getPath()).getFileName().toString();
076        log.debug("fileName: " + fileName);
077        CGIRequestBuilder requestBuilder = new CGIRequestBuilder(uri);
078        HttpUriRequest request = requestBuilder.buildWRSRequest(offset);
079        CloseableHttpClient closableHttpClient = clientBuilder.getHttpsClient();
080
081        log.debug("Executing request " + request.getRequestLine());
082        try (CloseableHttpResponse httpResponse = closableHttpClient.execute(request)) {
083            log.debug("httpResponse status: " + httpResponse.getStatusLine().toString());
084            if (httpResponse.getStatusLine().getStatusCode() != 200) {
085                log.error("Http request error " + httpResponse.getStatusLine().getStatusCode());
086                return null;
087            }
088            HttpEntity entity = httpResponse.getEntity();
089            if (entity != null) {
090                //Note that data that comes back from WarcRecordService has been decompressed so to get the
091                //right arc/warc parser from the ArchiveReaderFactory we have to give it the name of the
092                //uncompressed file.
093                final String inflatedName = fileName.replace(".gz", "");
094                InputStream iStr = entity.getContent();
095                boolean atFirst = (offset == 0L);
096                ArchiveReader archiveReader = ArchiveReaderFactory.get(inflatedName, iStr, atFirst);
097                ArchiveRecord archiveRecord = archiveReader.get();
098                BitarchiveRecord reply = new BitarchiveRecord(archiveRecord, fileName);
099                log.debug("reply: " + reply.toString());
100                return reply;
101            } else {
102                log.warn("Received null response entity for request for {}, {}", uri, offset);
103                return null;
104            }
105        }
106    }
107
108    public URI getBaseUri() {
109        return baseUri;
110    }
111
112    public long getOffset() {
113        return offset;
114    }
115
116    public void setOffset(long offset) {
117        this.offset = offset;
118    }
119}