package org.archive.modules.writer;

import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.io.ReplayInputStream;
import org.archive.modules.CrawlURI;
import org.archive.modules.Processor;
import org.archive.modules.fetcher.FetchHTTP;
import org.archive.modules.net.CrawlHost;
import org.archive.modules.net.ServerCache;
import org.archive.spring.ConfigPath;
import org.archive.util.FileUtils;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:org/archive/modules/writer/Kw3WriterProcessor.class */
/**
 * Processor that writes every successfully fetched HTTP/HTTPS URI to its own
 * file, laid out as a multipart MIME-style document with three parts:
 * an "ArchiveInfo" block of key/value metadata, the recorded response header,
 * and (when non-empty) the recorded response content.
 *
 * <p>Files are stored as
 * {@code <path>/<first two hex chars of MD5(host)>/<host[:port]>/current/<MD5(uri)>.<fetchBeginTime / 1000>}.
 */
public class Kw3WriterProcessor extends Processor {
    private static final long serialVersionUID = 3;

    /** Attribute key constant; it is not referenced anywhere within this class. */
    public static final String ATTR_MAX_BYTES_WRITTEN = "total-bytes-to-write";

    private static final Logger logger = Logger.getLogger(Kw3WriterProcessor.class.getName());
    private static final String COLON = ":";
    private static final String WS = " ";
    private static final String LF = "\n";
    private static final String BOUNDARY_START = "KulturArw3_";
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Used to resolve the IP address recorded in the ArchiveInfo part. */
    protected ServerCache serverCache;

    /** Base directory below which the per-host directories are created. */
    protected ConfigPath path = new ConfigPath("Kw3Writer subdirectory", "arcs");

    /** Upper bound on the number of content bytes copied into one file. */
    protected long maxFileSizeBytes = 100000000;

    /** Whether freshly created directories are passed to the external {@code chmod} command. */
    protected boolean chmod = false;

    /** Mode argument handed to {@code chmod} when {@link #chmod} is enabled. */
    protected String chmodValue = "777";

    /** Collection name written into the ArchiveInfo part. */
    protected String collection = "kw3";

    /** Harvester name written into the ArchiveInfo part. */
    protected String harvester = "heritrix";

    /** @return the base directory configuration */
    public ConfigPath getPath() {
        return this.path;
    }

    /** @param configPath the base directory configuration to use */
    public void setPath(ConfigPath configPath) {
        this.path = configPath;
    }

    /** @return the maximum number of content bytes written per file */
    public long getMaxFileSizeBytes() {
        return this.maxFileSizeBytes;
    }

    /** @param j the maximum number of content bytes written per file */
    public void setMaxFileSizeBytes(long j) {
        this.maxFileSizeBytes = j;
    }

    /** @return {@code true} if newly created directories are chmod-ed */
    public boolean getChmod() {
        return this.chmod;
    }

    /** @param z {@code true} to chmod newly created directories */
    public void setChmod(boolean z) {
        this.chmod = z;
    }

    /** @return the mode argument passed to {@code chmod} */
    public String getChmodValue() {
        return this.chmodValue;
    }

    /** @param str the mode argument passed to {@code chmod} */
    public void setChmodValue(String str) {
        this.chmodValue = str;
    }

    /** @return the collection name written into each file */
    public String getCollection() {
        return this.collection;
    }

    /** @param str the collection name written into each file */
    public void setCollection(String str) {
        this.collection = str;
    }

    /** @return the harvester name written into each file */
    public String getHarvester() {
        return this.harvester;
    }

    /** @param str the harvester name written into each file */
    public void setHarvester(String str) {
        this.harvester = str;
    }

    /** @return the server cache used for IP address lookups */
    public ServerCache getServerCache() {
        return this.serverCache;
    }

    /** @param serverCache the server cache used for IP address lookups */
    @Autowired
    public void setServerCache(ServerCache serverCache) {
        this.serverCache = serverCache;
    }

    /**
     * Accepts only URIs that were fetched successfully and whose scheme is
     * HTTP or HTTPS (compared case-insensitively). A missing scheme is
     * treated as "do not process".
     */
    @Override // org.archive.modules.Processor
    protected boolean shouldProcess(CrawlURI crawlURI) {
        if (!isSuccess(crawlURI)) {
            return false;
        }
        String scheme = crawlURI.getUURI().getScheme();
        return FetchHTTP.HTTP_SCHEME.equalsIgnoreCase(scheme)
                || FetchHTTP.HTTPS_SCHEME.equalsIgnoreCase(scheme);
    }

    /**
     * Writes the file for the given URI. I/O failures are logged at WARNING
     * level and otherwise ignored so that processing can continue.
     */
    @Override // org.archive.modules.Processor
    protected void innerProcess(CrawlURI crawlURI) {
        try {
            writeMimeFile(crawlURI);
        } catch (IOException e) {
            logger.log(Level.WARNING, "i/o error", e);
        }
    }

    /**
     * Writes the complete multipart file for one URI: ArchiveInfo part,
     * header part, content part and the closing boundary.
     *
     * <p>Both the replay stream and the output stream are closed on every
     * exit path; a failure while closing one no longer prevents the other
     * from being closed.
     *
     * @param crawlURI the fetched URI to archive
     * @throws IOException if reading the recording or writing the file fails
     */
    protected void writeMimeFile(CrawlURI crawlURI) throws IOException {
        String boundary = BOUNDARY_START + stringToMD5(crawlURI.toString());
        try (ReplayInputStream replay = crawlURI.getRecorder().getRecordedInput().getReplayInputStream();
                OutputStream out = initOutputStream(crawlURI)) {
            writeArchiveInfoPart(boundary, crawlURI, replay, out);
            writeHeaderPart(boundary, replay, out);
            writeContentPart(boundary, crawlURI, replay, out);
            out.write(("\n--" + boundary + "--\n").getBytes(StandardCharsets.UTF_8));
        }
    }

    /**
     * Creates (if necessary) the target directory for the URI's host and
     * opens a buffered output stream on the file for this fetch.
     *
     * <p>The port is appended to the host directory name unless it is 80 or
     * not positive. When {@link #chmod} is enabled, a newly created
     * directory and its ancestors up to the base path are chmod-ed.
     *
     * @param crawlURI the fetched URI
     * @return a buffered stream writing to the new file; the caller closes it
     * @throws IOException if the directory or file cannot be created
     */
    protected OutputStream initOutputStream(CrawlURI crawlURI) throws IOException {
        String uri = crawlURI.toString();
        int port = crawlURI.getUURI().getPort();
        String host = crawlURI.getUURI().getHost();
        if (port != 80 && port > 0) {
            host = host + ":" + port;
        }
        long fetchBeginSeconds = crawlURI.getFetchBeginTime() / 1000;

        File baseDir = getPath().getFile();
        File targetDir = new File(baseDir, stringToMD5(host).substring(0, 2) + "/" + host + "/current");
        if (!targetDir.exists()) {
            FileUtils.ensureWriteableDirectory(targetDir);
            if (this.chmod) {
                chmods(targetDir, baseDir);
            }
        }
        File target = new File(targetDir, stringToMD5(uri) + "." + fetchBeginSeconds);
        return new FastBufferedOutputStream(new FileOutputStream(target));
    }

    /**
     * Writes the leading MIME headers and the ArchiveInfo key/value block.
     *
     * <p>The header MD5 is computed over the raw recorded header bytes. The
     * content digest, when present, is written as lowercase hex; when absent
     * the literal text {@code null} is written.
     *
     * @param str the multipart boundary
     * @param crawlURI the fetched URI
     * @param replayInputStream replay of the recorded response
     * @param outputStream destination file stream
     * @throws IOException if reading the header or writing fails
     */
    protected void writeArchiveInfoPart(String str, CrawlURI crawlURI, ReplayInputStream replayInputStream, OutputStream outputStream) throws IOException {
        String uri = crawlURI.toString();
        String hostAddress = getHostAddress(crawlURI);
        long headerSize = replayInputStream.getHeaderSize();
        long contentSize = replayInputStream.getContentSize();
        long archiveTimeSeconds = System.currentTimeMillis() / 1000;
        int fetchStatus = crawlURI.getFetchStatus();

        ByteArrayOutputStream headerBytes = new ByteArrayOutputStream();
        replayInputStream.readHeaderTo(headerBytes);
        // Digest the bytes themselves; decoding them to a String first would
        // make the checksum depend on the platform charset.
        String headerMd5 = bytesToMD5(headerBytes.toByteArray());

        byte[] digest = crawlURI.getContentDigest();
        String contentDigestHex = digest == null ? null : getHexString(digest);

        StringBuilder info = new StringBuilder();
        info.append("MIME-version: 1.1").append(LF);
        info.append("Content-Type: multipart/mixed; boundary=").append(str).append(LF);
        info.append("HTTP-Part: ArchiveInfo").append(LF);
        appendField(info, Kw3Constants.COLLECTION_KEY, this.collection);
        appendField(info, Kw3Constants.HARVESTER_KEY, this.harvester);
        appendField(info, Kw3Constants.URL_KEY, uri);
        appendField(info, Kw3Constants.IP_ADDRESS_KEY, hostAddress);
        appendField(info, Kw3Constants.HEADER_LENGTH_KEY, headerSize);
        appendField(info, Kw3Constants.HEADER_MD5_KEY, headerMd5);
        appendField(info, Kw3Constants.CONTENT_LENGTH_KEY, contentSize);
        appendField(info, Kw3Constants.CONTENT_MD5_KEY, contentDigestHex);
        appendField(info, Kw3Constants.ARCHIVE_TIME_KEY, archiveTimeSeconds);
        appendField(info, Kw3Constants.STATUS_CODE_KEY, fetchStatus);
        info.append(LF); // blank line terminates the ArchiveInfo block
        outputStream.write(info.toString().getBytes(StandardCharsets.UTF_8));
    }

    /** Appends one {@code key: value} line; a {@code null} value is rendered as "null". */
    private static void appendField(StringBuilder target, String key, Object value) {
        target.append(key).append(COLON).append(WS).append(value).append(LF);
    }

    /**
     * Writes the boundary and part headers for the "Header" part, followed by
     * the recorded response header.
     *
     * @param str the multipart boundary
     * @param replayInputStream replay of the recorded response
     * @param outputStream destination file stream
     * @throws IOException if reading or writing fails
     */
    protected void writeHeaderPart(String str, ReplayInputStream replayInputStream, OutputStream outputStream) throws IOException {
        StringBuilder partHeader = new StringBuilder();
        partHeader.append("--").append(str).append(LF);
        partHeader.append("Content-Type: text/plain; charset=\"US-ascii\"").append(LF);
        partHeader.append("HTTP-Part: Header").append(LF).append(LF);
        outputStream.write(partHeader.toString().getBytes(StandardCharsets.UTF_8));
        replayInputStream.readHeaderTo(outputStream);
    }

    /**
     * Writes the "Content" part. Nothing is written when the recorded content
     * size is zero. Content larger than {@link #getMaxFileSizeBytes()} is
     * truncated to that many bytes and the truncation is logged at INFO.
     *
     * @param str the multipart boundary
     * @param crawlURI the fetched URI
     * @param replayInputStream replay of the recorded response
     * @param outputStream destination file stream
     * @throws IOException if reading or writing fails
     */
    protected void writeContentPart(String str, CrawlURI crawlURI, ReplayInputStream replayInputStream, OutputStream outputStream) throws IOException {
        String contentType = crawlURI.getContentType();
        long contentSize = replayInputStream.getContentSize();
        if (contentSize == 0) {
            return;
        }
        StringBuilder partHeader = new StringBuilder();
        partHeader.append("--").append(str).append(LF);
        partHeader.append("Content-Type: ").append(contentType).append(LF);
        partHeader.append("HTTP-Part: Content").append(LF).append(LF);
        outputStream.write(partHeader.toString().getBytes(StandardCharsets.UTF_8));

        long limit = getMaxFileSizeBytes();
        if (contentSize <= limit) {
            replayInputStream.readContentTo(outputStream);
        } else {
            replayInputStream.readContentTo(outputStream, limit);
            logger.info(" Truncated url: " + crawlURI.toString() + ", Size: " + contentSize + ", Content-type: " + contentType);
        }
    }

    /**
     * Returns the lowercase hex MD5 of the UTF-8 encoding of {@code str}.
     *
     * @throws IllegalStateException if no MD5 implementation is available
     */
    private String stringToMD5(String str) {
        return bytesToMD5(str.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Returns the lowercase hex MD5 of {@code data}.
     *
     * @throws IllegalStateException if no MD5 implementation is available;
     *         failing here avoids the "null" digests and follow-up
     *         NullPointerExceptions a silent {@code null} return would cause
     */
    private String bytesToMD5(byte[] data) {
        try {
            return getHexString(MessageDigest.getInstance("MD5").digest(data));
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("MD5 message digest is not available", e);
        }
    }

    /** Renders the bytes as two lowercase hex digits each. */
    private String getHexString(byte[] bArr) {
        StringBuilder hex = new StringBuilder(bArr.length * 2);
        for (byte b : bArr) {
            hex.append(HEX_DIGITS[(b >> 4) & 0xF]).append(HEX_DIGITS[b & 0xF]);
        }
        return hex.toString();
    }

    /**
     * Applies {@link #chmodValue} to {@code file} and to each of its ancestors
     * below {@code file2}; {@code file2} itself is not changed. Paths are
     * compared case-insensitively. The walk also stops when the file system
     * root is passed without ever meeting {@code file2}.
     */
    private void chmods(File file, File file2) {
        String stopPath = file2.getAbsolutePath();
        chmod(file, this.chmodValue);
        File ancestor = file.getParentFile();
        while (ancestor != null && !ancestor.getAbsolutePath().equalsIgnoreCase(stopPath)) {
            chmod(ancestor, this.chmodValue);
            ancestor = ancestor.getParentFile();
        }
    }

    /**
     * Runs the external {@code chmod} command on one file. Failures are logged
     * at WARNING level and never propagated.
     *
     * <p>The mode string is split on whitespace so that several arguments can
     * be supplied, but the path is always passed as a single argument, so
     * paths containing spaces work.
     */
    private void chmod(File file, String str) {
        List<String> command = new ArrayList<String>();
        command.add("chmod");
        if (str != null) {
            for (String token : str.trim().split("\\s+")) {
                if (!token.isEmpty()) {
                    command.add(token);
                }
            }
        }
        command.add(file.getAbsolutePath());

        Process process = null;
        try {
            process = new ProcessBuilder(command).start();
            int exitStatus = process.waitFor();
            if (exitStatus != 0) {
                logger.log(Level.WARNING, "chmod failed: " + command + " exited with status " + exitStatus);
            }
        } catch (IOException e) {
            logger.log(Level.WARNING, "chmod failed", e);
        } catch (InterruptedException e) {
            // Preserve the interrupt for whoever owns this thread.
            Thread.currentThread().interrupt();
            logger.log(Level.WARNING, "chmod failed", e);
        } finally {
            if (process != null) {
                closeQuietly(process.getInputStream());
                closeQuietly(process.getOutputStream());
                closeQuietly(process.getErrorStream());
            }
        }
    }

    /** Closes the resource, logging (at FINE) rather than propagating any failure. */
    private static void closeQuietly(Closeable resource) {
        try {
            resource.close();
        } catch (IOException e) {
            logger.log(Level.FINE, "could not close chmod process stream", e);
        }
    }

    /**
     * Looks up the textual IP address of the URI's host in the server cache.
     *
     * @throws NullPointerException if the host is unknown to the cache or has
     *         no IP address recorded
     */
    private String getHostAddress(CrawlURI crawlURI) {
        CrawlHost hostFor = this.serverCache.getHostFor(crawlURI.getUURI());
        if (hostFor == null) {
            throw new NullPointerException("Crawlhost is null for " + crawlURI + " " + crawlURI.getVia());
        }
        if (hostFor.getIP() == null) {
            // NOTE(review): -2 appears to be the "never looked up" marker of
            // CrawlHost.getIpFetched(), judging by the message - confirm.
            String lookupInfo = hostFor.getIpFetched() == -2
                    ? "was never looked up."
                    : (System.currentTimeMillis() - hostFor.getIpFetched()) + " ms ago.";
            throw new NullPointerException("Address is null for " + crawlURI + " " + crawlURI.getVia() + ". Address " + lookupInfo);
        }
        return hostFor.getIP().getHostAddress();
    }
}
