package org.archive.crawler.migrate;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.archive.util.FileUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/archive/crawler/migrate/MigrateH1to3Tool.class */
/**
 * Command-line tool that converts a Heritrix 1.x job into a Heritrix 3.x job
 * directory.
 *
 * <p>Given the path of an H1 {@code order.xml} and a destination directory, it
 * flattens the order file into path/value pairs, translates each path through
 * the bundled {@code H1toH3.map} resource, writes the resulting overrides into
 * a copy of the bundled {@code crawler-beans.cxml} template, copies the
 * {@code seeds.txt} that sits next to the order file, and prints a report of
 * what was migrated, skipped, or needs manual attention.
 */
public class MigrateH1to3Tool {
    /** Classpath location of the H3 configuration template. */
    private static final String TEMPLATE_RESOURCE =
            "/org/archive/crawler/migrate/migrate-template-crawler-beans.cxml";

    /** Classpath location of the H1-path to H3-path translation table. */
    private static final String MIGRATE_MAP_RESOURCE = "/org/archive/crawler/migrate/H1toH3.map";

    /** Encoding used for the bundled resources and for the generated cxml. */
    private static final String UTF_8 = "UTF-8";

    /**
     * Finds the operator contact URL inside an H1 user-agent value: the
     * {@code http://} or {@code https://} URL that follows a {@code +} and
     * runs up to (not including) the next {@code )}.
     */
    private static final Pattern OPERATOR_CONTACT_URL = Pattern.compile("\\+(https?://[^)]*)");

    /** DOM of the most recently parsed H1 order.xml. */
    protected Document sourceOrderXmlDom;

    /** Shared XML parser; stays {@code null} if the static initializer failed. */
    protected static DocumentBuilder DOCUMENT_BUILDER;

    /**
     * Program entry point; delegates to {@link #instanceMain(String[])}.
     *
     * @param strArr command-line arguments
     * @throws Exception whatever {@link #instanceMain(String[])} throws
     */
    public static void main(String[] strArr) throws Exception {
        new MigrateH1to3Tool().instanceMain(strArr);
    }

    /**
     * Runs the migration.
     *
     * <p>Prints usage and returns when the argument count is not exactly two.
     * Terminates the JVM with status 1 when the source order file or its
     * sibling {@code seeds.txt} is not a regular file.
     *
     * @param strArr two elements: path of the H1 order.xml, then path of the
     *               H3 job directory to create or fill
     * @throws SAXException          if the order file is not parseable XML;
     *                               nothing is written in that case
     * @throws IllegalStateException if no XML parser could be created when
     *                               this class was initialized
     * @throws Exception             on I/O or XPath failures
     */
    public void instanceMain(String[] strArr) throws Exception {
        if (strArr.length != 2) {
            printHelp();
            return;
        }
        String sourceOrderXmlFileArg = strArr[0];
        String destinationH3JobDirArg = strArr[1];

        File sourceOrderXmlFile = new File(sourceOrderXmlFileArg);
        if (!sourceOrderXmlFile.isFile()) {
            System.err.println("ERROR sourceOrderXmlFileArg is not a file: " + sourceOrderXmlFileArg);
            System.exit(1);
        }
        File destinationH3JobDir = new File(destinationH3JobDirArg);
        FileUtils.ensureWriteableDirectory(destinationH3JobDir);

        System.out.println("H1 source: " + sourceOrderXmlFile.getAbsolutePath());
        System.out.println("H3 destination: " + destinationH3JobDir.getAbsolutePath());
        System.out.print("Migrating settings...");

        String template = readTemplate();
        Map<String, String> migrateMap = getMigrateMap();

        if (DOCUMENT_BUILDER == null) {
            throw new IllegalStateException(
                    "no XML DocumentBuilder available; parser configuration failed at class initialization");
        }
        try {
            this.sourceOrderXmlDom = DOCUMENT_BUILDER.parse(sourceOrderXmlFile);
        } catch (SAXException e) {
            // An unparseable order file leaves nothing to migrate; stop here
            // rather than carrying on without a DOM and writing a bogus job.
            System.err.println("ERROR caught exception parsing input file: " + e.getMessage() + "\n");
            throw e;
        }
        Map<String, String> h1Settings = flattenH1Order(this.sourceOrderXmlDom);

        List<String> notApplicable = new ArrayList<String>();
        List<String> needsAttention = new ArrayList<String>();
        int migratedCount = 0;
        StringBuilder overrides = new StringBuilder();
        for (Map.Entry<String, String> setting : h1Settings.entrySet()) {
            String h1Path = setting.getKey();
            String value = setting.getValue();
            String h3Path = migrateMap.get(h1Path);
            System.out.print(".");
            if (h3Path == null) {
                // No mapping known for this H1 setting.
                needsAttention.add(h1Path + " " + value);
            } else if (h3Path.startsWith("$")) {
                // '$' marks settings with no H3 counterpart.
                notApplicable.add(h1Path + " " + value);
            } else if (h3Path.startsWith("*")) {
                // '*' marks settings needing special handling; only the
                // user-agent split is implemented.
                if (h3Path.equals("*metadata.userAgentTemplate")
                        && extractOperatorContactUrl(value) != null) {
                    splitH1userAgent(value, overrides);
                    migratedCount += 2;
                } else {
                    needsAttention.add(h1Path + " " + value);
                }
            } else {
                if (h3Path.startsWith("^")) {
                    // '^' asks for the value to be upper-cased; use a fixed
                    // locale so the result does not vary with the platform.
                    value = value.toUpperCase(Locale.ROOT);
                    h3Path = h3Path.substring(1);
                }
                overrides.append(h3Path).append("=").append(value).append("\n");
                migratedCount++;
            }
        }
        System.out.println();
        System.out.println();

        String beansXml = template.replace("###MIGRATE_OVERRIDES###", overrides.toString());
        File targetBeansXmlFile = new File(destinationH3JobDir, "crawler-beans.cxml");
        // Written as UTF-8 to match how the template is read; presumably the
        // template declares UTF-8 in its XML prolog (TODO confirm).
        org.apache.commons.io.FileUtils.writeStringToFile(targetBeansXmlFile, beansXml, UTF_8);

        File sourceSeedsTxtFile = new File(sourceOrderXmlFile.getParentFile(), "seeds.txt");
        File destinationSeedsTxtFile = new File(destinationH3JobDir, "seeds.txt");
        if (!sourceSeedsTxtFile.isFile()) {
            System.err.println("ERROR sourceSeedsTxtFile not found: " + sourceSeedsTxtFile);
            System.exit(1);
        }
        org.apache.commons.io.FileUtils.copyFile(sourceSeedsTxtFile, destinationSeedsTxtFile);

        System.out.println(notApplicable.size() + " settings skipped as not-applicable");
        System.out.println("These are probably harmless, but if the following settings were");
        System.out.println("important to your crawl process, investigate other options.");
        listProblems(notApplicable);
        System.out.println();
        System.out.println(needsAttention.size() + " settings may need attention");
        System.out.println("Please review your original crawl and the created H3 job, for each");
        System.out.println("of the following, and manually update as needed.");
        listProblems(needsAttention);
        System.out.println();
        System.out.println(migratedCount + " H1 settings successfully migrated to H3 configuration");
        System.out.println();
        System.out.println("Review your converted crawler-beans.cxml at:");
        System.out.println(targetBeansXmlFile.getAbsolutePath());
    }

    /**
     * Loads the bundled H3 template as text.
     *
     * @return the template content
     * @throws IOException if the resource is missing or cannot be read
     */
    private String readTemplate() throws IOException {
        InputStream in = getClass().getResourceAsStream(TEMPLATE_RESOURCE);
        if (in == null) {
            throw new IOException("classpath resource not found: " + TEMPLATE_RESOURCE);
        }
        try {
            return IOUtils.toString(in, UTF_8);
        } finally {
            in.close();
        }
    }

    /**
     * Prints each entry on its own line, indented by one space.
     *
     * @param list entries to print
     */
    protected void listProblems(List<String> list) {
        for (String problem : list) {
            System.out.println(" " + problem);
        }
    }

    /** Prints the usage message to standard output. */
    protected void printHelp() {
        System.out.println("Usage: takes two arguments. First argument is path to a Heritrix 1.X order.xml, second argument is path for a new Heritrix 3.X job directory. Will generate a basic H3 job with as many of the H1 settings replicated as currently possible.");
    }

    /**
     * Splits an H1 user-agent value into the two H3 override lines
     * {@code metadata.operatorContactUrl} and {@code metadata.userAgentTemplate},
     * replacing the URL inside the template with {@code @OPERATOR_CONTACT_URL@}.
     *
     * <p>If the value contains no {@code +http://} or {@code +https://} URL,
     * only the {@code metadata.userAgentTemplate} line is appended, with the
     * value unchanged.
     *
     * @param str the H1 user-agent value
     * @param sb  receives the override lines, each terminated by a newline
     */
    protected void splitH1userAgent(String str, StringBuilder sb) {
        String contactUrl = extractOperatorContactUrl(str);
        if (contactUrl == null) {
            sb.append("metadata.userAgentTemplate=").append(str).append("\n");
            return;
        }
        String userAgentTemplate = str.replace(contactUrl, "@OPERATOR_CONTACT_URL@");
        sb.append("metadata.operatorContactUrl=").append(contactUrl).append("\n");
        sb.append("metadata.userAgentTemplate=").append(userAgentTemplate).append("\n");
    }

    /**
     * Returns the first operator contact URL in a user-agent value.
     *
     * @param userAgent the H1 user-agent value
     * @return the URL, or {@code null} when none is present
     */
    private static String extractOperatorContactUrl(String userAgent) {
        Matcher matcher = OPERATOR_CONTACT_URL.matcher(userAgent);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Loads the translation table from the bundled {@code H1toH3.map}
     * resource. Each line holds {@code |}-separated fields; the second field
     * becomes the key and the first field its value. Blank lines are skipped.
     *
     * @return map from the second field of each line to its first field
     * @throws IOException if the resource is missing, unreadable, or contains
     *                     a non-blank line with fewer than two fields
     */
    protected Map<String, String> getMigrateMap() throws IOException {
        Map<String, String> migrateMap = new HashMap<String, String>();
        InputStream in = getClass().getResourceAsStream(MIGRATE_MAP_RESOURCE);
        if (in == null) {
            throw new IOException("classpath resource not found: " + MIGRATE_MAP_RESOURCE);
        }
        try {
            LineIterator lines = IOUtils.lineIterator(in, UTF_8);
            while (lines.hasNext()) {
                String line = lines.nextLine();
                if (StringUtils.isBlank(line)) {
                    continue;
                }
                String[] fields = line.split("\\|");
                if (fields.length < 2) {
                    throw new IOException("malformed line in " + MIGRATE_MAP_RESOURCE + ": " + line);
                }
                migrateMap.put(fields[1], fields[0]);
            }
        } finally {
            in.close();
        }
        return migrateMap;
    }

    /**
     * Flattens an order document into an insertion-ordered map from pseudo
     * XPath to text value, one entry per non-blank text node. The first
     * {@code /crawl-order} in each path is replaced by {@code /}. When two
     * text nodes yield the same path, the later value replaces the earlier.
     *
     * @param document the parsed order.xml
     * @return map of pseudo XPath to text content, in document order
     * @throws XPathExpressionException if the XPath query fails
     */
    public static Map<String, String> flattenH1Order(Document document) throws XPathExpressionException {
        Map<String, String> flattened = new LinkedHashMap<String, String>();
        NodeList textNodes = (NodeList) XPathFactory.newInstance().newXPath()
                .compile("//text()").evaluate(document, XPathConstants.NODESET);
        for (int i = 0; i < textNodes.getLength(); i++) {
            Node textNode = textNodes.item(i);
            String text = textNode.getTextContent();
            if (StringUtils.isNotBlank(text)) {
                String path = getPseudoXpath(textNode.getParentNode()).replaceFirst("/crawl-order", "/");
                flattened.put(path, text);
            }
        }
        return flattened;
    }

    /**
     * Builds a slash-separated path from the top of the tree down to
     * {@code node}. A node carrying a {@code name} attribute contributes the
     * step {@code *[@name="value"]}; any other node contributes its node
     * name. The topmost node (the one without a parent) is not included.
     *
     * @param node the node to describe
     * @return the pseudo XPath; empty when {@code node} has no parent
     */
    protected static String getPseudoXpath(Node node) {
        String path = "";
        for (Node current = node; current.getParentNode() != null; current = current.getParentNode()) {
            String step = current.getNodeName();
            // getAttributes() is null for non-element nodes, so ask first.
            Node nameAttr = current.hasAttributes() ? current.getAttributes().getNamedItem("name") : null;
            if (nameAttr != null) {
                // Spelled out rather than relying on Attr.toString(), whose
                // format is implementation-defined; this is the name="value"
                // form the JDK's bundled parser is understood to print.
                step = "*[@" + nameAttr.getNodeName() + "=\"" + nameAttr.getNodeValue() + "\"]";
            }
            path = "/" + step + path;
        }
        return path;
    }

    static {
        try {
            DOCUMENT_BUILDER = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            // DOCUMENT_BUILDER stays null; instanceMain checks for that.
            e.printStackTrace();
        }
    }
}
