001/* 002 * #%L 003 * Netarchivesuite - common 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023 024package dk.netarkivet.common.utils; 025 026import java.io.File; 027import java.io.FileOutputStream; 028import java.io.IOException; 029import java.io.InputStream; 030import java.io.StringReader; 031import java.util.List; 032 033import javax.xml.XMLConstants; 034import javax.xml.parsers.DocumentBuilder; 035import javax.xml.parsers.DocumentBuilderFactory; 036import javax.xml.parsers.ParserConfigurationException; 037import javax.xml.transform.Source; 038import javax.xml.transform.dom.DOMSource; 039import javax.xml.transform.stream.StreamSource; 040import javax.xml.validation.Schema; 041import javax.xml.validation.SchemaFactory; 042import javax.xml.validation.Validator; 043 044import org.dom4j.Document; 045import org.dom4j.DocumentException; 046import org.dom4j.Node; 047import org.dom4j.io.SAXReader; 048import org.xml.sax.SAXException; 049 050import dk.netarkivet.common.exceptions.ArgumentNotValid; 051import dk.netarkivet.common.exceptions.IOFailure; 052 053/** 054 * Utilities for handling XML-files. 055 */ 056@SuppressWarnings({"unchecked"}) 057public class XmlUtils { 058 /** 059 * Read and parse an XML-file, and return a Document object representing this object. 060 * 061 * @param f a given xml file 062 * @return a Document representing the xml file 063 * @throws IOFailure if unable to read the xml file or unable to parse the file as XML 064 */ 065 public static Document getXmlDoc(File f) throws IOFailure { 066 ArgumentNotValid.checkNotNull(f, "File f"); 067 SAXReader reader = new SAXReader(); 068 if (!f.canRead()) { 069 throw new IOFailure("Could not read file: '" + f + "'"); 070 } 071 072 try { 073 return reader.read(f); 074 } catch (DocumentException e) { 075 throw new IOFailure("Could not parse the file as XML: '" + f + "'", e); 076 } 077 } 078 079 /** 080 * Read and parse an XML stream, and return a Document object representing this object. 081 * 082 * @param resourceAsStream a given xml document 083 * @return a Document representing the xml document 084 * @throws IOFailure if unable to read the xml document or unable to parse the document as XML 085 */ 086 public static Document getXmlDoc(InputStream resourceAsStream) { 087 ArgumentNotValid.checkNotNull(resourceAsStream, "InputStream resourceAsStream"); 088 SAXReader reader = new SAXReader(); 089 try { 090 return reader.read(resourceAsStream); 091 } catch (DocumentException e) { 092 throw new IOFailure("Could not parse inputstream as XML:" + resourceAsStream, e); 093 } 094 } 095 096 /** 097 * Set a XmlNode defined by the given XPath to the given value. 098 * 099 * @param doc the Document, which is being modified 100 * @param xpath the given XPath 101 * @param value the given value 102 * @throws IOFailure If the given XPath was not found in the document 103 */ 104 public static void setNode(Document doc, String xpath, String value) { 105 ArgumentNotValid.checkNotNull(doc, "Document doc"); 106 ArgumentNotValid.checkNotNullOrEmpty(xpath, "String xpath"); 107 ArgumentNotValid.checkNotNull(value, "String value"); 108 109 Node xpathNode = doc.selectSingleNode(xpath); 110 if (xpathNode == null) { 111 throw new IOFailure("Element '" + xpath + "' could not be found in the document '" 112 + doc.getRootElement().getName() + "'!"); 113 } 114 xpathNode.setText(value); 115 } 116 117 /** 118 * Set a List of XmlNodes defined by the given XPath to the given value. 119 * 120 * @param doc the Document, which is being modified 121 * @param xpath the given XPath 122 * @param value the given value 123 * @throws IOFailure If the given XPath was not found in the document 124 */ 125 public static void setNodes(Document doc, String xpath, String value) { 126 ArgumentNotValid.checkNotNull(doc, "Document doc"); 127 ArgumentNotValid.checkNotNullOrEmpty(xpath, "String xpath"); 128 ArgumentNotValid.checkNotNull(value, "String value"); 129 List<Node> xpathNodes = doc.selectNodes(xpath); 130 if (xpathNodes == null) { 131 throw new IOFailure("Element '" + xpath + "' could not be found in the document '" 132 + doc.getRootElement().getName() + "'!"); 133 } 134 for (int i = 0; i < xpathNodes.size(); ++i) { 135 xpathNodes.get(i).setText(value); 136 } 137 } 138 139 /** 140 * Validate that the settings xml files conforms to the XSD. 141 * 142 * @param xsdFile Schema to check settings against. 143 * @throws ArgumentNotValid if unable to validate the settings files 144 * @throws IOFailure If unable to read the settings files and/or the xsd file. 145 */ 146 public static void validateWithXSD(File xsdFile) { 147 ArgumentNotValid.checkNotNull(xsdFile, "File xsdFile"); 148 List<File> settingsFiles = Settings.getSettingsFiles(); 149 for (File settingsFile : settingsFiles) { 150 try { 151 DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); 152 builderFactory.setNamespaceAware(true); 153 DocumentBuilder parser = builderFactory.newDocumentBuilder(); 154 org.w3c.dom.Document document = parser.parse(settingsFile); 155 156 // create a SchemaFactory capable of understanding WXS schemas 157 SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); 158 159 // load a WXS schema, represented by a Schema instance 160 Source schemaFile = new StreamSource(xsdFile); 161 Schema schema = factory.newSchema(schemaFile); 162 163 // create a Validator instance, which can be used to validate an 164 // instance document 165 Validator validator = schema.newValidator(); 166 167 // validate the DOM tree 168 try { 169 validator.validate(new DOMSource(document)); 170 } catch (SAXException e) { 171 // instance document is invalid! 172 throw new ArgumentNotValid( 173 "Settings file '" + settingsFile + "' does not validate using '" + xsdFile + "'", e); 174 } 175 } catch (IOException e) { 176 throw new IOFailure("Error while validating: ", e); 177 } catch (ParserConfigurationException e) { 178 final String msg = "Error validating settings file '" + settingsFile + "'"; 179 throw new ArgumentNotValid(msg, e); 180 } catch (SAXException e) { 181 final String msg = "Error validating settings file '" + settingsFile + "'"; 182 throw new ArgumentNotValid(msg, e); 183 } 184 } 185 } 186 187 /** 188 * Write document tree to file. 189 * 190 * @param doc the document tree to save. 191 * @param f the file to write the document to. 192 * @throws IOFailure On trouble writing XML file to disk. 193 */ 194 public static void writeXmlToFile(Document doc, File f) throws IOFailure { 195 FileOutputStream fos = null; 196 try { 197 try { 198 fos = new FileOutputStream(f); 199 StreamUtils.writeXmlToStream(doc, fos); 200 } finally { 201 if (fos != null) { 202 fos.close(); 203 } 204 } 205 } catch (IOException e) { 206 throw new IOFailure("Unable to write XML to file '" + f.getAbsolutePath() + "'", e); 207 } 208 } 209 210 /** 211 * Parses a given string to produce a {@link org.w3c.dom.Document} instance. 212 * 213 * @param xml Some XML text. 214 * @return a {@link org.w3c.dom.Document} parsed from the given xml. 215 * @throws DocumentException If unable to parse the given text as XML. 216 */ 217 public static Document documentFromString(String xml) throws DocumentException { 218 Document doc; 219 SAXReader reader = new SAXReader(); 220 StringReader in = new StringReader(xml); 221 doc = reader.read(in); 222 in.close(); 223 return doc; 224 } 225 226}