/*
 * #%L
 * Netarchivesuite - deploy
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.deploy;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.utils.XmlUtils;

/**
 * Wrapper around a dom4j element tree, used for reading and overwriting values in XML files.
 * <p>
 * An instance holds the root element of one (sub)tree. The root is either the root element of a parsed file or a
 * private copy of a branch taken from another tree.
 */
public class XmlStructure {
    /** The log, for logging stuff instead of displaying them directly. */
    private static final Logger log = LoggerFactory.getLogger(XmlStructure.class);
    /** The root of this branch in the XML tree. Assigned once by the constructors. */
    private final Element root;

    /**
     * Constructor. Creates an instance of this data-structure from an XML file.
     *
     * @param f The XML file. Must exist.
     * @param encoding The encoding to use to read the file.
     * @throws ArgumentNotValid If an argument is null/empty or the file does not exist.
     * @throws IOFailure If the file cannot be read or parsed as XML.
     */
    public XmlStructure(File f, final String encoding) {
        ArgumentNotValid.checkNotNull(f, "File f");
        ArgumentNotValid.checkNotNullOrEmpty(encoding, "String encoding");
        ArgumentNotValid.checkTrue(f.exists(), "File f : " + f.getName() + " does not exist!");
        // Parse the file and keep only its root element.
        Document doc = loadDocument(f, encoding);
        root = doc.getRootElement();
    }

    /**
     * Constructor. Creates a new instance of this data-structure from the branch of another instance.
     * <p>
     * The branch is copied, so later changes made through this instance do not affect the given element.
     *
     * @param subTreeRoot The root of the tree for this instance.
     * @throws ArgumentNotValid If the element is null.
     */
    public XmlStructure(Element subTreeRoot) {
        ArgumentNotValid.checkNotNull(subTreeRoot, "Element tree");
        root = subTreeRoot.createCopy();
    }

    /**
     * Retrieves the root of this branch in the XML tree.
     *
     * @return The root element (the live element, not a copy).
     */
    public Element getRoot() {
        return root;
    }

    /**
     * Loads the file into the dom4j document data structure.
     *
     * @param f The XML file to be loaded.
     * @param encoding The encoding to use to read the file.
     * @return The XML file loaded into the document data structure.
     * @throws IOFailure If the file is not readable or cannot be parsed as XML.
     */
    private Document loadDocument(File f, final String encoding) throws IOFailure {
        ArgumentNotValid.checkNotNull(f, "File f");
        SAXReader reader = new SAXReader();
        reader.setEncoding(encoding);
        if (!f.canRead()) {
            String msg = "Could not read file: '" + f.getAbsolutePath() + "'";
            log.debug(msg);
            throw new IOFailure(msg);
        }
        try {
            return reader.read(f);
        } catch (DocumentException e) {
            String msg = "Could not parse file: '" + f.getAbsolutePath() + "' as XML.";
            log.warn(msg, e);
            throw new IOFailure(msg, e);
        }
    }

    /**
     * Retrieves a single direct child of the root.
     *
     * @param name The name of the child branch.
     * @return The result of looking up {@code name} directly below the root (dom4j {@code Element.element}).
     * @throws ArgumentNotValid If the name is null or empty.
     */
    public Element getChild(String name) {
        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
        return root.element(name);
    }

    /**
     * Retrieves all direct children of the root with a given name.
     *
     * @param name The name of the children to be found.
     * @return A list of the children with the given name.
     * @throws ArgumentNotValid If the name is null or empty.
     */
    @SuppressWarnings("unchecked")
    public List<Element> getChildren(String name) {
        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
        return root.elements(name);
    }

    /**
     * Retrieves the XML code for this entire branch.
     *
     * @return The XML code.
     */
    public String getXML() {
        return root.asXML();
    }

    /**
     * Follows a path from the root, taking the first matching child at every step.
     *
     * @param name The path to the child, one element name per step. An empty path yields the root itself.
     * @return The element at the end of the path, or null if some step of the path does not exist.
     * @throws ArgumentNotValid If the path array is null.
     */
    public Element getSubChild(String... name) {
        ArgumentNotValid.checkNotNull(name, "String ...name");
        Element e = root;
        for (String n : name) {
            if (e == null) {
                // A previous step did not resolve, so nothing further down can exist.
                log.debug("Element {} is not a branch in the tree. Null returned", n);
                return null;
            }
            e = e.element(n);
        }
        return e;
    }

    /**
     * Retrieves the content of a branch deep in the tree structure.
     *
     * @param name Specifies the path in the tree (e.g. in HTML: getSubChildValue("HTML", "HEAD", "TITLE") to get
     * the title of a HTML document).
     * @return The trimmed text of the element if it contains only text. If it is not a leaf, the entire XML-branch
     * is returned. Returns null if the path to the branch cannot be found.
     * @throws ArgumentNotValid If the path array is null.
     */
    public String getSubChildValue(String... name) {
        ArgumentNotValid.checkNotNull(name, "String ...name");
        Element e = getSubChild(name);
        if (e == null) {
            return null;
        }
        if (e.isTextOnly()) {
            return e.getText().trim();
        }
        log.debug("Element is not text. The entire XML-branch is returned.");
        return e.asXML();
    }

    /**
     * Retrieves the content of a leaf deep in the tree structure.
     *
     * @param path Specifies the path in the tree (e.g. in HTML: getLeafValue("HTML", "HEAD", "TITLE") to get the
     * title of a HTML document).
     * @return The trimmed text of the leaf. Returns null if the element is not a leaf or if the path to it cannot
     * be found.
     * @throws ArgumentNotValid If the path array is null.
     */
    public String getLeafValue(String... path) {
        ArgumentNotValid.checkNotNull(path, "String ...name");
        Element e = getSubChild(path);
        if (e != null && e.isTextOnly()) {
            return e.getText().trim();
        }
        log.debug("Element is not text. Null returned.");
        return null;
    }

    /**
     * Retrieves the content of all the elements reachable along a path, following every sibling with a matching
     * name at each step (see {@link #getAllChildrenAlongPath(Element, String...)}).
     *
     * @param path Specifies the path in the tree (e.g. in HTML: getLeafValues("HTML", "HEAD", "TITLE")).
     * @return The trimmed text of every element found. If none are found, an empty array is returned.
     * @throws ArgumentNotValid If the path array is null.
     */
    public String[] getLeafValues(String... path) {
        ArgumentNotValid.checkNotNull(path, "String ...path");

        List<Element> elemList = getAllChildrenAlongPath(root, path);
        String[] res = new String[elemList.size()];
        for (int i = 0; i < res.length; i++) {
            res[i] = elemList.get(i).getText().trim();
        }
        return res;
    }

    /**
     * Initialises the process of overwriting a part of the tree with the content of another tree.
     * <p>
     * This is used for the Settings attributes in the deploy. An {@link IllegalState} raised while overwriting is
     * logged at trace level and otherwise ignored.
     *
     * @param overwriter The settings instance for the current element.
     * @throws ArgumentNotValid If the overwriter is null.
     */
    public void overWrite(Element overwriter) {
        ArgumentNotValid.checkNotNull(overwriter, "Element overwriter");
        try {
            overWriting(root, overwriter);
        } catch (IllegalState e) {
            log.trace("Overwriting illegal area. ", e);
        }
    }

    /**
     * Overwrites the current tree with the overwriter tree.
     * <ul>
     * <li>Branches that only exist in the overwriter are copied into the current tree.</li>
     * <li>If a name occurs exactly once in both trees and the current element is text-only, its text is replaced
     * by the trimmed text of the overwriter element; otherwise this function recurses into the pair.</li>
     * <li>If a name occurs in both trees but not exactly once in each, all current elements with that name are
     * removed and replaced by copies of the overwriter elements.</li>
     * </ul>
     *
     * @param current The base element.
     * @param overwriter The element to have its values overwrite the base element.
     * @throws IllegalState Declared for the case where a leaf in current is about to be replaced by a tree.
     * NOTE(review): nothing in this method throws it explicitly - confirm whether this is still applicable.
     */
    @SuppressWarnings("unchecked")
    private void overWriting(Element current, Element overwriter) throws IllegalState {
        ArgumentNotValid.checkNotNull(current, "Element current");
        ArgumentNotValid.checkNotNull(overwriter, "Element overwriter");
        // The child elements of the overwriter, i.e. the values to apply.
        List<Element> attributes = overwriter.elements();
        // Additions are collected and applied after the loop.
        List<Element> addElements = new ArrayList<Element>();

        for (Element e : attributes) {
            // Find the corresponding elements in the current tree.
            List<Element> curElems = current.elements(e.getName());

            // If no such elements exist in the current tree, add the branch.
            if (curElems.isEmpty()) {
                addElements.add(e);
            } else {
                List<Element> overElems = overwriter.elements(e.getName());

                // If the lists have a 1-1 ratio, then overwrite in place.
                if (curElems.size() == 1 && overElems.size() == 1) {
                    Element curE = curElems.get(0);
                    // If leaf overwrite value, otherwise repeat for branches.
                    if (curE.isTextOnly()) {
                        curE.setText(e.getText().trim()); // TODO Is this necessary
                    } else {
                        overWriting(curE, e);
                    }
                } else {
                    // A different amount of current branches exist (not 0).
                    // Therefore remove the branches in current tree, and add replacements.
                    for (Element curE : curElems) {
                        current.remove(curE);
                    }
                    // Add only this branch; its same-named siblings in the overwriter will find no
                    // current elements left and are added by the 'isEmpty' case above.
                    addElements.add(e);
                }
            }
        }

        // Add all the new branches to the current branch (as copies, so the overwriter is left untouched).
        for (Element e : addElements) {
            current.add(e.createCopy());
        }
    }

    /**
     * Walks from a start element along a path, taking the first matching child at every step.
     *
     * @param start The element to start from. Must not be null.
     * @param path The names of the elements to descend through.
     * @return The element at the end of the path, or null if a step of the path does not exist.
     */
    private static Element followPath(Element start, String... path) {
        Element current = start;
        for (String step : path) {
            current = current.element(step);
            if (current == null) {
                return null;
            }
        }
        return current;
    }

    /**
     * Overwrites the leaf at the end of the path from the branch. Nothing happens if the path does not exist
     * below the branch.
     *
     * @param branch The branch where to begin.
     * @param value The value to overwrite the leaf with.
     * @param path The path from the branch to the leaf. Must contain at least one step.
     * @throws ArgumentNotValid If the branch or path is null, the value is null or empty, or the path is empty.
     */
    public void overWriteOnly(Element branch, String value, String... path) {
        ArgumentNotValid.checkNotNullOrEmpty(value, "String Value");
        ArgumentNotValid.checkNotNull(path, "String path");
        ArgumentNotValid.checkPositive(path.length, "Size of String path[]");
        ArgumentNotValid.checkNotNull(branch, "Element branch");

        Element leaf = followPath(branch, path);
        // Do not overwrite non-existing element.
        if (leaf != null) {
            leaf.setText(value);
        }
    }

    /**
     * Specific overwrite function for overwriting a single character in the text of a leaf. Nothing happens if the
     * path does not exist below the branch.
     *
     * @param branch The initial branch of the XML tree.
     * @param position The index in the leaf text of the character to change. It is validated with
     * {@code ArgumentNotValid.checkPositive} and must lie within the text.
     * @param value The new value of the character to change.
     * @param path The path to the leaf of the string to change. Must contain at least one step.
     * @throws ArgumentNotValid If the branch or path is null, the path is empty, or the position is rejected by
     * the positivity check or lies beyond the end of the leaf text.
     */
    public void overWriteOnlyInt(Element branch, int position, char value, String... path) {
        ArgumentNotValid.checkNotNull(path, "String path");
        ArgumentNotValid.checkPositive(path.length, "Size of String path[]");
        ArgumentNotValid.checkPositive(position, "int position");
        ArgumentNotValid.checkNotNull(branch, "Element branch");

        Element leaf = followPath(branch, path);
        // Do not overwrite non-existing element.
        if (leaf == null) {
            return;
        }

        char[] txt = leaf.getText().toCharArray();
        // Report an out-of-range position as an invalid argument instead of a raw array index failure.
        ArgumentNotValid.checkTrue(position < txt.length, "int position : " + position
                + " is outside the text of length " + txt.length);
        txt[position] = value;
        leaf.setText(new String(txt));
    }

    /**
     * Creates a dom4j element from a string of XML.
     * <p>
     * The string is handed to the parser as UTF-8 bytes, so the result does not depend on the platform default
     * charset.
     *
     * @param content The XML content.
     * @return The root element of the parsed content, or null if the content could not be parsed.
     * @throws ArgumentNotValid If the content is null or empty.
     */
    public static Element makeElementFromString(String content) {
        ArgumentNotValid.checkNotNullOrEmpty(content, "String name");

        try {
            ByteArrayInputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
            Document doc = XmlUtils.getXmlDoc(in);
            return doc.getRootElement();
        } catch (Exception e) {
            log.warn("makeElementFromString error caugth. Null returned.", e);
            return null;
        }
    }

    /**
     * Creates the XML code for a path with content at its end: one begin scope per path step, then the content,
     * then the matching end scopes in reverse order.
     *
     * @param content The content at the leaf of the branch.
     * @param path The path to the branch. An empty path yields just the content.
     * @return The XML code for the branch with content.
     * @throws ArgumentNotValid If the content is null or empty, or the path array is null.
     */
    public static String pathAndContentToXML(String content, String... path) {
        ArgumentNotValid.checkNotNullOrEmpty(content, "String content");
        ArgumentNotValid.checkNotNull(path, "String ... path");

        StringBuilder res = new StringBuilder();

        // Write the path down to the leaf.
        for (String st : path) {
            res.append(Constants.changeToXMLBeginScope(st));
        }

        res.append(content);

        // Write the path back from the leaf (close the XML scopes).
        for (int i = path.length - 1; i >= 0; i--) {
            res.append(Constants.changeToXMLEndScope(path[i]));
        }

        return res.toString();
    }

    /**
     * Retrieves all the elements at the end of a path, following every sibling branch with a matching name along
     * the way. The function calls itself recursively with the first step of the path removed.
     *
     * @param current The current element to retrieve children along the path.
     * @param path The path to the leafs.
     * @return The complete list of elements which can be found along the path. Empty if the path is empty or
     * nothing matches.
     * @throws ArgumentNotValid If the element or the path array is null.
     */
    @SuppressWarnings("unchecked")
    public static List<Element> getAllChildrenAlongPath(Element current, String... path) {
        ArgumentNotValid.checkNotNull(current, "Element current");
        ArgumentNotValid.checkNotNull(path, "String ... path");

        if (path.length == 0) {
            return new ArrayList<Element>();
        }
        if (path.length == 1) {
            // Last step of the path: these children are the goal.
            return current.elements(path[0]);
        }

        // The remaining path, without its first step.
        String[] nextPath = new String[path.length - 1];
        System.arraycopy(path, 1, nextPath, 0, nextPath.length);

        // Collect the results from every child matching the first step.
        List<Element> res = new ArrayList<Element>();
        List<Element> children = current.elements(path[0]);
        for (Element el : children) {
            res.addAll(getAllChildrenAlongPath(el, nextPath));
        }
        return res;
    }

}