001/* 002 * #%L 003 * Netarchivesuite - common 004 * %% 005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library, 006 * the National Library of France and the Austrian National Library. 007 * %% 008 * This program is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU Lesser General Public License as 010 * published by the Free Software Foundation, either version 2.1 of the 011 * License, or (at your option) any later version. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Lesser Public License for more details. 017 * 018 * You should have received a copy of the GNU General Lesser Public 019 * License along with this program. If not, see 020 * <http://www.gnu.org/licenses/lgpl-2.1.html>. 021 * #L% 022 */ 023package dk.netarkivet.common.utils; 024 025import java.util.ArrayList; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029import java.util.regex.Pattern; 030 031import org.dom4j.Document; 032import org.dom4j.Element; 033import org.dom4j.Node; 034 035import dk.netarkivet.common.exceptions.ArgumentNotValid; 036import dk.netarkivet.common.exceptions.IllegalState; 037 038/** 039 * A class that implements the StringTree<T> interface by backing it with XML. The name of each XML node corresponds to 040 * the identifier of a node in the tree. 041 * 042 * @param <T> The type of XmlTree 043 */ 044@SuppressWarnings({"unchecked"}) 045public class XmlTree<T> implements StringTree<T> { 046 047 /** This matches string values that are valid for identifying a field. */ 048 private static final Pattern LEGAL_FIELD_NAME = Pattern.compile("[a-zA-Z0-9.-]*"); 049 050 /** 051 * This interface defines how the value of an xml leaf is parsed to get a value of type T. 052 */ 053 interface ValueParser<T> { 054 T parse(String s); 055 } 056 057 ; 058 059 /** 060 * A value parser that simply converts an XML node to a string, by trimming the text contents. 061 */ 062 private static final ValueParser<String> TRIMMING_STRING_PARSER = new ValueParser<String>() { 063 public String parse(String s) { 064 return s.trim(); 065 } 066 }; 067 068 /** The element we are encapsulating. */ 069 private final Element element; 070 071 /** If this tree is based on a Document, this is non-null. */ 072 private final Document root; 073 074 /** The parser for contents of leaf nodes. */ 075 private final ValueParser<T> parser; 076 077 /** 078 * Initialise a node in an XML tree. 079 * 080 * @param n The XML node for this node 081 * @param parser The parser that can convert a leaf node to a value of type T. 082 * @throws ArgumentNotValid on null argument, or if n is not of type element or document. 083 */ 084 private XmlTree(Node n, ValueParser<T> parser) { 085 ArgumentNotValid.checkNotNull(n, "Node n"); 086 ArgumentNotValid.checkNotNull(parser, "ValueParser<T> parser"); 087 if (n.getNodeType() == Node.DOCUMENT_NODE) { 088 root = (Document) n; 089 element = null; 090 } else if (n.getNodeType() == Node.ELEMENT_NODE) { 091 element = (Element) n; 092 root = null; 093 } else { 094 throw new ArgumentNotValid("Invalid XML node type '" + n.getNodeTypeName() + "'"); 095 } 096 this.parser = parser; 097 } 098 099 /** 100 * Returns a StringTree<String> view of the given XML node. 101 * 102 * @param n A part of an XML document structure 103 * @return A StringTree<String> backed by the given XML part. 104 * @throws ArgumentNotValid on null argument. 105 */ 106 public static StringTree<String> getStringTree(Node n) { 107 ArgumentNotValid.checkNotNull(n, "Node n"); 108 return new XmlTree<String>(n, TRIMMING_STRING_PARSER); 109 } 110 111 /** 112 * Returns true if this object is a leaf, and thus if getValue is legal. 113 * 114 * @return True if the implementing object is a leaf, false otherwise. 115 */ 116 public boolean isLeaf() { 117 return element != null && elementIsLeaf(element); 118 } 119 120 /** 121 * Get the value of a named sub-leaf. 122 * 123 * @param name Name of the sub-leaf to get the value of. These are strings, and as a shorthand may specify subtrees 124 * of subtrees by separating each level with '.', i.e. getSubtrees("subtree.subsubtree"). 125 * @return The value of the named leaf of this Tree, if it exists. 126 * @throws IllegalState if this StringTree does not have exactly one leaf sub-node with the given name. 127 * @throws ArgumentNotValid if argument is null or empty. 128 */ 129 public T getValue(String name) { 130 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 131 Element n = selectSingleNode(name); 132 if (!elementIsLeaf(n)) { 133 throw new IllegalState("Subtree '" + name + "' is not a leaf"); 134 } 135 return parseLeafNode(n); 136 } 137 138 /** 139 * Get the value of a leaf. 140 * 141 * @return The value of this Tree, if it is a leaf. 142 * @throws IllegalState if this Tree is a node. 143 */ 144 public T getValue() { 145 if (!isLeaf()) { 146 throw new IllegalState("Node is not text, but " 147 + (element != null ? element.getNodeTypeName() : root.getNodeTypeName())); 148 } 149 return parseLeafNode(element); 150 } 151 152 /** 153 * Get the only subtree with the given name. 154 * 155 * @param name The name of the subtree. These are strings, and as a shorthand may specify subtrees of subtrees by 156 * separating each level with '.', i.e. getSubtrees("subtree.subsubtree"). 157 * @return The single subtree with the given name. 158 * @throws IllegalState if this object is a leaf, or there is not exactly one subtree with the given name. 159 * @throws ArgumentNotValid if argument is null or empty. 160 */ 161 public StringTree<T> getSubTree(String name) { 162 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 163 if (isLeaf()) { 164 throw new IllegalState("Cannot find subtrees in a leaf"); 165 } 166 final Element n = selectSingleNode(name); 167 return new XmlTree<T>(n, parser); 168 } 169 170 /** 171 * Get the named subtrees. 172 * 173 * @param name The name of the subtrees. These are strings, and as a shorthand may specify subtrees of subtrees by 174 * separating each level with '.', i.e. getSubtrees("subtree.subsubtree"). 175 * @return All subtrees with the given name, or an empty list for none. 176 * @throws IllegalState if this object is a leaf. 177 * @throws ArgumentNotValid if argument is null or empty. 178 */ 179 public List<StringTree<T>> getSubTrees(String name) { 180 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 181 if (isLeaf()) { 182 throw new IllegalState("Cannot find subtrees in a leaf"); 183 } 184 List<Element> nodeList = selectMultipleNodes(name); 185 List<StringTree<T>> resultList = new ArrayList<StringTree<T>>(nodeList.size()); 186 for (Element n : nodeList) { 187 resultList.add(new XmlTree<T>(n, parser)); 188 } 189 return resultList; 190 } 191 192 /** 193 * Get a map of all the children of this node. 194 * 195 * @return Map of children of this node. 196 * @throws IllegalState if this object is a leaf. 197 */ 198 public Map<String, List<StringTree<T>>> getChildMultimap() { 199 if (isLeaf()) { 200 throw new IllegalState("Cannot find subtrees in a leaf"); 201 } 202 Map<String, List<StringTree<T>>> children = new HashMap<String, List<StringTree<T>>>(); 203 List<Element> nodeList = getChildNodes(); 204 if (nodeList != null) { 205 for (Element n : nodeList) { 206 List<StringTree<T>> childList = children.get(n.getName()); 207 if (childList == null) { 208 childList = new ArrayList<StringTree<T>>(); 209 children.put(n.getName(), childList); 210 } 211 childList.add(new XmlTree<T>(n, parser)); 212 } 213 } 214 return children; 215 } 216 217 /** 218 * Get a map of all direct subtrees, assuming that all subtrees are uniquely named. 219 * 220 * @return Map of all subtrees. 221 * @throws IllegalState if this object is a leaf, or if the subtrees are not uniquely named. 222 */ 223 public Map<String, StringTree<T>> getChildMap() { 224 if (isLeaf()) { 225 throw new IllegalState("Cannot find subtrees in a leaf"); 226 } 227 Map<String, List<StringTree<T>>> map = getChildMultimap(); 228 return convertMultimapToMap(map); 229 } 230 231 /** 232 * Get a multimap of the names and values of all subtrees, assuming that all subtrees are leafs. 233 * 234 * @return Multimap from subtree names to values of their leaves. 235 * @throws ArgumentNotValid if this object is not a node, or if any of its children are not leaves. 236 */ 237 public Map<String, List<T>> getLeafMultimap() { 238 if (isLeaf()) { 239 throw new IllegalState("Cannot find subtrees in a leaf"); 240 } 241 Map<String, List<T>> children = new HashMap<String, List<T>>(); 242 List<Element> nodeList = getChildNodes(); 243 if (nodeList != null) { 244 for (Element n : nodeList) { 245 if (!elementIsLeaf(n)) { 246 throw new IllegalState("Child " + n.getName() + " is not a leaf"); 247 } 248 List<T> childList = children.get(n.getName()); 249 if (childList == null) { 250 childList = new ArrayList<T>(); 251 children.put(n.getName(), childList); 252 } 253 childList.add(parseLeafNode(n)); 254 } 255 } 256 return children; 257 } 258 259 /** 260 * Get a map of the names and values of all subtrees, assuming that all subtrees are leafs and are uniquely named. 261 * 262 * @return Map from subtree names to values of their leaves. 263 * @throws IllegalState if this object is a leaf or if the subtrees are not uniquely named, or if any of its 264 * children are not leaves. 265 */ 266 public Map<String, T> getLeafMap() { 267 if (isLeaf()) { 268 throw new IllegalState("Cannot find subtrees in a leaf"); 269 } 270 Map<String, List<T>> map = getLeafMultimap(); 271 return convertMultimapToMap(map); 272 } 273 274 /** 275 * Select a list of nodes from the current tree, resolving dotted paths. Currently, if any part of the dotted path 276 * has multiple subtrees with the given name, this method will find all that match. 277 * 278 * @param name Name of a node (tree or leaf), possibly via a dotted path 279 * @return A list of nodes found via the given name from the root of this tree, mapping names to nodes. 280 * @throws ArgumentNotValid on null or empty argument, or if the path is illegal 281 */ 282 private List<Element> selectMultipleNodes(String name) { 283 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 284 ArgumentNotValid.checkTrue(LEGAL_FIELD_NAME.matcher(name).matches(), 285 "Name must contain only alphanumeric, dash and period"); 286 // parts contains the dotted path, split into individual components 287 String[] parts = name.split("\\."); 288 // elements contains the root elements to find children from. 289 List<Element> elements = new ArrayList<Element>(); 290 int i = 0; 291 if (root != null) { 292 if (parts[i].equals(root.getRootElement().getName())) { 293 elements.add(root.getRootElement()); 294 ++i; 295 } // We allow zero-element results here, so a no-match is okay. 296 } else { 297 elements.add(element); 298 } 299 for (; i < parts.length; ++i) { 300 // newElements contains the children matching the name from the 301 // dotted path 302 List<Element> newElements = new ArrayList<Element>(); 303 final String subname = parts[i]; 304 for (Element n : elements) { 305 List<Element> childList = n.elements(subname); 306 if (childList != null) { 307 newElements.addAll(childList); 308 } 309 } 310 // loop to find children of the found nodes with the next name in 311 // the dotted paths 312 elements = newElements; 313 } 314 return elements; 315 } 316 317 /** 318 * Select a single node from the current tree, resolving dotted paths. 319 * 320 * @param name Name of a node (tree or leaf), possibly via a dotted path 321 * @return A single node found via the given name from the root of this tree. 322 * @throws ArgumentNotValid on null or empty argument, or if the path is illegal 323 * @throws IllegalState if there is no such node, or if there is more than one candidate for the node. 324 */ 325 private Element selectSingleNode(String name) { 326 ArgumentNotValid.checkNotNullOrEmpty(name, "String name"); 327 List<Element> elements = selectMultipleNodes(name); 328 if (elements.size() == 0) { 329 throw new IllegalState("No subtree with name '" + name + "'"); 330 } 331 if (elements.size() > 1) { 332 throw new IllegalState("Multiple subtrees with name '" + name + "'"); 333 } 334 return elements.get(0); 335 } 336 337 /** 338 * Parse the contents of this leaf node according to the parser. 339 * 340 * @param e A leaf node to parse. 341 * @return The parsed contents of the node. 342 */ 343 private T parseLeafNode(Element e) { 344 return parser.parse(e.getText()); 345 } 346 347 /** 348 * Returns true if the given node is a leaf (i.e. has only text). 349 * 350 * @param e A node to check 351 * @return True if the given node is a leaf. 352 */ 353 private boolean elementIsLeaf(Element e) { 354 return e.isTextOnly(); 355 } 356 357 /** 358 * Get the nodes that are children of the current node. This works for both Documents and Elements. 359 * 360 * @return List of Element objects that are children of the current node. 361 */ 362 private List<Element> getChildNodes() { 363 List<Element> nodeList; 364 if (root != null) { 365 nodeList = new ArrayList<Element>(1); 366 nodeList.add(root.getRootElement()); 367 } else { 368 nodeList = (List<Element>) element.elements(); 369 } 370 return nodeList; 371 } 372 373 /** 374 * Convert a multimap into a map, checking that there is only one value for each key. 375 * 376 * @param map The multimap to convert from 377 * @return The map to convert to. 378 * @throws IllegalState if the map does not contain exactly one value for each key. 379 */ 380 private static <K, V> Map<K, V> convertMultimapToMap(Map<K, List<V>> map) { 381 Map<K, V> result = new HashMap<K, V>(); 382 for (Map.Entry<K, List<V>> entry : map.entrySet()) { 383 if (entry.getValue().size() != 1) { 384 throw new IllegalState("More than one value for key '" + entry.getKey() + "' found"); 385 } 386 result.put(entry.getKey(), entry.getValue().get(0)); 387 } 388 return result; 389 } 390 391}