001/*
002 * #%L
003 * Netarchivesuite - common
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.common.utils;
024
025import java.util.ArrayList;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029import java.util.regex.Pattern;
030
031import org.dom4j.Document;
032import org.dom4j.Element;
033import org.dom4j.Node;
034
035import dk.netarkivet.common.exceptions.ArgumentNotValid;
036import dk.netarkivet.common.exceptions.IllegalState;
037
038/**
039 * A class that implements the StringTree<T> interface by backing it with XML. The name of each XML node corresponds to
040 * the identifier of a node in the tree.
041 *
042 * @param <T> The type of XmlTree
043 */
044@SuppressWarnings({"unchecked"})
045public class XmlTree<T> implements StringTree<T> {
046
047    /** This matches string values that are valid for identifying a field. */
048    private static final Pattern LEGAL_FIELD_NAME = Pattern.compile("[a-zA-Z0-9.-]*");
049
050    /**
051     * This interface defines how the value of an xml leaf is parsed to get a value of type T.
052     */
053    interface ValueParser<T> {
054        T parse(String s);
055    }
056
057    ;
058
059    /**
060     * A value parser that simply converts an XML node to a string, by trimming the text contents.
061     */
062    private static final ValueParser<String> TRIMMING_STRING_PARSER = new ValueParser<String>() {
063        public String parse(String s) {
064            return s.trim();
065        }
066    };
067
068    /** The element we are encapsulating. */
069    private final Element element;
070
071    /** If this tree is based on a Document, this is non-null. */
072    private final Document root;
073
074    /** The parser for contents of leaf nodes. */
075    private final ValueParser<T> parser;
076
077    /**
078     * Initialise a node in an XML tree.
079     *
080     * @param n The XML node for this node
081     * @param parser The parser that can convert a leaf node to a value of type T.
082     * @throws ArgumentNotValid on null argument, or if n is not of type element or document.
083     */
084    private XmlTree(Node n, ValueParser<T> parser) {
085        ArgumentNotValid.checkNotNull(n, "Node n");
086        ArgumentNotValid.checkNotNull(parser, "ValueParser<T> parser");
087        if (n.getNodeType() == Node.DOCUMENT_NODE) {
088            root = (Document) n;
089            element = null;
090        } else if (n.getNodeType() == Node.ELEMENT_NODE) {
091            element = (Element) n;
092            root = null;
093        } else {
094            throw new ArgumentNotValid("Invalid XML node type '" + n.getNodeTypeName() + "'");
095        }
096        this.parser = parser;
097    }
098
099    /**
100     * Returns a StringTree&lt;String&gt; view of the given XML node.
101     *
102     * @param n A part of an XML document structure
103     * @return A StringTree&lt;String&gt; backed by the given XML part.
104     * @throws ArgumentNotValid on null argument.
105     */
106    public static StringTree<String> getStringTree(Node n) {
107        ArgumentNotValid.checkNotNull(n, "Node n");
108        return new XmlTree<String>(n, TRIMMING_STRING_PARSER);
109    }
110
111    /**
112     * Returns true if this object is a leaf, and thus if getValue is legal.
113     *
114     * @return True if the implementing object is a leaf, false otherwise.
115     */
116    public boolean isLeaf() {
117        return element != null && elementIsLeaf(element);
118    }
119
120    /**
121     * Get the value of a named sub-leaf.
122     *
123     * @param name Name of the sub-leaf to get the value of. These are strings, and as a shorthand may specify subtrees
124     * of subtrees by separating each level with '.', i.e. getSubtrees("subtree.subsubtree").
125     * @return The value of the named leaf of this Tree, if it exists.
126     * @throws IllegalState if this StringTree does not have exactly one leaf sub-node with the given name.
127     * @throws ArgumentNotValid if argument is null or empty.
128     */
129    public T getValue(String name) {
130        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
131        Element n = selectSingleNode(name);
132        if (!elementIsLeaf(n)) {
133            throw new IllegalState("Subtree '" + name + "' is not a leaf");
134        }
135        return parseLeafNode(n);
136    }
137
138    /**
139     * Get the value of a leaf.
140     *
141     * @return The value of this Tree, if it is a leaf.
142     * @throws IllegalState if this Tree is a node.
143     */
144    public T getValue() {
145        if (!isLeaf()) {
146            throw new IllegalState("Node is not text, but "
147                    + (element != null ? element.getNodeTypeName() : root.getNodeTypeName()));
148        }
149        return parseLeafNode(element);
150    }
151
152    /**
153     * Get the only subtree with the given name.
154     *
155     * @param name The name of the subtree. These are strings, and as a shorthand may specify subtrees of subtrees by
156     * separating each level with '.', i.e. getSubtrees("subtree.subsubtree").
157     * @return The single subtree with the given name.
158     * @throws IllegalState if this object is a leaf, or there is not exactly one subtree with the given name.
159     * @throws ArgumentNotValid if argument is null or empty.
160     */
161    public StringTree<T> getSubTree(String name) {
162        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
163        if (isLeaf()) {
164            throw new IllegalState("Cannot find subtrees in a leaf");
165        }
166        final Element n = selectSingleNode(name);
167        return new XmlTree<T>(n, parser);
168    }
169
170    /**
171     * Get the named subtrees.
172     *
173     * @param name The name of the subtrees. These are strings, and as a shorthand may specify subtrees of subtrees by
174     * separating each level with '.', i.e. getSubtrees("subtree.subsubtree").
175     * @return All subtrees with the given name, or an empty list for none.
176     * @throws IllegalState if this object is a leaf.
177     * @throws ArgumentNotValid if argument is null or empty.
178     */
179    public List<StringTree<T>> getSubTrees(String name) {
180        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
181        if (isLeaf()) {
182            throw new IllegalState("Cannot find subtrees in a leaf");
183        }
184        List<Element> nodeList = selectMultipleNodes(name);
185        List<StringTree<T>> resultList = new ArrayList<StringTree<T>>(nodeList.size());
186        for (Element n : nodeList) {
187            resultList.add(new XmlTree<T>(n, parser));
188        }
189        return resultList;
190    }
191
192    /**
193     * Get a map of all the children of this node.
194     *
195     * @return Map of children of this node.
196     * @throws IllegalState if this object is a leaf.
197     */
198    public Map<String, List<StringTree<T>>> getChildMultimap() {
199        if (isLeaf()) {
200            throw new IllegalState("Cannot find subtrees in a leaf");
201        }
202        Map<String, List<StringTree<T>>> children = new HashMap<String, List<StringTree<T>>>();
203        List<Element> nodeList = getChildNodes();
204        if (nodeList != null) {
205            for (Element n : nodeList) {
206                List<StringTree<T>> childList = children.get(n.getName());
207                if (childList == null) {
208                    childList = new ArrayList<StringTree<T>>();
209                    children.put(n.getName(), childList);
210                }
211                childList.add(new XmlTree<T>(n, parser));
212            }
213        }
214        return children;
215    }
216
217    /**
218     * Get a map of all direct subtrees, assuming that all subtrees are uniquely named.
219     *
220     * @return Map of all subtrees.
221     * @throws IllegalState if this object is a leaf, or if the subtrees are not uniquely named.
222     */
223    public Map<String, StringTree<T>> getChildMap() {
224        if (isLeaf()) {
225            throw new IllegalState("Cannot find subtrees in a leaf");
226        }
227        Map<String, List<StringTree<T>>> map = getChildMultimap();
228        return convertMultimapToMap(map);
229    }
230
231    /**
232     * Get a multimap of the names and values of all subtrees, assuming that all subtrees are leafs.
233     *
234     * @return Multimap from subtree names to values of their leaves.
235     * @throws ArgumentNotValid if this object is not a node, or if any of its children are not leaves.
236     */
237    public Map<String, List<T>> getLeafMultimap() {
238        if (isLeaf()) {
239            throw new IllegalState("Cannot find subtrees in a leaf");
240        }
241        Map<String, List<T>> children = new HashMap<String, List<T>>();
242        List<Element> nodeList = getChildNodes();
243        if (nodeList != null) {
244            for (Element n : nodeList) {
245                if (!elementIsLeaf(n)) {
246                    throw new IllegalState("Child " + n.getName() + " is not a leaf");
247                }
248                List<T> childList = children.get(n.getName());
249                if (childList == null) {
250                    childList = new ArrayList<T>();
251                    children.put(n.getName(), childList);
252                }
253                childList.add(parseLeafNode(n));
254            }
255        }
256        return children;
257    }
258
259    /**
260     * Get a map of the names and values of all subtrees, assuming that all subtrees are leafs and are uniquely named.
261     *
262     * @return Map from subtree names to values of their leaves.
263     * @throws IllegalState if this object is a leaf or if the subtrees are not uniquely named, or if any of its
264     * children are not leaves.
265     */
266    public Map<String, T> getLeafMap() {
267        if (isLeaf()) {
268            throw new IllegalState("Cannot find subtrees in a leaf");
269        }
270        Map<String, List<T>> map = getLeafMultimap();
271        return convertMultimapToMap(map);
272    }
273
274    /**
275     * Select a list of nodes from the current tree, resolving dotted paths. Currently, if any part of the dotted path
276     * has multiple subtrees with the given name, this method will find all that match.
277     *
278     * @param name Name of a node (tree or leaf), possibly via a dotted path
279     * @return A list of nodes found via the given name from the root of this tree, mapping names to nodes.
280     * @throws ArgumentNotValid on null or empty argument, or if the path is illegal
281     */
282    private List<Element> selectMultipleNodes(String name) {
283        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
284        ArgumentNotValid.checkTrue(LEGAL_FIELD_NAME.matcher(name).matches(),
285                "Name must contain only alphanumeric, dash and period");
286        // parts contains the dotted path, split into individual components
287        String[] parts = name.split("\\.");
288        // elements contains the root elements to find children from.
289        List<Element> elements = new ArrayList<Element>();
290        int i = 0;
291        if (root != null) {
292            if (parts[i].equals(root.getRootElement().getName())) {
293                elements.add(root.getRootElement());
294                ++i;
295            } // We allow zero-element results here, so a no-match is okay.
296        } else {
297            elements.add(element);
298        }
299        for (; i < parts.length; ++i) {
300            // newElements contains the children matching the name from the
301            // dotted path
302            List<Element> newElements = new ArrayList<Element>();
303            final String subname = parts[i];
304            for (Element n : elements) {
305                List<Element> childList = n.elements(subname);
306                if (childList != null) {
307                    newElements.addAll(childList);
308                }
309            }
310            // loop to find children of the found nodes with the next name in
311            // the dotted paths
312            elements = newElements;
313        }
314        return elements;
315    }
316
317    /**
318     * Select a single node from the current tree, resolving dotted paths.
319     *
320     * @param name Name of a node (tree or leaf), possibly via a dotted path
321     * @return A single node found via the given name from the root of this tree.
322     * @throws ArgumentNotValid on null or empty argument, or if the path is illegal
323     * @throws IllegalState if there is no such node, or if there is more than one candidate for the node.
324     */
325    private Element selectSingleNode(String name) {
326        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
327        List<Element> elements = selectMultipleNodes(name);
328        if (elements.size() == 0) {
329            throw new IllegalState("No subtree with name '" + name + "'");
330        }
331        if (elements.size() > 1) {
332            throw new IllegalState("Multiple subtrees with name '" + name + "'");
333        }
334        return elements.get(0);
335    }
336
337    /**
338     * Parse the contents of this leaf node according to the parser.
339     *
340     * @param e A leaf node to parse.
341     * @return The parsed contents of the node.
342     */
343    private T parseLeafNode(Element e) {
344        return parser.parse(e.getText());
345    }
346
347    /**
348     * Returns true if the given node is a leaf (i.e. has only text).
349     *
350     * @param e A node to check
351     * @return True if the given node is a leaf.
352     */
353    private boolean elementIsLeaf(Element e) {
354        return e.isTextOnly();
355    }
356
357    /**
358     * Get the nodes that are children of the current node. This works for both Documents and Elements.
359     *
360     * @return List of Element objects that are children of the current node.
361     */
362    private List<Element> getChildNodes() {
363        List<Element> nodeList;
364        if (root != null) {
365            nodeList = new ArrayList<Element>(1);
366            nodeList.add(root.getRootElement());
367        } else {
368            nodeList = (List<Element>) element.elements();
369        }
370        return nodeList;
371    }
372
373    /**
374     * Convert a multimap into a map, checking that there is only one value for each key.
375     *
376     * @param map The multimap to convert from
377     * @return The map to convert to.
378     * @throws IllegalState if the map does not contain exactly one value for each key.
379     */
380    private static <K, V> Map<K, V> convertMultimapToMap(Map<K, List<V>> map) {
381        Map<K, V> result = new HashMap<K, V>();
382        for (Map.Entry<K, List<V>> entry : map.entrySet()) {
383            if (entry.getValue().size() != 1) {
384                throw new IllegalState("More than one value for key '" + entry.getKey() + "' found");
385            }
386            result.put(entry.getKey(), entry.getValue().get(0));
387        }
388        return result;
389    }
390
391}