001/*
002 * #%L
003 * Netarchivesuite - deploy
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.deploy;
024
025import java.io.ByteArrayInputStream;
026import java.io.File;
027import java.util.ArrayList;
028import java.util.List;
029
030import org.dom4j.Document;
031import org.dom4j.DocumentException;
032import org.dom4j.Element;
033import org.dom4j.io.SAXReader;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037import dk.netarkivet.common.exceptions.ArgumentNotValid;
038import dk.netarkivet.common.exceptions.IOFailure;
039import dk.netarkivet.common.exceptions.IllegalState;
040import dk.netarkivet.common.utils.XmlUtils;
041
042/**
043 * The structure for handling the XML files.
044 */
045public class XmlStructure {
046    /** the log, for logging stuff instead of displaying them directly. */
047    private static final Logger log = LoggerFactory.getLogger(XmlStructure.class);
048    /** The root of this branch in the XML tree. */
049    private Element root;
050
051    /**
052     * Constructor. Create an instance of this data-structure from an XML file.
053     *
054     * @param f The XML file
055     * @param encoding the encoding to use to read the file
056     */
057    public XmlStructure(File f, final String encoding) {
058        ArgumentNotValid.checkNotNull(f, "File f");
059        ArgumentNotValid.checkNotNullOrEmpty(encoding, "String encoding");
060        ArgumentNotValid.checkTrue(f.exists(), "File f : " + f.getName() + " does not exist!");
061        // get into 'document' format
062        Document doc = loadDocument(f, encoding);
063        // get root node
064        root = doc.getRootElement();
065    }
066
067    /**
068     * Constructor. Creating a new instance of this data-structure from the branch of another instance.
069     *
070     * @param subTreeRoot The root of the tree for this instance
071     */
072    public XmlStructure(Element subTreeRoot) {
073        ArgumentNotValid.checkNotNull(subTreeRoot, "Element tree");
074        root = subTreeRoot.createCopy();
075    }
076
077    /**
078     * Function to retrieving the root of this branch in the XML tree.
079     *
080     * @return The root element
081     */
082    public Element getRoot() {
083        return root;
084    }
085
086    /**
087     * Loading the file into the document data structure.
088     *
089     * @param f The XML file to be loaded.
090     * @param encoding the encoding to use to read the file
091     * @return The XML file loaded into the document data structure
092     * @throws IOFailure If the file was not correctly read
093     */
094    private Document loadDocument(File f, final String encoding) throws IOFailure {
095        ArgumentNotValid.checkNotNull(f, "File f");
096        SAXReader reader = new SAXReader();
097        reader.setEncoding(encoding);
098        if (!f.canRead()) {
099            String msg = "Could not read file: '" + f.getAbsolutePath() + "'";
100            log.debug(msg);
101            throw new IOFailure(msg);
102        }
103        try {
104            return reader.read(f);
105        } catch (DocumentException e) {
106            String msg = "Could not parse file: '" + f.getAbsolutePath() + "' as XML.";
107            log.warn(msg, e);
108            throw new IOFailure(msg, e);
109        }
110    }
111
112    /**
113     * Function for retrieving a single specific branch.
114     *
115     * @param name The name of the branch
116     * @return The child element of the XML tree structure
117     */
118    public Element getChild(String name) {
119        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
120        return root.element(name);
121    }
122
123    /**
124     * For receiving a list of specific branches.
125     *
126     * @param name The name of the children to be found.
127     * @return A list of the children with the given name.
128     */
129    @SuppressWarnings("unchecked")
130    public List<Element> getChildren(String name) {
131        ArgumentNotValid.checkNotNullOrEmpty(name, "String name");
132        return root.elements(name);
133    }
134
135    /**
136     * Retrieves the XML code for this entire branch.
137     *
138     * @return The XML code.
139     */
140    public String getXML() {
141        return root.asXML();
142    }
143
144    /**
145     * For retrieving the first children along a path.
146     *
147     * @param name The path to the child.
148     * @return The child element, or null if no such child exists.
149     */
150    public Element getSubChild(String... name) {
151        // if no arguments, the XML is returned
152        ArgumentNotValid.checkNotNull(name, "String ...name");
153        Element e = root;
154        // go through the tree to get the correct
155        for (String n : name) {
156            if (e != null) {
157                e = e.element(n);
158            } else {
159                // the element does not exist
160                log.debug("Element {} is not a branch in the tree. Null returned", n);
161                return null;
162            }
163        }
164        return e;
165    }
166
167    /**
168     * Retrieves the content of a branch deep in tree structure.
169     *
170     * @param name Specifies the path in the tree (e.g. in HTML: GetSubChildValue("HTML", "HEAD", "TITLE") to get the
171     * title of a HTML document)
172     * @return The content of the leaf. If it is not a leaf, the entire XML-branch is returned. Returns 'null' if the
173     * path to the branch cannot be found.
174     */
175    public String getSubChildValue(String... name) {
176        ArgumentNotValid.checkNotNull(name, "String ...name");
177        Element e = getSubChild(name);
178        if (e != null) {
179            if (e.isTextOnly()) {
180                return e.getText().trim();
181            } else {
182                log.debug("Element is not text. The entire XML-branch is returned.");
183                return e.asXML();
184            }
185        } else {
186            return null;
187        }
188    }
189
190    /**
191     * Retrieves the content of a branch deep in tree structure.
192     *
193     * @param path Specifies the path in the tree (e.g. in HTML: GetSubChildValue("HTML", "HEAD", "TITLE") to get the
194     * title of a HTML document)
195     * @return The content of the leaf. If it is not a leaf, return null. Returns 'null' if the path to the branch
196     * cannot be found.
197     */
198    public String getLeafValue(String... path) {
199        ArgumentNotValid.checkNotNull(path, "String ...name");
200        Element e = getSubChild(path);
201        if (e != null && e.isTextOnly()) {
202            return e.getText().trim();
203        } else {
204            log.debug("Element is not text. Null returned.");
205            return null;
206        }
207    }
208
209    /**
210     * Retrieves the content of a the leafs deep in the tree structure. It only retrieves branches at the first path.
211     *
212     * @param path Specifies the path in the tree (e.g. in HTML: GetSubChildValue("HTML", "HEAD", "TITLE") to get the
213     * title of a HTML document)
214     * @return The content of the leaf. If no leafs are found then an empty collection of strings are returned (new
215     * String[0]).
216     */
217    public String[] getLeafValues(String... path) {
218        // check argument
219        ArgumentNotValid.checkNotNull(path, "String ...path");
220
221        // get all leafs along path
222        List<Element> elemList = getAllChildrenAlongPath(root, path);
223        // check that any leafs exist.
224        if (elemList.isEmpty()) {
225            return new String[0];
226        }
227
228        // extract the value of the elements to an array.
229        String[] res = new String[elemList.size()];
230        for (int i = 0; i < elemList.size(); i++) {
231            res[i] = elemList.get(i).getText().trim();
232        }
233
234        return res;
235    }
236
237    /**
238     * This function initialise the process of overwriting a part of the tree.
239     * <p>
240     * This is used for the Settings attributes in the deploy.
241     *
242     * @param overwriter The settings instance for the current element
243     */
244    public void overWrite(Element overwriter) {
245        ArgumentNotValid.checkNotNull(overwriter, "Element overwriter");
246        try {
247            overWriting(root, overwriter);
248        } catch (IllegalState e) {
249            log.trace("Overwriting illegal area. ", e);
250        }
251    }
252
253    /**
254     * The current tree will be overwritten by the overwriter tree. The new branches in overwriter will be added to the
255     * current tree. For the leafs which are present in both overwriter and current, the value in the current-leaf will
256     * be overwritten by the overwriter-leaf.
257     * <p>
258     * The subtrees which exists in both the overwriter and the current tree, this function will be run recursively on
259     * these subtrees.
260     *
261     * @param current The base element
262     * @param overwriter The element to have its values overwrite the base element
263     * @throws IllegalState If a leaf in current is about to be replaced by a tree
264     */
265    @SuppressWarnings("unchecked")
266    private void overWriting(Element current, Element overwriter) throws IllegalState {
267        ArgumentNotValid.checkNotNull(current, "Element current");
268        ArgumentNotValid.checkNotNull(overwriter, "Element overwriter");
269        // get the attributes to be overwritten
270        List<Element> attributes = overwriter.elements();
271        List<Element> addElements = new ArrayList<Element>();
272
273        // add branch if it does not exists
274        for (Element e : attributes) {
275            // find corresponding attribute in current element
276            List<Element> curElems = current.elements(e.getName());
277
278            // if no such elements in current tree, add branch.
279            if (curElems.isEmpty()) {
280                addElements.add(e);
281            } else {
282                //
283                List<Element> overElems = overwriter.elements(e.getName());
284
285                // if the lists have a 1-1 ratio, then overwrite
286                if (curElems.size() == 1 && overElems.size() == 1) {
287                    // only one branch, thus overwrite
288                    Element curE = curElems.get(0);
289                    // if leaf overwrite value, otherwise repeat for branches.
290                    if (curE.isTextOnly()) {
291                        curE.setText(e.getText().trim()); // TODO Is this necessary
292                    } else {
293                        overWriting(curE, e);
294                    }
295                } else {
296                    // a different amount of current branches exist (not 0).
297                    // Therefore remove the branches in current tree,
298                    // and add replacements.
299                    for (Element curE : curElems) {
300                        current.remove(curE);
301                    }
302                    // add only current branch, since the others will follow.
303                    addElements.add(e);
304                }
305            }
306        }
307
308        // add all the new branches to the current branch.
309        for (Element e : addElements) {
310            current.add(e.createCopy());
311        }
312    }
313
314    /**
315     * Overwrites the leaf at the end of the path from the branch.
316     *
317     * @param branch The branch where to begin.
318     * @param value The value to overwrite the leaf with.
319     * @param path The path from the branch to the leaf.
320     */
321    public void overWriteOnly(Element branch, String value, String... path) {
322        ArgumentNotValid.checkNotNullOrEmpty(value, "String Value");
323        ArgumentNotValid.checkNotNull(path, "String path");
324        ArgumentNotValid.checkPositive(path.length, "Size of String path[]");
325
326        // get leaf element
327        Element current = branch;
328        for (String s : path) {
329            current = current.element(s);
330
331            // Do not overwrite non-existing element.
332            if (current == null) {
333                return;
334            }
335        }
336
337        // Set the new value
338        current.setText(value);
339    }
340
341    /**
342     * Specific overwrite function for overwriting a specific character in a string.
343     *
344     * @param branch The initial branch of the XML tree.
345     * @param position The position in the String where the character are to be changed.
346     * @param value The new value of the character to change.
347     * @param path The path to the leaf of the string to change.
348     */
349    public void overWriteOnlyInt(Element branch, int position, char value, String... path) {
350        ArgumentNotValid.checkNotNull(path, "String path");
351        ArgumentNotValid.checkPositive(path.length, "Size of String path[]");
352        ArgumentNotValid.checkPositive(position, "int position");
353
354        // get leaf element
355        Element current = branch;
356        for (String s : path) {
357            current = current.element(s);
358
359            // Do not overwrite non-existing element.
360            if (current == null) {
361                return;
362            }
363        }
364
365        // Set the new value
366        char[] txt = current.getText().toCharArray();
367        txt[position] = value;
368        String res = new String(txt);
369        current.setText(res);
370    }
371
372    /**
373     * Creates an dom4j.Element from a String. This string has to be in the XML format, otherwise return null.
374     *
375     * @param content The content of a String.
376     * @return The Element.
377     */
378    public static Element makeElementFromString(String content) {
379        ArgumentNotValid.checkNotNullOrEmpty(content, "String name");
380
381        try {
382            ByteArrayInputStream in = new ByteArrayInputStream(content.getBytes());
383            Document doc = XmlUtils.getXmlDoc(in);
384            return doc.getRootElement();
385        } catch (Exception e) {
386            log.warn("makeElementFromString error caugth. Null returned.", e);
387            return null;
388        }
389    }
390
391    /**
392     * This function creates the XML code for the path.
393     *
394     * @param content The content at the leaf of the branch.
395     * @param path The path to the branch.
396     * @return The XML code for the branch with content.
397     */
398    public static String pathAndContentToXML(String content, String... path) {
399        ArgumentNotValid.checkNotNullOrEmpty(content, "String content");
400        ArgumentNotValid.checkNotNegative(path.length, "Size of 'String ... path'");
401
402        StringBuilder res = new StringBuilder();
403
404        // write path to the leaf
405        for (int i = 0; i < path.length; i++) {
406            String st = path[i];
407            res.append(Constants.changeToXMLBeginScope(st));
408        }
409
410        res.append(content);
411
412        // write path back from leaf (close xml).
413        for (int i = path.length - 1; i >= 0; i--) {
414            String st = path[i];
415            res.append(Constants.changeToXMLEndScope(st));
416        }
417
418        return res.toString();
419    }
420
421    /**
422     * This function recursively calls it self, and retrieves all the leaf children from all sibling branches along the
423     * path. When a call to it-self is made, the first string in path is removed.
424     *
425     * @param current The current element to retrieve children along the path.
426     * @param path The path to the leafs.
427     * @return The complete list of elements which can be found along the path.
428     */
429    @SuppressWarnings("unchecked")
430    public static List<Element> getAllChildrenAlongPath(Element current, String... path) {
431        ArgumentNotValid.checkNotNull(current, "Element current");
432        ArgumentNotValid.checkNotNull(path, "String ... path");
433
434        // make the resulting element list.
435        List<Element> res = new ArrayList<Element>();
436
437        // get value from children
438        if (path.length > 1) {
439            // create the new path
440            String[] nextPath = new String[path.length - 1];
441            for (int i = 1; i < path.length; i++) {
442                nextPath[i - 1] = path[i];
443            }
444
445            // Get the list of children at next level of the path.
446            List<Element> children = current.elements(path[0]);
447            for (Element el : children) {
448                // the the result of these children.
449                List<Element> childRes = getAllChildrenAlongPath(el, nextPath);
450                // put children result into current result.
451                for (Element cr : childRes) {
452                    res.add(cr);
453                }
454            }
455        } else if (path.length == 1) {
456            // if next level is leaf (or goal of path) return them.
457            return current.elements(path[0]);
458        }
459
460        return res;
461    }
462
463}