001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2018 The Royal Danish Library, 
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023package dk.netarkivet.harvester.utils;
024
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlEntityResolver;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlErrorHandler;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlValidationResult;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlValidator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
035
036/** Some utilities for validation of crawlertraps. */
037public class CrawlertrapsUtils {
038        
039    protected static final Logger log = LoggerFactory.getLogger(CrawlertrapsUtils.class);
040    
041    /**
042     * Test one or more lines for being XML wellformed.
043     * @param traps one or more strings
044     * @return false if exception is thrown during validation or validation false; otherwise true
045     */
046         public static boolean isCrawlertrapsWellformedXML(List<String> traps ) {
047                 String prefix = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><values>";
048                 StringBuilder sb = new StringBuilder();
049                 sb.append(prefix);
050                 for (String trimmedLine: traps) {
051                         sb.append("<value>" + trimmedLine + "</value>");
052                 }
053                 String end = "</values>";
054                 sb.append(end);
055
056                 ByteArrayInputStream bais = new ByteArrayInputStream(sb.toString().getBytes());
057                 try {
058                         XmlValidator xmlValidator = new XmlValidator();
059                         XmlEntityResolver entityResolver = null;
060                         XmlErrorHandler errorHandler = new XmlErrorHandler();
061                         XmlValidationResult result = new XmlValidationResult();
062                         return xmlValidator.testStructuralValidity(bais, entityResolver, errorHandler, result);
063                 } catch (Throwable t) {
064                         log.debug("Error found during xml validation", t);
065                         return false;
066                 }
067
068         }
069         /**
070          * Test one line for being XML wellformed.
071          * @param line a line being tested for wellformedness
072          * @return false if exception is thrown during validation or validation false; otherwise true
073          */
074         public static boolean isCrawlertrapsWellformedXML(String line) {
075                 List<String> oneElementList = new ArrayList<String>();
076                 oneElementList.add(line);
077                 return isCrawlertrapsWellformedXML(oneElementList);
078         }
079}