/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2018 The Royal Danish Library,
 * the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program. If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */
package dk.netarkivet.harvester.utils;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlEntityResolver;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlErrorHandler;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlValidationResult;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlValidator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Some utilities for validation of crawlertraps.
 *
 * <p>A crawlertrap is accepted when it can be embedded as the content of an XML element
 * without making the surrounding document structurally invalid.
 */
public class CrawlertrapsUtils {

    protected static final Logger log = LoggerFactory.getLogger(CrawlertrapsUtils.class);

    /**
     * Test one or more lines for being XML wellformed.
     *
     * <p>Every trap is wrapped in its own {@code <value>} element inside a single
     * {@code <values>} root, and the resulting document is passed to
     * {@link XmlValidator#testStructuralValidity}. The traps are inserted verbatim, without
     * escaping, so any markup-significant characters they contain are seen by the parser.
     *
     * @param traps one or more strings
     * @return false if exception is thrown during validation or validation false; otherwise true
     */
    public static boolean isCrawlertrapsWellformedXML(List<String> traps) {
        StringBuilder xml = new StringBuilder();
        xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?><values>");
        for (String trap : traps) {
            xml.append("<value>").append(trap).append("</value>");
        }
        xml.append("</values>");

        // The prolog declares UTF-8, so the bytes must be UTF-8 as well. Relying on the
        // platform default charset would corrupt non-ASCII traps on JVMs whose default is
        // not UTF-8 and make wellformed traps fail validation.
        byte[] xmlBytes = xml.toString().getBytes(StandardCharsets.UTF_8);
        ByteArrayInputStream bais = new ByteArrayInputStream(xmlBytes);
        try {
            XmlValidator xmlValidator = new XmlValidator();
            // No entity resolver is supplied to the validator.
            XmlEntityResolver entityResolver = null;
            XmlErrorHandler errorHandler = new XmlErrorHandler();
            XmlValidationResult result = new XmlValidationResult();
            return xmlValidator.testStructuralValidity(bais, entityResolver, errorHandler, result);
        } catch (Throwable t) {
            // Any failure while validating is reported to the caller as "not wellformed".
            log.debug("Error found during xml validation", t);
            return false;
        }
    }

    /**
     * Test one line for being XML wellformed.
     *
     * <p>Convenience overload that delegates to {@link #isCrawlertrapsWellformedXML(List)}
     * with a list containing only the given line.
     *
     * @param line a line being tested for wellformedness
     * @return false if exception is thrown during validation or validation false; otherwise true
     */
    public static boolean isCrawlertrapsWellformedXML(String line) {
        List<String> oneElementList = new ArrayList<String>(1);
        oneElementList.add(line);
        return isCrawlertrapsWellformedXML(oneElementList);
    }
}