package dk.netarkivet.harvester.utils;

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlEntityResolver;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlErrorHandler;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlValidationResult;
import org.netarchivesuite.heritrix3wrapper.xmlutils.XmlValidator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Static helpers for checking that crawlertrap lines can be embedded in an XML document.
 */
public class CrawlertrapsUtils {

    protected static final Logger log = LoggerFactory.getLogger(CrawlertrapsUtils.class);

    /** XML declaration plus the opening tag of the wrapper element placed around all traps. */
    private static final String XML_PREFIX = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><values>";

    /** Closing tag of the wrapper element. */
    private static final String XML_SUFFIX = "</values>";

    /**
     * Test one or more lines for being XML wellformed.
     * <p>
     * Each trap is inserted verbatim (without escaping) as the content of a {@code <value>}
     * element inside a single {@code <values>} document, and that document is handed to
     * {@link XmlValidator#testStructuralValidity}. The outcome therefore covers all traps
     * as a whole; it does not tell which individual trap is at fault.
     *
     * @param traps one or more strings; must not be null (a null list results in a
     *        {@link NullPointerException}, since the document is built before validation starts)
     * @return false if exception is thrown during validation or validation false; otherwise true
     */
    public static boolean isCrawlertrapsWellformedXML(List<String> traps) {
        StringBuilder sb = new StringBuilder(XML_PREFIX);
        for (String trap : traps) {
            sb.append("<value>").append(trap).append("</value>");
        }
        sb.append(XML_SUFFIX);

        // The XML declaration above promises UTF-8, so encode explicitly instead of relying on
        // the platform default charset; otherwise non-ASCII traps could be mis-encoded.
        byte[] xmlBytes = sb.toString().getBytes(StandardCharsets.UTF_8);
        ByteArrayInputStream bais = new ByteArrayInputStream(xmlBytes);
        try {
            XmlValidator xmlValidator = new XmlValidator();
            // No entity resolver is supplied to the validator.
            XmlEntityResolver entityResolver = null;
            XmlErrorHandler errorHandler = new XmlErrorHandler();
            XmlValidationResult result = new XmlValidationResult();
            return xmlValidator.testStructuralValidity(bais, entityResolver, errorHandler, result);
        } catch (Throwable t) {
            // Any failure inside the validator is treated as "not wellformed".
            log.debug("Error found during xml validation", t);
            return false;
        }
    }

    /**
     * Test one line for being XML wellformed.
     * <p>
     * Convenience overload that wraps the line in a one-element list and delegates to
     * {@link #isCrawlertrapsWellformedXML(List)}.
     *
     * @param line a line being tested for wellformedness
     * @return false if exception is thrown during validation or validation false; otherwise true
     */
    public static boolean isCrawlertrapsWellformedXML(String line) {
        List<String> oneElementList = new ArrayList<String>(1);
        oneElementList.add(line);
        return isCrawlertrapsWellformedXML(oneElementList);
    }
}