package dk.netarkivet.harvester.tools;

import java.util.List;

import dk.netarkivet.harvester.datamodel.Domain;
import dk.netarkivet.harvester.datamodel.DomainDAO;
import dk.netarkivet.harvester.utils.CrawlertrapsUtils;

/**
 * Command-line tool that checks the crawler traps of every domain returned by the {@link DomainDAO}
 * for well-formedness and reports the outcome on standard output.
 * <p>
 * Usage: java -Dnetarkivet.settings.file=some-settings-file dk.netarkivet.harvester.tools.CheckDomainCrawltraps
 *
 * @author svc
 */
public class CheckDomainCrawltraps {

    /**
     * Reads each domain by name, checks its complete crawler-trap list with
     * {@link CrawlertrapsUtils#isCrawlertrapsWellformedXML}, and prints one status line per domain.
     * For a domain whose list fails the check, every trap is re-checked on its own so the
     * offending entries can be printed. Three summary lines with the totals are printed at the end.
     *
     * @param args command-line arguments; not read by this tool
     */
    public static void main(String[] args) {
        DomainDAO domainDao = DomainDAO.getInstance();

        long examinedDomains = 0;
        long domainsWithBadTraps = 0;
        long malformedTraps = 0;

        for (String name : domainDao.getAllDomainNames()) {
            Domain domain = domainDao.read(name);
            examinedDomains++;

            List<String> trapList = domain.getCrawlerTraps();
            String verdict;
            if (CrawlertrapsUtils.isCrawlertrapsWellformedXML(trapList)) {
                verdict = "OK";
            } else {
                verdict = "NOT OK";
            }
            System.out.println("DomainCrawlertraps (" + trapList.size() + ") for domain '" + domain.getName()
                    + "' is " + verdict);

            if ("OK".equals(verdict)) {
                // Nothing more to report for this domain.
                continue;
            }

            // The list as a whole failed the check: look at each trap to pinpoint the bad ones.
            domainsWithBadTraps++;
            malformedTraps += reportMalformedTraps(domain, trapList);
        }

        System.out.println("Examined " + examinedDomains + " domains.");
        System.out.println("Domains with not wellformed traps: " + domainsWithBadTraps);
        System.out.println("Found " + malformedTraps + " not wellformed traps");
    }

    /**
     * Checks each trap individually, prints a line for every trap that fails the check,
     * and returns how many failed.
     *
     * @param domain the domain the traps belong to; its name is used in the printed lines
     * @param trapList the crawler traps to check one by one
     * @return the number of traps that failed the individual check
     */
    private static long reportMalformedTraps(Domain domain, List<String> trapList) {
        long failures = 0;
        for (String candidate : trapList) {
            if (CrawlertrapsUtils.isCrawlertrapsWellformedXML(candidate)) {
                continue;
            }
            System.out.println("domain '" + domain.getName() + "' has the not wellformed trap '" + candidate + "'");
            failures++;
        }
        return failures;
    }
}