001package dk.netarkivet.harvester.tools;
002
003import java.util.List;
004
005import dk.netarkivet.harvester.datamodel.Domain;
006import dk.netarkivet.harvester.datamodel.DomainDAO;
007import dk.netarkivet.harvester.utils.CrawlertrapsUtils;
008
009/**
010 * Checks DomainCrawltraps in the Domain table for validity.
011 * usage: java -Dnetarkivet.settings.file=some-settings-file dk.netarkivet.harvester.tools.CheckDomainCrawltraps 
012 * 
013 * @author svc
014 *
015 */
016public class CheckDomainCrawltraps {
017
018    public static void main(String[] args) {
019        DomainDAO dao = DomainDAO.getInstance();
020        List<String> domainNames = dao.getAllDomainNames();
021        long domaincount=0;
022        long baddomaincount=0;
023        long badTrapsCount=0;
024        for (String domainName: domainNames) {
025            Domain d = dao.read(domainName);
026            domaincount++;
027            List<String> traps = d.getCrawlerTraps();
028            boolean isWellFormed = CrawlertrapsUtils.isCrawlertrapsWellformedXML(traps);
029            System.out.println("DomainCrawlertraps (" + traps.size() + ") for domain '" + d.getName() + "' is " 
030                        + (isWellFormed?"OK":"NOT OK"));
031            if (!isWellFormed) { // Examine the traps individually
032                baddomaincount++;
033                for (String trap: traps) {
034                    boolean isWellFormedTrap = CrawlertrapsUtils.isCrawlertrapsWellformedXML(trap);
035                    if (!isWellFormedTrap) {
036                        System.out.println("domain '" + d.getName() + "' has the not wellformed trap '" + trap + "'");
037                        badTrapsCount++;
038                    }
039                }
040            }
041        }
042        System.out.println("Examined " +  domaincount + " domains.");
043        System.out.println("Domains with not wellformed traps: " +  baddomaincount);
044        System.out.println("Found " +  badTrapsCount + " not wellformed traps");
045    }
046}