/*
 * #%L
 * Netarchivesuite - harvester
 * %%
 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
 *             the National Library of France and the Austrian National Library.
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Lesser Public License for more details.
 *
 * You should have received a copy of the GNU General Lesser Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
 * #L%
 */

package dk.netarkivet.harvester.datamodel;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.harvester.HarvesterSettings;

/**
 * Constants used by the datamodel and webinterface packages.
 * <p>
 * This class only holds static members and cannot be instantiated. Several of the defaults are read from
 * {@link Settings} when the class is initialized.
 */
public final class Constants {

    /**
     * Pattern not used by anyone, except unittests. It accepts text of the form {@code <anything>_<digits>.xml} and
     * captures the digits as group 1.
     */
    private static final Pattern ID_PATTERN = Pattern.compile(".*_(\\d+)\\.xml");

    /**
     * Regexp for checking, if URL contains a protocol, like ftp://, http:// . Note that the expression only requires
     * one or more ASCII letters followed by a colon at the start of the string; the slashes are not required.
     */
    static final String PROTOCOL_REGEXP = "^[a-zA-Z]+:.*";

    /** Maximum size of name entries in the database. */
    static final int MAX_NAME_SIZE = 300;

    /** Maximum size of comment entries in the database. */
    static final int MAX_COMMENT_SIZE = 30000;

    /** Maximum size of password url entries in the database. */
    static final int MAX_URL_SIZE = 300;

    /** Maximum size of password realm entries in the database. */
    static final int MAX_REALM_NAME_SIZE = 300;

    /** Maximum size of password username entries in the database. */
    static final int MAX_USER_NAME_SIZE = 20;

    /** Maximum size of password entries in the database. */
    static final int MAX_PASSWORD_SIZE = 40;

    /** Maximum size of ownerinfo entries in the database. */
    static final int MAX_OWNERINFO_SIZE = 1000;

    /** Maximum size of seedlist entries in the database. */
    static final int MAX_SEED_LIST_SIZE = 8 * 1024 * 1024;

    /**
     * Maximum size of a combined seedlist entry (for a job) in the database.
     */
    static final int MAX_COMBINED_SEED_LIST_SIZE = 64 * 1024 * 1024;

    /** Maximum size of orderxml entries (stringified XML) in the database. */
    static final int MAX_ORDERXML_SIZE = 64 * 1024 * 1024;

    /** Maximum size of error messages from harvests and uploads. */
    public static final int MAX_ERROR_SIZE = 300;

    /** Maximum size of detailed error messages from harvests and uploads. */
    public static final int MAX_ERROR_DETAIL_SIZE = 10000;

    /** This is the default number set as max request rate. */
    public static final int DEFAULT_MAX_REQUEST_RATE = 60;

    /**
     * Max bytes of -1 means infinity (i.e. other factors will determine when the job ends).
     */
    public static final long HERITRIX_MAXBYTES_INFINITY = -1L;

    /**
     * Max objects of -1 means infinity (i.e. other factors will determine when the job ends).
     */
    public static final long HERITRIX_MAXOBJECTS_INFINITY = -1L;

    /**
     * Max job running time of 0 means infinite job running time (i.e. other factors will determine when the job
     * ends).
     */
    public static final long HERITRIX_MAXJOBRUNNINGTIME_INFINITY = 0L;

    /**
     * This is the default number set as max bytes harvested. Set to the max number of bytes we harvest from any domain
     * per harvest, unless explicitly deciding otherwise. Read from the setting
     * {@link HarvesterSettings#DOMAIN_CONFIG_MAXBYTES}.
     */
    public static final long DEFAULT_MAX_BYTES = Settings.getLong(HarvesterSettings.DOMAIN_CONFIG_MAXBYTES);

    /**
     * This is the default number set as max harvested objects. Read from the setting
     * {@link HarvesterSettings#DOMAIN_CONFIG_MAXOBJECTS}.
     */
    public static final long DEFAULT_MAX_OBJECTS = Settings.getLong(HarvesterSettings.DOMAIN_CONFIG_MAXOBJECTS);

    /**
     * The default maximum time in seconds available for each harvesting job. Set to unlimited (0) in the default
     * settings. Used to restrict the running time for snapshot harvest jobs. Read from the setting
     * {@link HarvesterSettings#JOBS_MAX_TIME_TO_COMPLETE}.
     */
    public static final long DEFAULT_MAX_JOB_RUNNING_TIME = Settings
            .getLong(HarvesterSettings.JOBS_MAX_TIME_TO_COMPLETE);

    /**
     * The value for alias timeout, in milliseconds. Computed as the {@link HarvesterSettings#ALIAS_TIMEOUT} setting
     * multiplied by 1000.
     * NOTE(review): this implies the setting itself is expressed in seconds - confirm against the settings
     * documentation.
     */
    public static final long ALIAS_TIMEOUT_IN_MILLISECONDS = Settings.getLong(HarvesterSettings.ALIAS_TIMEOUT) * 1000L;

    /** Settings key used in JobDBDao after admin machine break down. */
    public static final String NEXT_JOB_ID = "settings.harvester.datamodel.domain.nextJobId";

    /**
     * The name used for the element in order.xml which contains global crawler traps.
     */
    public static final String GLOBAL_CRAWLER_TRAPS_ELEMENT_NAME = "dk.netarkivet.global_crawler_traps";

    /**
     * Number of bytes (1024) in one Heritrix byte-limit unit.
     * NOTE(review): presumably used to convert between a byte count and the unit in which Heritrix expresses its byte
     * limits - confirm against the callers.
     */
    public static final long BYTES_PER_HERITRIX_BYTELIMIT_UNIT = 1024;

    /** Uncallable constructor. */
    private Constants() {
    }

    /**
     * Returns a new matcher that matches harvest definition file names and sets group 1 to be the id part.
     * <p>
     * The matcher is created against the empty string, so callers must supply the actual file name with
     * {@link Matcher#reset(CharSequence)} before matching.
     *
     * @return A new matcher instance.
     */
    public static Matcher getIdMatcher() {
        return ID_PATTERN.matcher("");
    }

}