001/*
002 * #%L
003 * Netarchivesuite - harvester
004 * %%
005 * Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
006 *             the National Library of France and the Austrian National Library.
007 * %%
008 * This program is free software: you can redistribute it and/or modify
009 * it under the terms of the GNU Lesser General Public License as
010 * published by the Free Software Foundation, either version 2.1 of the
011 * License, or (at your option) any later version.
012 * 
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Lesser Public License for more details.
017 * 
018 * You should have received a copy of the GNU General Lesser Public
019 * License along with this program.  If not, see
020 * <http://www.gnu.org/licenses/lgpl-2.1.html>.
021 * #L%
022 */
023
024package dk.netarkivet.harvester.datamodel;
025
026import java.util.regex.Matcher;
027import java.util.regex.Pattern;
028
029import dk.netarkivet.common.utils.Settings;
030import dk.netarkivet.harvester.HarvesterSettings;
031
032/**
033 * Constants used by the datamodel and webinterface packages.
034 */
035public final class Constants {
036
037    /** Pattern not used by anyone, except unittests. */
038    private static final Pattern ID_PATTERN = Pattern.compile(".*_(\\d+)\\.xml");
039    /**
040     * Regexp for checking, if URL contains a protocol, like ftp://, http:// .
041     */
042    static final String PROTOCOL_REGEXP = "^[a-zA-Z]+:.*";
043    /** Maximum size of name entries in the database. */
044    static final int MAX_NAME_SIZE = 300;
045    /** Maximum size of comment entries in the database. */
046    static final int MAX_COMMENT_SIZE = 30000;
047    /** Maximum size of password url entries in the database. */
048    static final int MAX_URL_SIZE = 300;
049    /** Maximum size of password realm entries in the database. */
050    static final int MAX_REALM_NAME_SIZE = 300;
051    /** Maximum size of password username entries in the database. */
052    static final int MAX_USER_NAME_SIZE = 20;
053    /** Maximum size of password entries in the database. */
054    static final int MAX_PASSWORD_SIZE = 40;
055    /** Maximum size of ownerinfo entries in the database. */
056    static final int MAX_OWNERINFO_SIZE = 1000;
057    /** Maximum size of seedlist entries in the database. */
058    static final int MAX_SEED_LIST_SIZE = 8 * 1024 * 1024;
059    /**
060     * Maximum size of a combined seedlist entry (for a job) in the database.
061     */
062    static final int MAX_COMBINED_SEED_LIST_SIZE = 64 * 1024 * 1024;
063    /** Maximum size of orderxml entries (stringified XML) in the database. */
064    static final int MAX_ORDERXML_SIZE = 64 * 1024 * 1024;
065    /** Maximum size of error messages from harvests and uploads. */
066    public static final int MAX_ERROR_SIZE = 300;
067    /** Maximum size of detailed error messages from harvests and uploads. */
068    public static final int MAX_ERROR_DETAIL_SIZE = 10000;
069    /** This is the default number set as max request rate. */
070    public static final int DEFAULT_MAX_REQUEST_RATE = 60;
071    /**
072     * Max bytes of -1 means infinity (i.e other factors will determine when the job ends).
073     */
074    public static final long HERITRIX_MAXBYTES_INFINITY = -1L;
075    /**
076     * Max objects of -1 means infinity (i.e other factors will determine when the job ends).
077     */
078    public static final long HERITRIX_MAXOBJECTS_INFINITY = -1L;
079
080    /**
081     * Max job running time of 0 means infinite job running time (i.e other factors will determine when the job ends).
082     */
083    public static final long HERITRIX_MAXJOBRUNNINGTIME_INFINITY = 0L;
084
085    /**
086     * This is the default number set as max bytes harvested. Set to the max number of bytes we harvest from any domain
087     * per harvest, unless explicitly deciding otherwise.
088     */
089    public static final long DEFAULT_MAX_BYTES = Settings.getLong(HarvesterSettings.DOMAIN_CONFIG_MAXBYTES);
090
091    /** This is the default number set as max harvested objects. */
092    public static final long DEFAULT_MAX_OBJECTS = Settings.getLong(HarvesterSettings.DOMAIN_CONFIG_MAXOBJECTS);
093
094    /**
095     * The default maximum time in seconds available for each harvesting job. Set to unlimited (0) in the default
096     * settings. Used to restrict the running time for snapshot harvest jobs.
097     */
098    public static final long DEFAULT_MAX_JOB_RUNNING_TIME = Settings
099            .getLong(HarvesterSettings.JOBS_MAX_TIME_TO_COMPLETE);
100
101    /**
102     * The value for alias timeout, in milliseconds.
103     */
104    public static final long ALIAS_TIMEOUT_IN_MILLISECONDS = Settings.getLong(HarvesterSettings.ALIAS_TIMEOUT) * 1000L;
105
106    /** Settings used in JobDBDao after admin machine break down. * */
107    public static final String NEXT_JOB_ID = "settings.harvester.datamodel.domain.nextJobId";
108
109    /**
110     * The name used for the element in order.xml which contains global crawler traps.
111     */
112    public static final String GLOBAL_CRAWLER_TRAPS_ELEMENT_NAME = "dk.netarkivet.global_crawler_traps";
113
114    public static final long BYTES_PER_HERITRIX_BYTELIMIT_UNIT = 1024;
115
116    /** Uncallable constructor. */
117    private Constants() {
118    }
119
120    /**
121     * Returns a new matcher that matches harvest definition file names and sets group 1 to be the id part.
122     *
123     * @return A new matcher instance.
124     */
125    public static Matcher getIdMatcher() {
126        return ID_PATTERN.matcher("");
127    }
128
129}