%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/309157/root/home/waritko/yacy/source/net/yacy/cora/protocol/
Upload File :
Create Path :
Current File : //proc/309157/root/home/waritko/yacy/source/net/yacy/cora/protocol/ClientIdentification.java

/**
 *  ClientIdentification
 *  Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany
 *  First released 26.04.2011 at http://yacy.net
 *
 *  $LastChangedDate: 2011-04-21 23:59:56 +0200 (Do, 21 Apr 2011) $
 *  $LastChangedRevision: 7673 $
 *  $LastChangedBy: orbiter $
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *  
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *  
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */


package net.yacy.cora.protocol;

import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Registry of the identities (user agent string, robots.txt names, access
 * delay and timeout) that an HTTP client can present to remote servers.
 *
 * <p>All predefined agents are registered by name in an internal map during
 * static initialization; {@link #getAgent(String)} looks them up and falls
 * back to {@link #yacyInternetCrawlerAgent} for unknown names.</p>
 */
public class ClientIdentification {

    /** timeout value assigned to every predefined agent */
    public static final int clientTimeoutInit = 10000;
    /** the minimum time difference between access of the same local domain */
    public static final int minimumLocalDeltaInit  =  10;
    /** the minimum time difference between access of the same global domain */
    public static final int minimumGlobalDeltaInit = 250;

    /**
     * Immutable description of one client identity.
     * Note that {@link #robotIDs} is the array handed to the constructor
     * (no defensive copy is made), so callers must not modify it.
     */
    public static class Agent {
        /** the name that is sent in http requests to identify the agent */
        public final String userAgent;
        /** the names that are used in robots.txt to identify the agent */
        public final String[] robotIDs;
        /** the minimum delay between two accesses */
        public final int    minimumDelta;
        /** the timeout to be used by clients acting as this agent */
        public final int    clientTimeout;

        /**
         * @param userAgent     the user agent string sent in http requests
         * @param robotIDs      the names matched against robots.txt entries
         * @param minimumDelta  the minimum delay between two accesses
         * @param clientTimeout the client timeout
         */
        public Agent(final String userAgent, final String[] robotIDs, final int minimumDelta, final int clientTimeout) {
            this.userAgent = userAgent;
            this.robotIDs = robotIDs;
            this.minimumDelta = minimumDelta;
            this.clientTimeout = clientTimeout;
        }
    }

    // fake browser user agents are NOT AVAILABLE IN P2P OPERATION, only on special customer
    // configurations (commercial users demanded this, I personally think this is inadvisable)
    private final static String[] browserAgents = new String[]{
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/536.30.1 (KHTML, like Gecko) Version/6.0.5 Safari/536.30.1",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36",
        "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36",
        "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:22.0) Gecko/20100101 Firefox/22.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0",
        "Mozilla/5.0 (Windows NT 5.1; rv:22.0) Gecko/20100101 Firefox/22.0",
        "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20100101 Firefox/22.0",
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36",
        "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0"
        };
    // used once, in the static block, to pick one of the browserAgents
    private static final Random random = new Random(System.currentTimeMillis());
    // agent name -> agent; must be initialized before the static block below fills it
    private static final Map<String, Agent> agents = new ConcurrentHashMap<String, Agent>();
    public final static String yacyInternetCrawlerAgentName = "YaCy Internet (cautious)";
    public static Agent yacyInternetCrawlerAgent = null; // defined later in static
    public final static String yacyIntranetCrawlerAgentName = "YaCy Intranet (greedy)";
    public static Agent yacyIntranetCrawlerAgent = null; // defined later in static
    public final static String googleAgentName = "Googlebot";
    public final static Agent googleAgentAgent = new Agent("Googlebot/2.1 (+http://www.google.com/bot.html)", new String[]{"Googlebot", "Googlebot-Mobile"}, minimumGlobalDeltaInit / 10, clientTimeoutInit);
    public final static String yacyProxyAgentName = "YaCyProxy";
    public final static Agent yacyProxyAgent = new Agent("yacy - this is a proxy access through YaCy from a browser, not a robot (the yacy bot user agent is 'yacybot')", new String[]{"yacy"}, minimumGlobalDeltaInit, clientTimeoutInit);
    public final static String customAgentName = "Custom Agent";
    public final static String browserAgentName = "Random Browser";
    public static Agent browserAgent; // defined in static

    /**
     * provide system information (this is part of YaCy protocol)
     */
    public static final String yacySystem = System.getProperty("os.arch", "no-os-arch") + " " +
            System.getProperty("os.name", "no-os-name") + " " + System.getProperty("os.version", "no-os-version") +
            "; " + "java " + System.getProperty("java.version", "no-java-version") + "; " + generateLocation(); // keep this before the following static initialization block as this constant is used by generateYaCyBot()

    static {
        // registers the two yacybot agents; relies on yacySystem and agents being initialized above
        generateYaCyBot("new");
        // one browser identity is picked randomly and kept for the lifetime of the class
        browserAgent = new Agent(browserAgents[random.nextInt(browserAgents.length)], new String[]{"Mozilla"}, minimumLocalDeltaInit, clientTimeoutInit);
        agents.put(googleAgentName, googleAgentAgent);
        agents.put(browserAgentName, browserAgent);
        agents.put(yacyProxyAgentName, yacyProxyAgent);
    }

    /**
     * (re-)create the YaCy crawler agents and register them under
     * {@link #yacyInternetCrawlerAgentName} and {@link #yacyIntranetCrawlerAgentName}.
     * Both agents share the same user agent string and differ only in their minimum delta.
     * @param addinfo additional information that is placed first inside the parenthesis of the user agent string
     */
    public static void generateYaCyBot(String addinfo) {
        String agentString = "yacybot (" + addinfo + "; " + yacySystem  + ") http://yacy.net/bot.html";
        yacyInternetCrawlerAgent = new Agent(agentString, new String[]{"yacybot"}, minimumGlobalDeltaInit, clientTimeoutInit);
        yacyIntranetCrawlerAgent = new Agent(agentString, new String[]{"yacybot"}, minimumLocalDeltaInit, clientTimeoutInit); // must have the same userAgent String as the web crawler because this is also used for snippets
        agents.put(yacyInternetCrawlerAgentName, yacyInternetCrawlerAgent);
        agents.put(yacyIntranetCrawlerAgentName, yacyIntranetCrawlerAgent);
    }

    /**
     * create a custom agent and register it under {@link #customAgentName},
     * replacing a previously registered custom agent.
     * The request is silently ignored if name or string is null or if one of them contains 'yacy' (in any case).
     * @param name the robots.txt name of the custom agent
     * @param string the user agent string; every occurrence of $$SYSTEM$$ is replaced by {@link #yacySystem} with "java" substituted by "O"
     * @param minimumdelta the minimum delay between two accesses
     * @param clienttimeout the client timeout
     */
    public static void generateCustomBot(String name, String string, int minimumdelta, int clienttimeout) {
        if (name == null || string == null) return; // nothing to register
        if (name.toLowerCase().contains("yacy") || string.toLowerCase().contains("yacy")) return; // don't allow 'yacy' in custom bot strings
        String agentString = string.replace("$$SYSTEM$$", yacySystem.replace("java", "O"));
        agents.put(customAgentName, new Agent(agentString, new String[]{name}, minimumdelta, clienttimeout));
    }

    /**
     * get a registered agent by its name
     * @param agentName the name the agent was registered with; may be null or empty
     * @return the agent registered under agentName, or {@link #yacyInternetCrawlerAgent} if agentName is null, empty or unknown
     */
    public static Agent getAgent(String agentName) {
        if (agentName == null || agentName.length() == 0) return yacyInternetCrawlerAgent;
        Agent agent = agents.get(agentName);
        return agent == null ? yacyInternetCrawlerAgent : agent;
    }

    /**
     * generating the location string
     * 
     * @return the part of the system property user.timezone before the first '/' (default "nowhere"),
     *         followed by '/' and the system property user.language (default "dumb")
     */
    public static String generateLocation() {
        String loc = System.getProperty("user.timezone", "nowhere");
        final int p = loc.indexOf('/');
        if (p > 0) {
            loc = loc.substring(0, p);
        }
        loc = loc + "/" + System.getProperty("user.language", "dumb");
        return loc;
    }

    /**
     * gets the location out of the user agent
     * 
     * location must be after last ; and before first )
     * If no semicolon precedes the first ')' inside the parenthesis, or if the last semicolon
     * follows the first ')', the whole content of the first parenthesis is taken.
     * 
     * @param userAgent in form "useragentinfo (some params; _location_) additional info"
     * @return the trimmed location; an empty string if userAgent is null or no location can be found
     */
    public static String parseLocationInUserAgent(final String userAgent) {
        if (userAgent == null) return "";

        final int firstOpenParenthesis = userAgent.indexOf('(');
        final int lastSemicolon = userAgent.lastIndexOf(';');
        final int firstClosedParenthesis = userAgent.indexOf(')');

        if (lastSemicolon < firstClosedParenthesis) {
            // ; Location )   or   ( Location )
            // a ')' exists here; the location starts behind the last ';' or behind the '(',
            // whichever of them is closer to the ')'
            int start = lastSemicolon;
            if (firstOpenParenthesis > start && firstOpenParenthesis < firstClosedParenthesis) {
                start = firstOpenParenthesis;
            }
            return userAgent.substring(start + 1, firstClosedParenthesis).trim();
        }

        // from here: either there is no ')' at all or the last ';' is behind the first ')'
        if (firstOpenParenthesis < 0) {
            // no parenthesis, therefore no location
            return "";
        }
        if (firstClosedParenthesis > firstOpenParenthesis) {
            // ( Location )
            return userAgent.substring(firstOpenParenthesis + 1, firstClosedParenthesis).trim();
        }
        // ( Location <end>
        return userAgent.substring(firstOpenParenthesis + 1).trim();
    }
}

Zerion Mini Shell 1.0