%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /home/waritko/yacy/source/net/yacy/cora/geo/
Upload File :
Create Path :
Current File : //home/waritko/yacy/source/net/yacy/cora/geo/OpenGeoDBLocation.java

/**
 *  OpenGeoDBLocalization
 *  Copyright 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
 *  first published 04.10.2009 on http://yacy.net
 *
 *  This file is part of YaCy Content Integration
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.cora.geo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.zip.GZIPInputStream;

import net.yacy.cora.document.WordCache;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.StringBuilderComparator;

/**
 * this class loads and parses database dumps from the OpenGeoDB project files can be loaded from
 * http://sourceforge.net/projects/opengeodb/files/ this class is used by the LibraryProvider, which expects
 * input files inside DATA\DICTIONARIES\source ATTENTION: if this class is used, expect an extra memory usage
 * of more than 100MB! This class will provide a super-fast access to the OpenGeoDB, since all request are
 * evaluated using data in the RAM.
 */
public class OpenGeoDBLocation implements Locations
{

    private final Map<Integer, GeoLocation> id2loc;
    private final TreeMap<StringBuilder, List<Integer>> name2ids;
    private final TreeMap<StringBuilder, List<Integer>> kfz2ids;
    private final Map<String, List<Integer>> predial2ids;
    private final Map<String, Integer> zip2id;
    private final File file;

    public OpenGeoDBLocation(final File file, WordCache dymLib) {

        this.file = file;
        this.id2loc = new HashMap<Integer, GeoLocation>();
        this.name2ids = new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
        this.kfz2ids = new TreeMap<StringBuilder, List<Integer>>(StringBuilderComparator.CASE_INSENSITIVE_ORDER);
        this.predial2ids = new HashMap<String, List<Integer>>();
        this.zip2id = new HashMap<String, Integer>();

        if ( file == null || !file.exists() ) {
            return;
        }
        InputStream is = null;
        BufferedReader reader = null;
        try {
            is = new FileInputStream(file);
            if ( file.getName().endsWith(".gz") ) {
                is = new GZIPInputStream(is);
            }
            reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
            String line;

            // read lines
            String[] v;
            Integer id;
            String h;
            float lon, lat;
            while ( (line = reader.readLine()) != null ) {
                line = line.trim();
                if ( !line.startsWith("INSERT INTO ") ) {
                    continue;
                }
                if ( !line.endsWith(");") ) {
                    continue;
                }
                line = line.substring(12);
                //p = line.indexOf(' '); if (p < 0) continue;
                if ( line.startsWith("geodb_coordinates ") ) {
                    line = line.substring(18 + 7);
                    v = CommonPattern.COMMA.split(line);
                    lat = Float.parseFloat(v[2]);
                    lon = Float.parseFloat(v[3]);
                    this.id2loc.put(Integer.parseInt(v[0]), new GeoLocation(lat, lon));
                }
                if ( line.startsWith("geodb_textdata ") ) {
                    line = line.substring(15 + 7);
                    v = CommonPattern.COMMA.split(line);
                    if ( v[1].equals("500100000") ) { // Ortsname
                        if (v.length > 10) {
                            // a ',' is probably inside the location name
                            v[2] = v[2] + "," + v[3];
                        }
                        id = Integer.parseInt(v[0]);
                        h = removeQuotes(v[2]);
                        if (h.length() < OverarchingLocation.MINIMUM_NAME_LENGTH) continue;
                        if (dymLib != null && dymLib.contains(new StringBuilder(h))) continue;
                        List<Integer> l = this.name2ids.get(new StringBuilder(h));
                        if ( l == null ) {
                            l = new ArrayList<Integer>(1);
                        }
                        l.add(id);
                        this.name2ids.put(new StringBuilder(h), l);
                        final GeoLocation loc = this.id2loc.get(id);
                        if ( loc != null ) {
                            loc.setName(h);
                        }
                    } else if ( v[1].equals("500400000") ) { // Vorwahl
                        id = Integer.parseInt(v[0]);
                        h = removeQuotes(v[2]);
                        List<Integer> l = this.predial2ids.get(h);
                        if ( l == null ) {
                            l = new ArrayList<Integer>(1);
                        }
                        l.add(id);
                        this.predial2ids.put(h, l);
                    } else if ( v[1].equals("400300000") ) { // Ortstyp
                        id = Integer.parseInt(v[0]);
                        h = removeQuotes(v[2]);
                        /*
                        final Integer hc = h.hashCode();
                        final byte[] tb = this.locTypeHash2locType.get(hc);
                        if ( tb == null ) {
                            this.locTypeHash2locType.put(hc, UTF8.getBytes(h));
                        }
                        this.id2locTypeHash.put(id, hc);
                        */
                    } else if ( v[1].equals("500300000") ) { // PLZ
                        this.zip2id.put(removeQuotes(v[2]), Integer.parseInt(v[0]));
                    } else if ( v[1].equals("500500000") ) { // KFZ-Kennzeichen
                        id = Integer.parseInt(v[0]);
                        h = removeQuotes(v[2]);
                        List<Integer> l = this.kfz2ids.get(new StringBuilder(h));
                        if ( l == null ) {
                            l = new ArrayList<Integer>(1);
                        }
                        l.add(id);
                        this.kfz2ids.put(new StringBuilder(h), l);
                    }
                }
                continue;
            }
        } catch (final IOException e ) {
            ConcurrentLog.logException(e);
        } finally {
            if ( reader != null ) {
                try {
                    reader.close();
                } catch (final Exception ignored ) {
                }
            } else if(is != null) {
                try {
                	is.close();
                } catch (final IOException ignored ) {
                }
            }
        }
    }

    private static final String removeQuotes(String s) {
        if ( s.length() > 0 && s.charAt(0) == '\'' ) {
            s = s.substring(1);
        }
        if ( s.charAt(s.length() - 1) == '\'' ) {
            s = s.substring(0, s.length() - 1);
        }
        return s;
    }

    @Override
    public int size() {
        return this.id2loc.size();
    }

	@Override
	public boolean isEmpty() {
		return this.id2loc.isEmpty();
	}

    /**
     * check database tables against occurrences of this entity the anyname - String may be one of: - name of
     * a town, villa, region etc - zip code - telephone prefix - kfz sign
     *
     * @param anyname
     * @return
     */
    @Override
    public TreeSet<GeoLocation> find(final String anyname, final boolean locationexact) {
        final HashSet<Integer> r = new HashSet<Integer>();
        List<Integer> c;
        final StringBuilder an = new StringBuilder(anyname);
        if ( locationexact ) {
            c = this.name2ids.get(an);
            if ( c != null ) {
                r.addAll(c);
            }
        } else {
            final SortedMap<StringBuilder, List<Integer>> cities = this.name2ids.tailMap(an);
            for ( final Map.Entry<StringBuilder, List<Integer>> e : cities.entrySet() ) {
                if ( StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(e.getKey(), an) ) {
                    r.addAll(e.getValue());
                } else {
                    break;
                }
            }
            c = this.kfz2ids.get(an);
            if ( c != null ) {
                r.addAll(c);
            }
            c = this.predial2ids.get(anyname);
            if ( c != null ) {
                r.addAll(c);
            }
            final Integer i = this.zip2id.get(anyname);
            if ( i != null ) {
                r.add(i);
            }
        }
        final TreeSet<GeoLocation> a = new TreeSet<GeoLocation>();
        for ( final Integer e : r ) {
            final GeoLocation w = this.id2loc.get(e);
            if ( w != null ) {
                a.add(w);
            }
        }
        return a;
    }

    /**
     * produce a set of location names
     * @return a set of names
     */
    @Override
    public Set<String> locationNames() {
        Set<String> locations = new HashSet<String>();
        Set<StringBuilder> l = this.name2ids.keySet();
        for (StringBuilder s: l) {
            locations.add(s.toString());
        }
        return locations;
    }

    /**
     * read the dictionary and construct a set of recommendations to a given string
     *
     * @param s input value that is used to match recommendations
     * @return a set that contains all words that start with the input value
     */
    @Override
    public Set<String> recommend(final String s) {
        final Set<String> a = new HashSet<String>();
        final StringBuilder an = new StringBuilder(s);
        if ( s.isEmpty() ) {
            return a;
        }
        final SortedMap<StringBuilder, List<Integer>> t = this.name2ids.tailMap(an);
        for ( final StringBuilder r : t.keySet() ) {
            if ( StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(r, an) ) {
                a.add(r.toString());
            } else {
                break;
            }
        }
        return a;
    }

    @Override
    public Set<StringBuilder> recommend(final StringBuilder s) {
        final Set<StringBuilder> a = new HashSet<StringBuilder>();
        if ( s.length() == 0 ) {
            return a;
        }
        final SortedMap<StringBuilder, List<Integer>> t = this.name2ids.tailMap(s);
        for ( final StringBuilder r : t.keySet() ) {
            if ( StringBuilderComparator.CASE_INSENSITIVE_ORDER.startsWith(r, s) ) {
                a.add(r);
            } else {
                break;
            }
        }
        return a;
    }

    @Override
    public String nickname() {
        return this.file.getName();
    }

    @Override
    public int hashCode() {
        return nickname().hashCode();
    }

    @Override
    public boolean equals(final Object other) {
        if ( !(other instanceof Locations) ) {
            return false;
        }
        return nickname().equals(((Locations) other).nickname());
    }
}

Zerion Mini Shell 1.0