%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /home/waritko/yacy/source/net/yacy/kelondro/data/word/
Upload File :
Create Path :
Current File : //home/waritko/yacy/source/net/yacy/kelondro/data/word/WordReferenceFactory.java

// WordReferenceFactory.java
// (C) 2009 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
// first published 09.04.2009 on http://yacy.net
//
// This is a part of YaCy, a peer-to-peer based web search engine
//
// $LastChangedDate$
// $LastChangedRevision$
// $LastChangedBy$
//
// LICENSE
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

package net.yacy.kelondro.data.word;

import java.io.Serializable;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.util.ByteBuffer;
import net.yacy.kelondro.index.Row;
import net.yacy.kelondro.index.Row.Entry;
import net.yacy.kelondro.rwi.ReferenceContainer;
import net.yacy.kelondro.rwi.ReferenceFactory;

/**
 * Factory for {@link WordReference} objects, plus static helpers that convert a
 * reference container into a compact "index abstract" and back.
 *
 * <p>The index abstract produced by {@link #compressIndex} and consumed by
 * {@link #decompressIndex} has the textual form
 * <code>{dom:modmod...,dom:modmod...}</code> where <code>dom</code> is a
 * 6-character host hash and every <code>mod</code> is the first 6 characters of
 * a url hash that belongs to that host.
 */
public class WordReferenceFactory implements ReferenceFactory<WordReference>, Serializable {

    private static final long serialVersionUID=-7168706947127349876L;

    /**
     * Wraps a raw row entry into a row-backed word reference.
     *
     * @param e the row entry to wrap
     * @return a new {@link WordReferenceRow} built from {@code e}
     */
    @Override
    public WordReference produceSlow(final Entry e) {
        return new WordReferenceRow(e);
    }

    /**
     * Converts a word reference into its {@link WordReferenceVars} form.
     *
     * @param r the reference to convert
     * @param local passed through unchanged to the {@link WordReferenceVars} constructor
     * @return {@code r} itself if it already is a {@link WordReferenceVars},
     *         otherwise a new {@link WordReferenceVars} created from it
     */
    @Override
    public WordReference produceFast(final WordReference r, final boolean local) {
        if (r instanceof WordReferenceVars) return r;
        return new WordReferenceVars(r, local);
    }

    /**
     * @return the row definition {@link WordReferenceRow#urlEntryRow}
     */
    @Override
    public Row getRow() {
        return WordReferenceRow.urlEntryRow;
    }

    /**
     * Creates an index abstract for a given WordReference ReferenceContainer.
     * All references are grouped by their host hash; for each host the leading
     * 6 characters of every url hash are concatenated. The result is written as
     * <code>{dom:modmod...,dom:modmod...}</code> with the hosts in the natural
     * String order of their hashes.
     *
     * <p>Both the collection phase and the output phase stop early once the time
     * budget is exhausted, so the returned abstract may be incomplete.
     *
     * @param <ReferenceType> unused; kept so that the signature stays unchanged for callers
     * @param inputContainer the container whose references are compressed; it is
     *        locked (synchronized) while its entries are iterated
     * @param excludeContainer references whose url hash is found in this container
     *        are skipped; may be {@code null} to exclude nothing
     * @param maxtime time budget in milliseconds; a negative value or a value so
     *        large that the deadline would overflow means "no time limit"
     * @return a byte buffer holding the index abstract
     */
    public static final <ReferenceType extends WordReference> ByteBuffer compressIndex(final ReferenceContainer<WordReference> inputContainer, final ReferenceContainer<WordReference> excludeContainer, final long maxtime) {
        // compute the deadline without overflowing: now + maxtime would wrap to a
        // negative value for huge maxtime and abort the loop after the first entry
        final long now = System.currentTimeMillis();
        final long timeout = (maxtime < 0 || maxtime > Long.MAX_VALUE - now) ? Long.MAX_VALUE : now + maxtime;

        // collect references according to domains
        final TreeMap<String, StringBuilder> doms = new TreeMap<String, StringBuilder>();
        synchronized (inputContainer) {
            final Iterator<WordReference> i = inputContainer.entries();
            WordReference iEntry;
            String dom, mod;
            StringBuilder paths;
            while (i.hasNext()) {
                iEntry = i.next();
                if ((excludeContainer != null) && (excludeContainer.getReference(iEntry.urlhash()) != null)) continue; // do not include urls that are in excludeContainer
                // host part: bytes 6..11 of the url hash (or the precomputed host hash)
                dom = (iEntry instanceof WordReferenceVars) ? ((WordReferenceVars) iEntry).hosthash() : ASCII.String(iEntry.urlhash(), 6, 6);
                // url-specific part: bytes 0..5 of the url hash
                mod = ASCII.String(iEntry.urlhash(), 0, 6);
                paths = doms.get(dom);
                if (paths == null) {
                    paths = new StringBuilder(30);
                    doms.put(dom, paths);
                }
                paths.append(mod);
                if (System.currentTimeMillis() > timeout)
                    break;
            }
        }
        // construct a result string
        final ByteBuffer bb = new ByteBuffer(inputContainer.size() * 6);
        bb.append('{');
        final Iterator<Map.Entry<String, StringBuilder>> i = doms.entrySet().iterator();
        Map.Entry<String, StringBuilder> entry;
        while (i.hasNext()) {
            entry = i.next();
            bb.append(entry.getKey());
            bb.append(':');
            bb.append(entry.getValue().toString());
            // check the deadline before writing the separator so that an early
            // exit never leaves a trailing comma in front of the closing brace
            if (System.currentTimeMillis() > timeout)
                break;
            if (i.hasNext())
                bb.append(',');
        }
        bb.append('}');
        return bb;
    }

    /**
     * Decompresses an index abstract that was generated from a word index and
     * transmitted over a network connection.
     *
     * <p>Every 12-character url hash is rebuilt by appending the 6-character host
     * hash of a group to each 6-character fragment of that group. Parsing is
     * lenient: input that is {@code null}, too short, or not enclosed in braces
     * yields an empty map, and parsing stops at the first malformed or truncated
     * part while keeping everything decoded up to that point.
     *
     * <p>Note that the passed buffer is consumed: it is trimmed in place while
     * it is being parsed.
     *
     * @param ci the index abstract in the form <code>{dom:modmod...,dom:modmod...}</code>
     * @param peerhash the hash of the peer that sent the abstract; it is added to
     *        the peer set of every decoded url hash
     * @return a urlhash -> peerlist map: this shows in which peers an url is stored
     */
    public static final SortedMap<String, Set<String>> decompressIndex(ByteBuffer ci, final String peerhash) {
        final SortedMap<String, Set<String>> target = Collections.synchronizedSortedMap(new TreeMap<String, Set<String>>());
        // target is a mapping from url-hashes to a string of peer-hashes

        // an abstract needs at least the two enclosing braces; checking the length
        // first avoids probing index -1 of an empty buffer
        if (ci == null || ci.length() < 2) return target;
        if (ci.byteAt(0) != '{' || ci.byteAt(ci.length() - 1) != '}') return target;

        // strip the enclosing braces
        ci = ci.trim(1, ci.length() - 2);
        String dom, url;
        Set<String> peers;
        StringBuilder urlsb;
        // a group needs at least 6 (dom) + 1 (':') + 6 (one url fragment) bytes
        while (ci.length() >= 13 && ci.byteAt(6) == ':') {
            dom = ci.toStringBuilder(0, 6, 6).toString();
            ci.trim(7); // skip the host hash and the colon
            while (!ci.isEmpty() && ci.byteAt(0) != ',') {
                // the data comes from the network: a truncated fragment must not
                // cause reads beyond the end of the buffer
                if (ci.length() < 6) return target;
                urlsb = ci.toStringBuilder(0, 6, 12);
                urlsb.append(dom);
                url = urlsb.toString();
                ci.trim(6);

                peers = target.get(url);
                if (peers == null) {
                    peers = new HashSet<String>();
                    target.put(url, peers);
                }
                peers.add(peerhash);
            }
            // skip the group separator, but only if there is anything left to read
            if (!ci.isEmpty() && ci.byteAt(0) == ',') ci.trim(1);
        }
        return target;
    }
}

Zerion Mini Shell 1.0