%PDF- %PDF-
Direktori : /home/waritko/yacy/source/net/yacy/cora/federate/solr/connector/ |
Current File : //home/waritko/yacy/source/net/yacy/cora/federate/solr/connector/SolrServerConnector.java |
/** * SolrServerConnector * Copyright 2012 by Michael Peter Christen * First released 21.06.2012 at http://yacy.net * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see <http://www.gnu.org/licenses/>. */ package net.yacy.cora.federate.solr.connector; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; import net.yacy.cora.federate.solr.instance.ServerShard; import net.yacy.cora.util.ConcurrentLog; import net.yacy.search.schema.CollectionSchema; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.client.solrj.request.LukeRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.LukeResponse.FieldInfo; import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.params.CommonParams; public abstract class SolrServerConnector extends AbstractSolrConnector implements SolrConnector { protected final static ConcurrentLog log = new ConcurrentLog(SolrServerConnector.class.getName()); public final static org.apache.lucene.analysis.CharArrayMap<Byte> classLoaderSynchro = new org.apache.lucene.analysis.CharArrayMap<Byte>(0, true); // pre-instantiate this object to prevent sun.misc.Launcher$AppClassLoader deadlocks // this is a very nasty problem; solr instantiates objects dynamically which can cause deadlocks static { assert classLoaderSynchro != null; } protected SolrClient server; protected SolrServerConnector() { this.server = null; } protected void init(SolrClient server) { this.server = server; } public SolrClient getServer() { return this.server; } @Override public void commit(final boolean softCommit) { if (this.server == null) return; synchronized (this.server) { try { this.server.commit(true, true, softCommit); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM //Log.logException(e); } } } /** * force an explicit merge of segments * @param maxSegments the maximum number of segments. Set to 1 for maximum optimization */ @Override public void optimize(int maxSegments) { if (this.server == null) return; synchronized (this.server) { try { //this.server.optimize(true, true, maxSegments); new UpdateRequest().setAction(UpdateRequest.ACTION.OPTIMIZE, true, true, maxSegments, true).process(this.server); // this includes a 'true' for expungeDelete } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM ConcurrentLog.logException(e); } } } @Override public boolean isClosed() { return this.server == null; // we cannot now this exactly when server != null, because SolrServer does not provide a method to test the close status } @Override public void close() { if (this.server == null) return; try { if (this.server instanceof EmbeddedSolrServer) { synchronized (this.server) { this.server.commit(true, true, false); } } synchronized (this.server) { this.server.close(); // if the server is embedded, resources are freed, if it is a HttpSolrServer, only the httpclient is shut down, not the remote server } this.server = null; } catch (final Throwable e) { ConcurrentLog.logException(e); } } /** * delete everything in the solr index * @throws IOException */ @Override public void clear() throws IOException { if (this.server == null) return; synchronized (this.server) { try { this.server.deleteByQuery(AbstractSolrConnector.CATCHALL_QUERY); this.server.commit(true, true, false); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM throw new IOException(e); } } } @Override public void deleteById(final String id) throws IOException { if (this.server == null) return; synchronized (this.server) { try { this.server.deleteById(id, -1); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM throw new IOException(e); } } } @Override public void deleteByIds(final Collection<String> ids) throws IOException { if (this.server == null) return; List<String> l = new ArrayList<String>(); for (String s: ids) l.add(s); synchronized (this.server) { try { this.server.deleteById(l, -1); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM throw new IOException(e); } } } /** * delete entries from solr according the given solr query string * @param id the url hash of the entry * @throws IOException */ @Override public void deleteByQuery(final String querystring) throws IOException { if (this.server == null) return; synchronized (this.server) { try { this.server.deleteByQuery(querystring, -1); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM throw new IOException(e); } } } public void add(final File file, final String solrId) throws IOException { final ContentStreamUpdateRequest up = new ContentStreamUpdateRequest("/update/extract"); up.addFile(file, "application/octet-stream"); up.setParam("literal.id", solrId); up.setParam("uprefix", "attr_"); up.setParam("fmap.content", "attr_content"); up.setCommitWithin(-1); //up.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); try { this.server.request(up); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM throw new IOException(e); } } @Override public void add(final SolrInputDocument solrdoc) throws IOException, SolrException { if (this.server == null) return; if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict" synchronized (this.server) { try { this.server.add(solrdoc, -1); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM ConcurrentLog.logException(e); // catches "version conflict for": try this again and delete the document in advance /* // with possible partial update docs, don't try to delete index doc and reinsert solrdoc // as this would result in a index doc with just the updated fields try { this.server.deleteById((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName())); } catch (final SolrServerException e1) { ConcurrentLog.logException(e1); }*/ try { this.server.add(solrdoc, -1); } catch (final Throwable ee) { ConcurrentLog.logException(ee); try { this.server.commit(); } catch (final Throwable eee) { ConcurrentLog.logException(eee); // a time-out may occur here } try { this.server.add(solrdoc, -1); } catch (final Throwable eee) { ConcurrentLog.logException(eee); throw new IOException(eee); } } } } } @Override public void add(final Collection<SolrInputDocument> solrdocs) throws IOException, SolrException { if (this.server == null) return; for (SolrInputDocument solrdoc : solrdocs) { if (solrdoc.containsKey("_version_")) solrdoc.setField("_version_",0L); // prevent Solr "version conflict" } synchronized (this.server) { try { this.server.add(solrdocs, -1); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM ConcurrentLog.logException(e); // catches "version conflict for": try this again and delete the document in advance /* // with possible partial update docs, don't try to delete index doc and reinsert solrdoc // as this would result in a index doc with just the updated fields List<String> ids = new ArrayList<String>(); for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName())); try { this.server.deleteById(ids); } catch (final SolrServerException e1) { ConcurrentLog.logException(e1); }*/ try { this.server.commit(); } catch (final Throwable eee) { ConcurrentLog.logException(eee); // a time-out may occur here } try { this.server.add(solrdocs, -1); } catch (final Throwable ee) { ConcurrentLog.logException(ee); List<String> ids = new ArrayList<String>(); for (SolrInputDocument solrdoc : solrdocs) ids.add((String) solrdoc.getFieldValue(CollectionSchema.id.getSolrFieldName())); log.warn(e.getMessage() + " IDs=" + ids.toString()); throw new IOException(ee); } } } } /** * get the solr document list from a query response * This differs from getResponseByParams in such a way that it does only create the fields of the response but * never search snippets and there are also no facets generated. * @param params * @return * @throws IOException * @throws SolrException */ @Override public SolrDocumentList getDocumentListByParams(ModifiableSolrParams params) throws IOException { if (this.server == null) throw new IOException("server disconnected"); // during the solr query we set the thread name to the query string to get more debugging info in thread dumps String q = params.get(CommonParams.Q); String fq = params.get(CommonParams.FQ); String sort = params.get(CommonParams.SORT); String fl = params.get(CommonParams.FL); String threadname = Thread.currentThread().getName(); QueryResponse rsp; int retry = 0; Throwable error = null; while (retry++ < 10) { try { if (q != null) Thread.currentThread().setName("solr query: q = " + q + (fq == null ? "" : ", fq = " + fq) + (sort == null ? "" : ", sort = " + sort) + "; retry = " + retry + "; fl = " + fl); // for debugging in Threaddump rsp = this.server.query(params); if (q != null) Thread.currentThread().setName(threadname); if (rsp != null) if (log.isFine()) log.fine(rsp.getResults().getNumFound() + " results for q=" + q); return rsp.getResults(); } catch (final SolrServerException e) { error = e; } catch (final Throwable e) { error = e; clearCaches(); // prevent further OOM if this was caused by OOM } ConcurrentLog.severe("SolrServerConnector", "Failed to query remote Solr: " + error.getMessage() + ", query:" + q + (fq == null ? "" : ", fq = " + fq)); try {Thread.sleep(1000);} catch (InterruptedException e) {} } throw new IOException("Error executing query", error); } // luke requests: these do not work for attached SolrCloud Server public Collection<FieldInfo> getFields() throws SolrServerException { // get all fields contained in index return getIndexBrowser(false).getFieldInfo().values(); } /** * get the number of segments. * @return the number of segments, or 0 if unknown */ @Override public int getSegmentCount() { if (this.server == null) return 0; try { LukeResponse lukeResponse = getIndexBrowser(false); NamedList<Object> info = lukeResponse.getIndexInfo(); if (info == null) return 0; Integer segmentCount = (Integer) info.get("segmentCount"); if (segmentCount == null) return 1; return segmentCount.intValue(); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM log.warn(e); return 0; } } private int useluke = 0; // 3-value logic: 1=yes, -1=no, 0=dontknow @Override public long getSize() { if (this.server == null) return 0; if (this.server instanceof ServerShard) { // the server can be a single shard; we don't know here // to test that, we submit requests to bots variants if (useluke == 1) return getSizeLukeRequest(); if (useluke == -1) return getSizeQueryRequest(); long ls = getSizeLukeRequest(); long qs = getSizeQueryRequest(); if (ls == 0 && qs == 0) { // we don't know if this is caused by an error or not; don't change the useluke value return 0; } if (ls == qs) { useluke = 1; return ls; } useluke = -1; return qs; } return getSizeLukeRequest(); } private long getSizeQueryRequest() { if (this.server == null) return 0; try { final QueryResponse rsp = getResponseByParams(AbstractSolrConnector.catchSuccessQuery); if (rsp == null) return 0; final SolrDocumentList docs = rsp.getResults(); if (docs == null) return 0; return docs.getNumFound(); } catch (final Throwable e) { log.warn(e); return 0; } } private long getSizeLukeRequest() { if (this.server == null) return 0; try { LukeResponse lukeResponse = getIndexBrowser(false); if (lukeResponse == null) return 0; Integer numDocs = lukeResponse.getNumDocs(); if (numDocs == null) return 0; return numDocs.longValue(); } catch (final Throwable e) { clearCaches(); // prevent further OOM if this was caused by OOM log.warn(e); return 0; } } private LukeResponse getIndexBrowser(final boolean showSchema) throws SolrServerException { // get all fields contained in index final LukeRequest lukeRequest = new LukeRequest(); lukeRequest.setResponseParser(new XMLResponseParser()); lukeRequest.setNumTerms(0); lukeRequest.setShowSchema(showSchema); LukeResponse lukeResponse = null; try { lukeResponse = lukeRequest.process(this.server); } catch (IOException e) { throw new SolrServerException(e.getMessage()); } return lukeResponse; } }