%PDF- %PDF-
Direktori : /home/waritko/yacy/source/net/yacy/cora/federate/solr/responsewriter/ |
Current File : //home/waritko/yacy/source/net/yacy/cora/federate/solr/responsewriter/GrepHTMLResponseWriter.java |
/**
 *  GrepHTMLResponseWriter
 *  Copyright 2013 by Michael Peter Christen
 *  First released 09.06.2013 at http://yacy.net
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program in the file lgpl21.txt
 *  If not, see <http://www.gnu.org/licenses/>.
 */

package net.yacy.cora.federate.solr.responsewriter;

import java.io.IOException;
import java.io.Writer;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.lucene.document.Document;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.XML;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SolrReturnFields;

import net.yacy.document.SentenceReader;
import net.yacy.search.schema.CollectionSchema;

/**
 * This response writer shows a list of documents with the lines containing matches
 * of the search request in 'grep-style', which means it is like doing a grep on a set
 * of files. Within the result list, each document is split into the sentences of its
 * text part (preceded by its title) and each sentence is shown as a separate line.
 * Only sentences containing the grep phrase are listed; surrounding (leading/trailing)
 * lines are not emitted by this implementation.
 */
public class GrepHTMLResponseWriter implements QueryResponseWriter, SolrjResponseWriter {

    /** Names of the Solr fields loaded for every document of the result list. */
    private static final Set<String> DEFAULT_FIELD_LIST = new HashSet<>();

    /** Matches a double quote; used to percent-encode quotes in the API link. */
    private static final Pattern DOUBLE_QUOTE_PATTERN = Pattern.compile("\"");

    static {
        DEFAULT_FIELD_LIST.add(CollectionSchema.id.getSolrFieldName());
        DEFAULT_FIELD_LIST.add(CollectionSchema.sku.getSolrFieldName());
        DEFAULT_FIELD_LIST.add(CollectionSchema.title.getSolrFieldName());
        DEFAULT_FIELD_LIST.add(CollectionSchema.text_t.getSolrFieldName());
    }

    /**
     * @param request the Solr request (unused)
     * @param response the Solr response (unused)
     * @return the constant content type "text/html"
     */
    @Override
    public String getContentType(final SolrQueryRequest request, final SolrQueryResponse response) {
        return "text/html";
    }

    /**
     * No initialization parameters are used by this writer.
     * @param n ignored
     */
    @Override
    public void init(@SuppressWarnings("rawtypes") NamedList n) {
    }

    /**
     * Render the response of an embedded Solr request as a grep-style html page.
     * @param writer an open output writer. Must not be null.
     * @param request the Solr request. Must not be null.
     * @param rsp the Solr response. Must not be null.
     * @throws IOException when a write error occurred
     */
    @Override
    public void write(final Writer writer, final SolrQueryRequest request, final SolrQueryResponse rsp) throws IOException {
        writeHtmlHead(writer);

        final SolrParams params = request.getOriginalParams();
        final String query = getQueryParam(params);
        final String grep = getGrepParam(params, query);

        final Object responseObj = rsp.getResponse();
        if (responseObj instanceof SolrDocumentList) {
            /*
             * The response object can be a SolrDocumentList when the response is partial,
             * for example when the allowed processing time has been exceeded
             */
            writeSolrDocumentList(writer, params, query, grep, (SolrDocumentList) responseObj);
        } else if (responseObj instanceof ResultContext) {
            /* Regular response object */
            writeDocList(writer, request, params, query, grep, ((ResultContext) responseObj).getDocList());
        } else {
            writer.write("<title>Unable to process Solr response</title>\n</head><body>\n");
        }

        writer.write("</body></html>\n");
    }

    /**
     * Render the response of a remote (solrj) Solr request as a grep-style html page.
     * @param writer an open output writer. Must not be null.
     * @param request the Solr request. Must not be null.
     * @param coreName the Solr core name (unused)
     * @param rsp the solrj response. Must not be null.
     * @throws IOException when a write error occurred
     */
    @Override
    public void write(Writer writer, SolrQueryRequest request, String coreName, QueryResponse rsp) throws IOException {
        writeHtmlHead(writer);

        final SolrParams params = request.getOriginalParams();
        final String query = getQueryParam(params);
        final String grep = getGrepParam(params, query);

        writeSolrDocumentList(writer, params, query, grep, rsp.getResults());

        writer.write("</body></html>\n");
    }

    /**
     * Process a regular Solr result (internal document ids) and append a
     * representation to the output writer.
     * @param writer an open output writer. Must not be null.
     * @param request the Solr request providing the searcher and the schema
     * @param params the original Solr parameters
     * @param query the query parameter value
     * @param grep the grep parameter value
     * @param documents the matching documents; may be null or empty
     * @throws IOException when a write error occurred
     */
    private void writeDocList(final Writer writer, final SolrQueryRequest request, final SolrParams params,
            final String query, final String grep, final DocList documents) throws IOException {
        final int sz = documents == null ? 0 : documents.size();
        if (sz == 0) {
            writeNoDocumentFound(writer);
            return;
        }

        final SolrIndexSearcher searcher = request.getSearcher();
        final DocIterator iterator = documents.iterator();
        final IndexSchema schema = request.getSchema();

        writeTitleAndHeading(writer, grep, query);
        writeApiLink(writer, params);

        for (int i = 0; i < sz; i++) {
            final int id = iterator.nextDoc();
            final Document doc = searcher.doc(id, DEFAULT_FIELD_LIST);
            final LinkedHashMap<String, String> tdoc = HTMLResponseWriter.translateDoc(schema, doc);
            writeTranslatedDoc(writer, tdoc, grep);
        }
    }

    /**
     * Process the solr documents list and append a representation to the output writer.
     * @param writer an open output writer. Must not be null.
     * @param params the original Solr parameters
     * @param query the query parameter value
     * @param grep the grep parameter value
     * @param docList the solr documents list; may be null or empty
     * @throws IOException when a write error occurred
     */
    private void writeSolrDocumentList(final Writer writer, final SolrParams params, final String query,
            final String grep, final SolrDocumentList docList) throws IOException {
        if (docList == null || docList.isEmpty()) {
            writeNoDocumentFound(writer);
            return;
        }

        writeTitleAndHeading(writer, grep, query);
        writeApiLink(writer, params);

        final ReturnFields fieldsToReturn = new SolrReturnFields();
        for (final SolrDocument doc : docList) {
            final LinkedHashMap<String, String> tdoc = HTMLResponseWriter.translateDoc(doc, fieldsToReturn);
            writeTranslatedDoc(writer, tdoc, grep);
        }
    }

    /**
     * Write one document given as a field name to field value mapping.
     * @param writer an open output writer. Must not be null.
     * @param tdoc the translated document fields. Must not be null.
     * @param grep the grep phrase
     * @throws IOException when a write error occurred
     */
    private void writeTranslatedDoc(final Writer writer, final Map<String, String> tdoc, final String grep)
            throws IOException {
        final String sku = tdoc.get(CollectionSchema.sku.getSolrFieldName());
        final String title = tdoc.get(CollectionSchema.title.getSolrFieldName());
        final String text = tdoc.get(CollectionSchema.text_t.getSolrFieldName());
        writeDoc(writer, sku, extractSentences(title, text), grep);
    }

    /**
     * Write the title and the body opening used when the result list is empty.
     * @param writer an open output writer. Must not be null.
     * @throws IOException when a write error occurred
     */
    private void writeNoDocumentFound(final Writer writer) throws IOException {
        writer.write("<title>No Document Found</title>\n</head><body>\n");
    }

    /**
     * Write the html header beginning
     * @param writer an open output writer
     * @throws IOException when a write error occurred
     */
    private void writeHtmlHead(final Writer writer) throws IOException {
        writer.write("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n");
        writer.write("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n");
        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"../env/base.css\" />\n");
        writer.write("<link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"../env/style.css\" />\n");
    }

    /**
     * @param params the original request parameters. Must not be null.
     * @param query the query parameter value, used as fallback when no "grep" parameter is given
     * @return the grep parameter value without enclosing double quotes; never null
     */
    private String getGrepParam(final SolrParams params, String query) {
        String grep = params.get("grep");
        if (grep == null) {
            grep = query == null ? "" : query;
        }
        return stripQuotes(grep);
    }

    /**
     * @param params the original request parameters. Must not be null.
     * @return the query parameter value: the term following the first field prefix
     *         ("field:") of the q parameter, or an empty string when there is none
     */
    private String getQueryParam(final SolrParams params) {
        return extractQueryTerm(params.get(CommonParams.Q, ""));
    }

    /**
     * Extract the term following the first ':' of a raw Solr query string.
     * A term starting with a double quote ends at the next double quote, any
     * other term ends at the next blank; without such a delimiter the term
     * runs to the end of the string.
     * @param q the raw query string; may be null
     * @return the term without enclosing double quotes; never null
     */
    private static String extractQueryTerm(final String q) {
        if (q == null) {
            return "";
        }
        final int colon = q.indexOf(':');
        final int start = colon + 1;
        if (colon < 0 || start >= q.length()) {
            /* no field prefix, or nothing after the colon */
            return "";
        }
        /* search the delimiter only after the colon: a blank located before it must not be used as term end */
        int end = q.charAt(start) == '"' ? q.indexOf('"', start + 1) : q.indexOf(' ', start);
        if (end < 0) {
            end = q.length();
        }
        return stripQuotes(q.substring(start, end));
    }

    /**
     * Remove one leading and one trailing double quote, when present.
     * @param value the value to process. Must not be null.
     * @return the value without enclosing double quotes
     */
    private static String stripQuotes(final String value) {
        String result = value;
        if (!result.isEmpty() && result.charAt(0) == '"') {
            result = result.substring(1);
        }
        /* check emptiness again: the value may have been a single quote character */
        if (!result.isEmpty() && result.charAt(result.length() - 1) == '"') {
            result = result.substring(0, result.length() - 1);
        }
        return result;
    }

    /**
     * Append the response title and level 1 html heading
     * @param writer an open output writer. Must not be null.
     * @param grep the grep phrase
     * @param query the search query
     * @throws IOException when a write error occurred
     */
    private void writeTitleAndHeading(final Writer writer, final String grep, final String query) throws IOException {
        final String h1 = "Document Grep for query \"" + query + "\" and grep phrase \"" + grep + "\"";
        /* query and grep come from request parameters: escape them before embedding them into html */
        writer.write("<title>");
        XML.escapeCharData(h1, writer);
        writer.write("</title>\n</head><body>\n<h1>");
        XML.escapeCharData(h1, writer);
        writer.write("</h1>\n");
    }

    /**
     * Append a link to the related Solr api
     * @param writer an open output writer. Must not be null.
     * @param solrParams the original request parameters. Must not be null.
     * @throws IOException when a write error occurred
     */
    private void writeApiLink(final Writer writer, final SolrParams solrParams) throws IOException {
        final NamedList<Object> paramsList = solrParams.toNamedList();
        paramsList.remove("wt");
        @SuppressWarnings("deprecation")
        final String xmlquery = DOUBLE_QUOTE_PATTERN.matcher("select?" + SolrParams.toSolrParams(paramsList).toString()).replaceAll("%22");

        writer.write("<div id=\"api\"><a href=\"");
        /* the link is built from request parameters: escape it as an attribute value */
        XML.escapeAttributeValue(xmlquery, writer);
        writer.write("\"><img src=\"../env/grafics/api.png\" width=\"60\" height=\"40\" alt=\"API\" /></a>\n");
        writer.write("<span>This search result can also be retrieved as XML. Click the API icon to see an example call to the search rss API.</span></div>\n");
    }

    /**
     * @param title the document title; may be null
     * @param text the document text; may be null
     * @return a list of sentences extracted from the given document text and title
     */
    private List<String> extractSentences(final String title, final String text) {
        final List<String> sentences = new ArrayList<>();
        if (title != null) {
            sentences.add(title);
        }
        if (text != null) {
            final SentenceReader sr = new SentenceReader(text);
            while (sr.hasNext()) {
                final StringBuilder line = sr.next();
                if (line.length() > 0) {
                    sentences.add(line.toString());
                }
            }
        }
        return sentences;
    }

    /**
     * Append the html representation of one document: a heading linking to the
     * document url, followed by the sentences containing the grep phrase (or all
     * sentences when the grep phrase is null or empty).
     * @param writer an open output writer. Must not be null.
     * @param url the document url; null is rendered as an empty string
     * @param sentences the document sentences. Must not be null.
     * @param grep the grep phrase; may be null or empty
     * @throws IOException when a write error occurred
     */
    private static void writeDoc(final Writer writer, final String url, final List<String> sentences, final String grep)
            throws IOException {
        final String safeUrl = url == null ? "" : url;
        final boolean filter = grep != null && !grep.isEmpty();

        writer.write("<form name=\"yacydoc");
        XML.escapeAttributeValue(safeUrl, writer);
        writer.write("\" method=\"post\" action=\"#\" enctype=\"multipart/form-data\" accept-charset=\"UTF-8\">\n");
        writer.write("<fieldset>\n");
        writer.write("<h1><a href=\"");
        XML.escapeAttributeValue(safeUrl, writer);
        writer.write("\">");
        XML.escapeCharData(safeUrl, writer);
        writer.write("</a></h1>\n");
        writer.write("<dl>\n");

        boolean first = true;
        for (final String line : sentences) {
            if (filter && !line.contains(grep)) {
                continue;
            }
            writer.write("<dt>");
            if (first) {
                /* only the first listed line gets a description term; the following ones stay empty */
                first = false;
                if (filter) {
                    writer.write("matches for grep phrase \"");
                    XML.escapeCharData(grep, writer);
                    writer.write("\"");
                } else {
                    writer.write("all lines in document");
                }
            }
            writer.write("</dt>");
            writedd(writer, line, grep);
        }

        writer.write("</dl>\n");
        writer.write("</fieldset>\n");
        writer.write("</form>\n");
    }

    /**
     * Append one sentence as a description entry linking to a new grep search
     * for that sentence.
     * @param writer an open output writer. Must not be null.
     * @param line the sentence to write. Must not be null.
     * @param grep the grep phrase; null is handled as an empty phrase
     * @throws IOException when a write error occurred
     */
    private static void writedd(final Writer writer, final String line, final String grep) throws IOException {
        final String charset = StandardCharsets.UTF_8.name();
        /*
         * The sentence and the grep phrase are query string values: they must be URL-encoded,
         * otherwise characters such as '&', '#' or '%' would break the link. The encoded
         * form only contains characters which are safe within an attribute value.
         */
        writer.write("<dd><a href=\"select?q=text_t:%22");
        writer.write(URLEncoder.encode(line, charset));
        writer.write("%22&amp;rows=100&amp;grep=%22");
        writer.write(URLEncoder.encode(grep == null ? "" : grep, charset));
        writer.write("%22&amp;wt=grephtml\">");
        XML.escapeAttributeValue(line, writer);
        writer.write("</a></dd>\n");
    }
}