From f87201ef681eb3e94cb570c5c83a22d2e8b269d2 Mon Sep 17 00:00:00 2001
From: yvesf
Date: Sat, 20 Nov 2010 20:24:19 +0100
Subject: web broken; indexer process Pool

---
 web.py | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'web.py')

diff --git a/web.py b/web.py
index b5494c6..38a7dbb 100644
--- a/web.py
+++ b/web.py
@@ -1,12 +1,15 @@
 #!/usr/bin/python2.6
 # coding: utf-8
 import os
+import json
 from StringIO import StringIO
 from whoosh.index import open_dir
 from whoosh.qparser import QueryParser
+import whoosh.searching as searching
 import whoosh.fields as fields
 import whoosh.analysis as analysis
 import whoosh.highlight as highlight
+import whoosh.query as query
 import flask
 import pyPdf
 import werkzeug
@@ -49,6 +52,29 @@ class MyHtmlFormatter(highlight.HtmlFormatter):
         text = unicode( flask.Markup.escape(text) )
         return highlight.HtmlFormatter._format_fragment(self, text, fragment, seen)
 
+@app.route("/json/excerpt//", methods=["GET"])
+def json_excerpt(docnum, term):
+    searcher = index.searcher()
+
+    def generator(q):
+        for result in searcher.search(q, limit=1, sortedby="pagenumber"):
+            terms = [ text for fieldname, text in q.all_terms()
+                      if fieldname == "content" ]
+            excerpt = highlight.highlight(result.get("content"),
+                                          terms,
+                                          analysis.FancyAnalyzer(),
+                                          highlight.SimpleFragmenter(),
+                                          MyHtmlFormatter())
+
+            yield json.dumps( { 'pagenumber':result.get("pagenumber"),
+                                'excerpt':excerpt,
+                                'docnum':result.docnum} )
+
+    document = searcher.reader().stored_fields(docnum)
+    q = QueryParser("content").parse(term)
+    q = query.And([ q, query.Term("path", document['path']) ] )
+    return werkzeug.Response( generator(q) )
+
 @app.route("/search/skip=/",methods=["GET"])
 @app.route("/search/",methods=["GET"])
 @app.route("/search/", methods=["GET"])
@@ -57,8 +83,16 @@ def do_search(skip=0,term=None):
         return flask.render_template('search.html', objects=[], term="", skip=0)
 
     query = QueryParser("content").parse(term)
-    results = searcher.search(query, limit=1001, sortedby="path")
+    facets = searching.Facets.from_field(searcher, "path")
+    results = searcher.search(query, limit=None, sortedby="path")
+    categories = {}
+    for key, value in facets.categorize(results).items():
+        categories[key] = map(lambda v: v[0], value)
+
+    print searcher.key_terms(categories[categories.keys()[0]], "content")
 
+    return flask.jsonify(categories)
+    """
     terms = [text for fieldname, text in query.all_terms()
              if fieldname == "content"]
 
@@ -92,6 +126,7 @@ def do_search(skip=0,term=None):
 
     objects = match_groups.values()[skip:skip+5]
     return flask.render_template('search.html', match_groups=objects, term=term, skip=skip, resultlen=len(results))
+    """
 
 if __name__ == "__main__":
     app.debug = True
-- 
cgit v1.2.1