diff options
author | yvesf <yvesf-git@xapek.org> | 2010-11-20 20:24:19 +0100 |
---|---|---|
committer | yvesf <yvesf-git@xapek.org> | 2010-11-20 20:24:19 +0100 |
commit | f87201ef681eb3e94cb570c5c83a22d2e8b269d2 (patch) | |
tree | 8c98487b84b9656bbf6d4160e619cfbfc168ce6f /web.py | |
parent | f78911646cf53e37c47921f9dcd9702d6e946f54 (diff) | |
download | booksearch-f87201ef681eb3e94cb570c5c83a22d2e8b269d2.tar.gz booksearch-f87201ef681eb3e94cb570c5c83a22d2e8b269d2.zip |
web broken; indexer process Pool
Diffstat (limited to 'web.py')
-rw-r--r-- | web.py | 37 |
1 file changed, 36 insertions, 1 deletion
@@ -1,12 +1,15 @@ #!/usr/bin/python2.6 # coding: utf-8 import os +import json from StringIO import StringIO from whoosh.index import open_dir from whoosh.qparser import QueryParser +import whoosh.searching as searching import whoosh.fields as fields import whoosh.analysis as analysis import whoosh.highlight as highlight +import whoosh.query as query import flask import pyPdf import werkzeug @@ -49,6 +52,29 @@ class MyHtmlFormatter(highlight.HtmlFormatter): text = unicode( flask.Markup.escape(text) ) return highlight.HtmlFormatter._format_fragment(self, text, fragment, seen) +@app.route("/json/excerpt/<int:docnum>/<path:term>", methods=["GET"]) +def json_excerpt(docnum, term): + searcher = index.searcher() + + def generator(q): + for result in searcher.search(q, limit=1, sortedby="pagenumber"): + terms = [ text for fieldname, text in q.all_terms() + if fieldname == "content" ] + excerpt = highlight.highlight(result.get("content"), + terms, + analysis.FancyAnalyzer(), + highlight.SimpleFragmenter(), + MyHtmlFormatter()) + + yield json.dumps( { 'pagenumber':result.get("pagenumber"), + 'excerpt':excerpt, + 'docnum':result.docnum} ) + + document = searcher.reader().stored_fields(docnum) + q = QueryParser("content").parse(term) + q = query.And([ q, query.Term("path", document['path']) ] ) + return werkzeug.Response( generator(q) ) + @app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"]) @app.route("/search/<path:term>",methods=["GET"]) @app.route("/search/", methods=["GET"]) @@ -57,8 +83,16 @@ def do_search(skip=0,term=None): return flask.render_template('search.html', objects=[], term="", skip=0) query = QueryParser("content").parse(term) - results = searcher.search(query, limit=1001, sortedby="path") + facets = searching.Facets.from_field(searcher, "path") + results = searcher.search(query, limit=None, sortedby="path") + categories = {} + for key, value in facets.categorize(results).items(): + categories[key] = map(lambda v: v[0], value) + + print 
searcher.key_terms(categories[categories.keys()[0]], "content") + return flask.jsonify(categories) + """ terms = [text for fieldname, text in query.all_terms() if fieldname == "content"] @@ -92,6 +126,7 @@ def do_search(skip=0,term=None): objects = match_groups.values()[skip:skip+5] return flask.render_template('search.html', match_groups=objects, term=term, skip=skip, resultlen=len(results)) + """ if __name__ == "__main__": app.debug = True |