summary refs log tree commit diff
path: root/web.py
diff options
context:
space:
mode:
Diffstat (limited to 'web.py')
-rw-r--r-- web.py 37
1 files changed, 36 insertions, 1 deletions
diff --git a/web.py b/web.py
index b5494c6..38a7dbb 100644
--- a/web.py
+++ b/web.py
@@ -1,12 +1,15 @@
#!/usr/bin/python2.6
# coding: utf-8
import os
+import json
from StringIO import StringIO
from whoosh.index import open_dir
from whoosh.qparser import QueryParser
+import whoosh.searching as searching
import whoosh.fields as fields
import whoosh.analysis as analysis
import whoosh.highlight as highlight
+import whoosh.query as query
import flask
import pyPdf
import werkzeug
@@ -49,6 +52,29 @@ class MyHtmlFormatter(highlight.HtmlFormatter):
text = unicode( flask.Markup.escape(text) )
return highlight.HtmlFormatter._format_fragment(self, text, fragment, seen)
+@app.route("/json/excerpt/<int:docnum>/<path:term>", methods=["GET"])
+def json_excerpt(docnum, term):
+    """Respond with JSON (pagenumber, excerpt, docnum) for the first hit of
+    `term`, by pagenumber, sharing stored document `docnum`'s path."""
+    searcher = index.searcher()
+    try:
+        document = searcher.reader().stored_fields(docnum)
+        # Restrict the parsed query to documents with the same "path".
+        q = query.And([QueryParser("content").parse(term),
+                       query.Term("path", document['path'])])
+        terms = [text for fieldname, text in q.all_terms()
+                 if fieldname == "content"]
+        body = []  # stays empty on no hit; eager so the searcher can close
+        for result in searcher.search(q, limit=1, sortedby="pagenumber"):
+            excerpt = highlight.highlight(result.get("content"), terms,
+                analysis.FancyAnalyzer(), highlight.SimpleFragmenter(),
+                MyHtmlFormatter())
+            body.append(json.dumps({'pagenumber': result.get("pagenumber"),
+                'excerpt': excerpt, 'docnum': result.docnum}))
+    finally:
+        searcher.close()  # previously leaked on every request
+    return werkzeug.Response(body, mimetype="application/json")
+
@app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
@app.route("/search/<path:term>",methods=["GET"])
@app.route("/search/", methods=["GET"])
@@ -57,8 +83,16 @@ def do_search(skip=0,term=None):
return flask.render_template('search.html', objects=[], term="", skip=0)
query = QueryParser("content").parse(term)
- results = searcher.search(query, limit=1001, sortedby="path")
+ facets = searching.Facets.from_field(searcher, "path")
+ results = searcher.search(query, limit=None, sortedby="path")
+ categories = {}
+ for key, value in facets.categorize(results).items():
+ categories[key] = map(lambda v: v[0], value)
+
+ print searcher.key_terms(categories[categories.keys()[0]], "content")
+ return flask.jsonify(categories)
+ """
terms = [text for fieldname, text in query.all_terms()
if fieldname == "content"]
@@ -92,6 +126,7 @@ def do_search(skip=0,term=None):
objects = match_groups.values()[skip:skip+5]
return flask.render_template('search.html',
match_groups=objects, term=term, skip=skip, resultlen=len(results))
+ """
if __name__ == "__main__":
app.debug = True