summary | refs | log | tree | commit | diff
path: root/web.py
diff options
context:
space:
mode:
author: yvesf <yvesf-git@xapek.org> 2010-11-20 18:20:32 +0100
committer: yvesf <yvesf-git@xapek.org> 2010-11-20 18:20:32 +0100
commit: f78911646cf53e37c47921f9dcd9702d6e946f54 (patch)
tree: 013c97eeae02c9abfdfa205ff4e6258a120464a0 /web.py
parent: d66c0d42b678408921f5c3cb43c18df5b1f87338 (diff)
download: booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.tar.gz
booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.zip
ergebnisliste unbefriedigend
Diffstat (limited to 'web.py')
-rw-r--r-- web.py 53
1 files changed, 38 insertions, 15 deletions
diff --git a/web.py b/web.py
index b262e1d..b5494c6 100644
--- a/web.py
+++ b/web.py
@@ -28,22 +28,27 @@ def do_download_file(docnum):
filename=os.path.basename(document['path']))
return r
-@app.route("/download/page/<int:docnum>", methods=["GET"])
-def do_download_page(docnum):
+@app.route("/download/page/<int:docnum>/<int:page>", methods=["GET"])
+def do_download_page(docnum,page):
document = index.reader().stored_fields(docnum)
inputfile = pyPdf.PdfFileReader(file(document['path'], 'r'))
- page = inputfile.getPage(document['pagenumber'])
+ page = inputfile.getPage(page)
outbuf = StringIO()
outfile = pyPdf.PdfFileWriter()
outfile.addPage(page)
outfile.write(outbuf)
outbuf.seek(0)
- r= werkzeug.Response(outbuf, mimetype="application/pdf")
+ r = werkzeug.Response(outbuf, mimetype="application/pdf")
client_filename = os.path.basename(document['path'])[:-3]
client_filename += u".Page-{0}".format(document['pagenumber'])
r.headers.add('Content-Disposition', 'attachment', filename=client_filename)
return r
+class MyHtmlFormatter(highlight.HtmlFormatter):
+ def _format_fragment(self, text, fragment, seen):
+ text = unicode( flask.Markup.escape(text) )
+ return highlight.HtmlFormatter._format_fragment(self, text, fragment, seen)
+
@app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
@app.route("/search/<path:term>",methods=["GET"])
@app.route("/search/", methods=["GET"])
@@ -52,23 +57,41 @@ def do_search(skip=0,term=None):
return flask.render_template('search.html', objects=[], term="", skip=0)
query = QueryParser("content").parse(term)
- results = searcher.search(query, limit=skip+5)
+ results = searcher.search(query, limit=1001, sortedby="path")
terms = [text for fieldname, text in query.all_terms()
if fieldname == "content"]
- objects = []
- for result in results[skip:skip+5]:
+
+ matches = []
+ for result in results:
title = result.get("title")
path = result.get("path")
- print path
- high = highlight.highlight(result.get("content"),
- terms,
- analysis.StandardAnalyzer(),
- highlight.SimpleFragmenter(),
- highlight.HtmlFormatter())
- objects.append({ 'title' : title, 'path' : path, 'excerpt' : high, 'docnum':result.docnum })
+ excerpt = highlight.highlight(result.get("content"),
+ terms,
+ analysis.StandardAnalyzer(),
+ highlight.SimpleFragmenter(),
+ MyHtmlFormatter())
+ matches.append( {'path':result.get('path'),
+ 'excerpt':excerpt,
+ 'docnum':result.docnum,
+ 'pagenumber':result.get('pagenumber'),
+ 'title':title })
+ match_groups = {}
+ for match in matches:
+ print match
+ if not match_groups.has_key(match['path']):
+ match_groups[match['path']] = { 'matches':[],
+ 'title' : match['title'],
+ 'first_docnum' : match['docnum'],
+ 'filename':os.path.basename(match['path']) }
+ match_groups[ match['path'] ]['matches'].append(
+ { 'excerpt':match['excerpt'],
+ 'docnum':match['docnum'],
+ 'pagenumber':match['pagenumber'] })
- return flask.render_template('search.html', objects=objects, term=term, skip=skip)
+ objects = match_groups.values()[skip:skip+5]
+ return flask.render_template('search.html',
+ match_groups=objects, term=term, skip=skip, resultlen=len(results))
if __name__ == "__main__":
app.debug = True