ergebnisliste unbefriedigend

author: yvesf <yvesf-git@xapek.org> 2010-11-20 18:20:32 +0100
committer: yvesf <yvesf-git@xapek.org> 2010-11-20 18:20:32 +0100
commit: f78911646cf53e37c47921f9dcd9702d6e946f54 (patch)
tree: 013c97eeae02c9abfdfa205ff4e6258a120464a0 /web.py
parent: d66c0d42b678408921f5c3cb43c18df5b1f87338 (diff)
download: booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.tar.gz booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.zip
1 files changed, 38 insertions, 15 deletions
diff --git a/web.py b/web.py
index b262e1d..b5494c6 100644
--- a/web.py
+++ b/web.py
@@ -28,22 +28,27 @@ def do_download_file(docnum):
         filename=os.path.basename(document['path']))
     return r
 
-@app.route("/download/page/<int:docnum>", methods=["GET"])
-def do_download_page(docnum):
+@app.route("/download/page/<int:docnum>/<int:page>", methods=["GET"])
+def do_download_page(docnum,page):
     document = index.reader().stored_fields(docnum)
     inputfile = pyPdf.PdfFileReader(file(document['path'], 'r'))
-    page = inputfile.getPage(document['pagenumber'])
+    page = inputfile.getPage(page)
     outbuf = StringIO()
     outfile = pyPdf.PdfFileWriter()
     outfile.addPage(page)
     outfile.write(outbuf)
     outbuf.seek(0)
-    r= werkzeug.Response(outbuf, mimetype="application/pdf")
+    r = werkzeug.Response(outbuf, mimetype="application/pdf")
     client_filename = os.path.basename(document['path'])[:-3]
     client_filename += u".Page-{0}".format(document['pagenumber'])
     r.headers.add('Content-Disposition', 'attachment', filename=client_filename)
     return r
 
+class MyHtmlFormatter(highlight.HtmlFormatter):
+    def _format_fragment(self, text, fragment, seen):
+        text = unicode( flask.Markup.escape(text) )
+        return highlight.HtmlFormatter._format_fragment(self, text, fragment, seen)
+
 @app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
 @app.route("/search/<path:term>",methods=["GET"])
 @app.route("/search/", methods=["GET"])
@@ -52,23 +57,41 @@ def do_search(skip=0,term=None):
         return flask.render_template('search.html', objects=[], term="", skip=0)
 
     query = QueryParser("content").parse(term)
-    results = searcher.search(query, limit=skip+5)
+    results = searcher.search(query, limit=1001, sortedby="path")
 
     terms = [text for fieldname, text in query.all_terms()
                     if fieldname == "content"]
-    objects = []
-    for result in results[skip:skip+5]:
+
+    matches = []
+    for result in results:
         title = result.get("title")
         path = result.get("path")
-        print path
-        high = highlight.highlight(result.get("content"), 
-            terms, 
-            analysis.StandardAnalyzer(),
-            highlight.SimpleFragmenter(),
-            highlight.HtmlFormatter())
-        objects.append({ 'title' : title, 'path' : path, 'excerpt' : high, 'docnum':result.docnum })
+        excerpt = highlight.highlight(result.get("content"), 
+                    terms, 
+                    analysis.StandardAnalyzer(),
+                    highlight.SimpleFragmenter(),
+                    MyHtmlFormatter())
+        matches.append( {'path':result.get('path'),
+                         'excerpt':excerpt, 
+                         'docnum':result.docnum, 
+                         'pagenumber':result.get('pagenumber'), 
+                         'title':title })
+    match_groups = {}
+    for match in matches:
+        print match
+        if not match_groups.has_key(match['path']):
+            match_groups[match['path']] = { 'matches':[],
+                                            'title' : match['title'],
+                                            'first_docnum' : match['docnum'],
+                                            'filename':os.path.basename(match['path']) }
+        match_groups[ match['path'] ]['matches'].append(
+            { 'excerpt':match['excerpt'],
+              'docnum':match['docnum'],
+              'pagenumber':match['pagenumber'] })
 
-    return flask.render_template('search.html', objects=objects, term=term, skip=skip)
+    objects = match_groups.values()[skip:skip+5]
+    return flask.render_template('search.html', 
+        match_groups=objects, term=term, skip=skip, resultlen=len(results))
 
 if __name__ == "__main__":
     app.debug = True
author	yvesf <yvesf-git@xapek.org>	2010-11-20 18:20:32 +0100
committer	yvesf <yvesf-git@xapek.org>	2010-11-20 18:20:32 +0100
commit	f78911646cf53e37c47921f9dcd9702d6e946f54 (patch)
tree	013c97eeae02c9abfdfa205ff4e6258a120464a0 /web.py
parent	d66c0d42b678408921f5c3cb43c18df5b1f87338 (diff)
download	booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.tar.gz booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.zip