diff options
author | yvesf <yvesf-git@xapek.org> | 2010-11-20 18:20:32 +0100 |
---|---|---|
committer | yvesf <yvesf-git@xapek.org> | 2010-11-20 18:20:32 +0100 |
commit | f78911646cf53e37c47921f9dcd9702d6e946f54 (patch) | |
tree | 013c97eeae02c9abfdfa205ff4e6258a120464a0 /web.py | |
parent | d66c0d42b678408921f5c3cb43c18df5b1f87338 (diff) | |
download | booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.tar.gz booksearch-f78911646cf53e37c47921f9dcd9702d6e946f54.zip |
ergebnisliste unbefriedigend
Diffstat (limited to 'web.py')
-rw-r--r-- | web.py | 53 |
1 files changed, 38 insertions, 15 deletions
```diff
@@ -28,22 +28,27 @@ def do_download_file(docnum):
             filename=os.path.basename(document['path']))
     return r
 
-@app.route("/download/page/<int:docnum>", methods=["GET"])
-def do_download_page(docnum):
+@app.route("/download/page/<int:docnum>/<int:page>", methods=["GET"])
+def do_download_page(docnum,page):
     document = index.reader().stored_fields(docnum)
     inputfile = pyPdf.PdfFileReader(file(document['path'], 'r'))
-    page = inputfile.getPage(document['pagenumber'])
+    page = inputfile.getPage(page)
     outbuf = StringIO()
     outfile = pyPdf.PdfFileWriter()
     outfile.addPage(page)
     outfile.write(outbuf)
     outbuf.seek(0)
-    r= werkzeug.Response(outbuf, mimetype="application/pdf")
+    r = werkzeug.Response(outbuf, mimetype="application/pdf")
     client_filename = os.path.basename(document['path'])[:-3]
     client_filename += u".Page-{0}".format(document['pagenumber'])
     r.headers.add('Content-Disposition', 'attachment', filename=client_filename)
     return r
 
+class MyHtmlFormatter(highlight.HtmlFormatter):
+    def _format_fragment(self, text, fragment, seen):
+        text = unicode( flask.Markup.escape(text) )
+        return highlight.HtmlFormatter._format_fragment(self, text, fragment, seen)
+
 @app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
 @app.route("/search/<path:term>",methods=["GET"])
 @app.route("/search/", methods=["GET"])
@@ -52,23 +57,41 @@ def do_search(skip=0,term=None):
         return flask.render_template('search.html', objects=[], term="", skip=0)
 
     query = QueryParser("content").parse(term)
-    results = searcher.search(query, limit=skip+5)
+    results = searcher.search(query, limit=1001, sortedby="path")
     terms = [text for fieldname, text in query.all_terms()
             if fieldname == "content"]
-    objects = []
-    for result in results[skip:skip+5]:
+
+    matches = []
+    for result in results:
         title = result.get("title")
         path = result.get("path")
-        print path
-        high = highlight.highlight(result.get("content"),
-                terms,
-                analysis.StandardAnalyzer(),
-                highlight.SimpleFragmenter(),
-                highlight.HtmlFormatter())
-        objects.append({ 'title' : title, 'path' : path, 'excerpt' : high, 'docnum':result.docnum })
+        excerpt = highlight.highlight(result.get("content"),
+                terms,
+                analysis.StandardAnalyzer(),
+                highlight.SimpleFragmenter(),
+                MyHtmlFormatter())
+        matches.append( {'path':result.get('path'),
+                         'excerpt':excerpt,
+                         'docnum':result.docnum,
+                         'pagenumber':result.get('pagenumber'),
+                         'title':title })
+    match_groups = {}
+    for match in matches:
+        print match
+        if not match_groups.has_key(match['path']):
+            match_groups[match['path']] = { 'matches':[],
+                                            'title' : match['title'],
+                                            'first_docnum' : match['docnum'],
+                                            'filename':os.path.basename(match['path']) }
+        match_groups[ match['path'] ]['matches'].append(
+                { 'excerpt':match['excerpt'],
+                  'docnum':match['docnum'],
+                  'pagenumber':match['pagenumber'] })
 
 
-    return flask.render_template('search.html', objects=objects, term=term, skip=skip)
+    objects = match_groups.values()[skip:skip+5]
+    return flask.render_template('search.html',
+        match_groups=objects, term=term, skip=skip, resultlen=len(results))
 
 if __name__ == "__main__":
     app.debug = True
```