From cc80a271ba5b2b11a2c92883fbf5734fd3420aca Mon Sep 17 00:00:00 2001 From: yvesf Date: Wed, 24 Nov 2010 17:30:53 +0100 Subject: =?UTF-8?q?sch=C3=B6neres=20template?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- templates/search.html | 22 ++++++++++++++++------ web.py | 43 ++++++++++++++++++++++++++++++------------- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/templates/search.html b/templates/search.html index b99c053..490cf75 100644 --- a/templates/search.html +++ b/templates/search.html @@ -34,10 +34,20 @@ {% endblock %} {% block content %} - {% if matches %} - Matched {{ matches.__len__() }} Book{% if matches.__len__() > 1 %}s{% endif %} - {% for docnum, matches in matches.items() %} -

book: {{ docnum }}

+ {% if books %} + Matched {{ books.__len__() }} Book{% if books.__len__() > 1 %}s{% endif %} + {% for docnum, book in books.items() %} +

+ {{ book['filename'] }} + {% if book['title'] %} + ( + {{ book['title'] }} + ) + {% endif %} + + ↸ + +

@@ -47,7 +57,7 @@
- {% for match in matches %} + {% for match in book['matches'] %}

Match at page {{ match[2] }}

@@ -57,7 +67,7 @@
diff --git a/web.py b/web.py index 858019d..d1e74b7 100644 --- a/web.py +++ b/web.py @@ -33,7 +33,13 @@ def do_book_file(docnum): return r -def pdf_to_image(filepath, page): +def pdf_to_image(filepath, page, size): + if not type(size) == int or size < 1 or size > 2000: + size = 260 + + density = 60 + 0.15 * size + app.logger.debug("Convert PDF to image page={0} size={1} density={2} filepath={3}".format(page, size, density, filepath)) + def generator(process, input): input.seek(0) while not process.stdin.closed: @@ -41,35 +47,38 @@ def pdf_to_image(filepath, page): if stdout: yield stdout pdffile = StringIO() - page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page-1) + page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page) out = pyPdf.PdfFileWriter() out.addPage(page) out.write(pdffile) - process = subprocess.Popen(["/usr/bin/convert", "-resize", "260x", "pdf:-", "jpeg:-"], + process = subprocess.Popen(["/usr/bin/convert", "-density", "{0}".format(density), "-resize", "{0}x".format(size), "pdf:-", "jpeg:-"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg") @app.route("/page/image/", methods=["GET"]) -def do_page_image(docnum): +@app.route("/page/image//", methods=["GET"]) +def do_page_image(docnum,size=260): with index_book.reader() as reader: document = reader.stored_fields(docnum) - return pdf_to_image(document['path'], document['pagenumber']) + page = document['pagenumber'] - 1 + return pdf_to_image(document['path'], page, size=size) @app.route("/book/frontpage/", methods=["GET"]) def do_book_frontpage(docnum): with index_metadata.reader() as reader: document = reader.stored_fields(docnum) - return pdf_to_image(document['path'], 0) + return pdf_to_image(document['path'], 0, 260) @app.route("/page/file/", methods=["GET"]) def do_page_file(docnum): with index_book.reader() as reader: document = reader.stored_fields(docnum) filepath = document['path'] - page = document['pagenumber'] + page = document['pagenumber'] - 1 + app.logger.debug("Extract page={0} from filepath={1}".format(page, filepath) ) inputfile = pyPdf.PdfFileReader(file(filepath, 'r')) - pdfpage = inputfile.getPage(page-1) + pdfpage = inputfile.getPage(page) outbuf = StringIO() outfile = pyPdf.PdfFileWriter() outfile.addPage(pdfpage) @@ -112,7 +121,7 @@ def excerpt(docnum, term): analysis.StandardAnalyzer(), highlight.SimpleFragmenter(), MyHtmlFormatter()) - return unicode( excerpt ) + return unicode( excerpt ) @app.route("/search/",methods=["GET"]) @app.route("/search/", methods=["GET"]) @@ -127,19 +136,27 @@ def do_search(term=None): categories = facets.categorize(results) searcher.close() - matches = {} + books = {} with index_book.reader() as reader: for book in categories.items(): filepath = book[0] with index_metadata.searcher() as searcher: docnum = searcher.document_number(path=filepath) - matches[docnum] = [] + with index_metadata.reader() as reader2: + title = reader2.stored_fields(docnum).get('title') + books[docnum] = { + 'matches' : [], + 'title':title, + 'filename' : os.path.basename(filepath), + } for match in book[1]: pagenumber = reader.stored_fields(match[0])['pagenumber'] match = (match[0], match[1], pagenumber) - matches[docnum].append(match) + books[docnum]['matches'].append(match) - return flask.render_template('search.html', matches=matches, term=term) + return flask.render_template('search.html', + books=books, + term=term) def log_response(sender, response): sender.logger.debug('Request context is about to close down. ' -- cgit v1.2.1