diff options
-rw-r--r-- | templates/search.html | 22 | ||||
-rw-r--r-- | web.py | 43 |
2 files changed, 46 insertions, 19 deletions
diff --git a/templates/search.html b/templates/search.html index b99c053..490cf75 100644 --- a/templates/search.html +++ b/templates/search.html @@ -34,10 +34,20 @@ </script> {% endblock %} {% block content %} - {% if matches %} - Matched {{ matches.__len__() }} Book{% if matches.__len__() > 1 %}s{% endif %} - {% for docnum, matches in matches.items() %} - <h2 asd="foo"> <a href="{{ url_for("do_book_file", docnum=docnum) }}">book: {{ docnum }}</a> </h2> + {% if books %} + Matched {{ books.__len__() }} Book{% if books.__len__() > 1 %}s{% endif %} + {% for docnum, book in books.items() %} + <h2 asd="foo"> + {{ book['filename'] }} + {% if book['title'] %} + (<small> + {{ book['title'] }} + </small>) + {% endif %} + <a href="{{ url_for("do_book_file", docnum=docnum) }}" title="Download {{docnum}}"> + ↸ + </a> + </h2> <div class="book"> <div class="c_frontpage"> <img class="frontpage" src="{{ url_for("do_book_frontpage", docnum=docnum) }}"/> @@ -47,7 +57,7 @@ </div> <div class="c_matches"> <div class="matches"> - {% for match in matches %} + {% for match in book['matches'] %} <h3> <a href="#">Match at page {{ match[2] }}</a> </h3> @@ -57,7 +67,7 @@ <div class="toolbar"> <a href="{{ url_for("do_page_file", docnum=match[0]) }}">Download page as PDF</a> - - <a href="{{ url_for("do_page_image", docnum=match[0]) }}">Download page as Image</a> + <a href="{{ url_for("do_page_image", docnum=match[0], size=1500) }}">Download page as Image</a> </div> <div class="excerpt"></div> </div> @@ -33,7 +33,13 @@ def do_book_file(docnum): return r -def pdf_to_image(filepath, page): +def pdf_to_image(filepath, page, size): + if not type(size) == int or size < 1 or size > 2000: + size = 260 + + density = 60 + 0.15 * size + app.logger.debug("Convert PDF to image page={0} size={1} density={2} filepath={3}".format(page, size, density, filepath)) + def generator(process, input): input.seek(0) while not process.stdin.closed: @@ -41,35 +47,38 @@ def pdf_to_image(filepath, page): if stdout: yield stdout pdffile = StringIO() - page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page-1) + page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page) out = pyPdf.PdfFileWriter() out.addPage(page) out.write(pdffile) - process = subprocess.Popen(["/usr/bin/convert", "-resize", "260x", "pdf:-", "jpeg:-"], + process = subprocess.Popen(["/usr/bin/convert", "-density", "{0}".format(density), "-resize", "{0}x".format(size), "pdf:-", "jpeg:-"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg") @app.route("/page/image/<int:docnum>", methods=["GET"]) -def do_page_image(docnum): +@app.route("/page/image/<int:docnum>/<int:size>", methods=["GET"]) +def do_page_image(docnum,size=260): with index_book.reader() as reader: document = reader.stored_fields(docnum) - return pdf_to_image(document['path'], document['pagenumber']) + page = document['pagenumber'] - 1 + return pdf_to_image(document['path'], page, size=size) @app.route("/book/frontpage/<int:docnum>", methods=["GET"]) def do_book_frontpage(docnum): with index_metadata.reader() as reader: document = reader.stored_fields(docnum) - return pdf_to_image(document['path'], 0) + return pdf_to_image(document['path'], 0, 260) @app.route("/page/file/<int:docnum>", methods=["GET"]) def do_page_file(docnum): with index_book.reader() as reader: document = reader.stored_fields(docnum) filepath = document['path'] - page = document['pagenumber'] + page = document['pagenumber'] - 1 + app.logger.debug("Extract page={0} from filepath={1}".format(page, filepath) ) inputfile = pyPdf.PdfFileReader(file(filepath, 'r')) - pdfpage = inputfile.getPage(page-1) + pdfpage = inputfile.getPage(page) outbuf = StringIO() outfile = pyPdf.PdfFileWriter() outfile.addPage(pdfpage) @@ -112,7 +121,7 @@ def excerpt(docnum, term): analysis.StandardAnalyzer(), highlight.SimpleFragmenter(), MyHtmlFormatter()) - return unicode( excerpt ) + return unicode( excerpt ) @app.route("/search/<path:term>",methods=["GET"]) @app.route("/search/", methods=["GET"]) @@ -127,19 +136,27 @@ def do_search(term=None): categories = facets.categorize(results) searcher.close() - matches = {} + books = {} with index_book.reader() as reader: for book in categories.items(): filepath = book[0] with index_metadata.searcher() as searcher: docnum = searcher.document_number(path=filepath) - matches[docnum] = [] + with index_metadata.reader() as reader2: + title = reader2.stored_fields(docnum).get('title') + books[docnum] = { + 'matches' : [], + 'title':title, + 'filename' : os.path.basename(filepath), + } for match in book[1]: pagenumber = reader.stored_fields(match[0])['pagenumber'] match = (match[0], match[1], pagenumber) - matches[docnum].append(match) + books[docnum]['matches'].append(match) - return flask.render_template('search.html', matches=matches, term=term) + return flask.render_template('search.html', + books=books, + term=term) def log_response(sender, response): sender.logger.debug('Request context is about to close down. ' |