diff options
Diffstat (limited to 'web.py')
-rw-r--r-- | web.py | 43 |
1 files changed, 30 insertions, 13 deletions
@@ -33,7 +33,13 @@ def do_book_file(docnum):
 
     return r
 
-def pdf_to_image(filepath, page):
+def pdf_to_image(filepath, page, size):
+    if not type(size) == int or size < 1 or size > 2000:
+        size = 260
+
+    density = 60 + 0.15 * size
+    app.logger.debug("Convert PDF to image page={0} size={1} density={2} filepath={3}".format(page, size, density, filepath))
+
     def generator(process, input):
         input.seek(0)
         while not process.stdin.closed:
@@ -41,35 +47,38 @@ def pdf_to_image(filepath, page):
             if stdout:
                 yield stdout
 
     pdffile = StringIO()
-    page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page-1)
+    page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page)
     out = pyPdf.PdfFileWriter()
     out.addPage(page)
     out.write(pdffile)
-    process = subprocess.Popen(["/usr/bin/convert", "-resize", "260x", "pdf:-", "jpeg:-"],
+    process = subprocess.Popen(["/usr/bin/convert", "-density", "{0}".format(density), "-resize", "{0}x".format(size), "pdf:-", "jpeg:-"],
         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
     return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg")
 
 @app.route("/page/image/<int:docnum>", methods=["GET"])
-def do_page_image(docnum):
+@app.route("/page/image/<int:docnum>/<int:size>", methods=["GET"])
+def do_page_image(docnum,size=260):
     with index_book.reader() as reader:
         document = reader.stored_fields(docnum)
-    return pdf_to_image(document['path'], document['pagenumber'])
+    page = document['pagenumber'] - 1
+    return pdf_to_image(document['path'], page, size=size)
 
 @app.route("/book/frontpage/<int:docnum>", methods=["GET"])
 def do_book_frontpage(docnum):
     with index_metadata.reader() as reader:
         document = reader.stored_fields(docnum)
-    return pdf_to_image(document['path'], 0)
+    return pdf_to_image(document['path'], 0, 260)
 
 @app.route("/page/file/<int:docnum>", methods=["GET"])
 def do_page_file(docnum):
     with index_book.reader() as reader:
         document = reader.stored_fields(docnum)
     filepath = document['path']
-    page = document['pagenumber']
+    page = document['pagenumber'] - 1
+    app.logger.debug("Extract page={0} from filepath={1}".format(page, filepath) )
     inputfile = pyPdf.PdfFileReader(file(filepath, 'r'))
-    pdfpage = inputfile.getPage(page-1)
+    pdfpage = inputfile.getPage(page)
     outbuf = StringIO()
     outfile = pyPdf.PdfFileWriter()
     outfile.addPage(pdfpage)
@@ -112,7 +121,7 @@ def excerpt(docnum, term):
         analysis.StandardAnalyzer(),
         highlight.SimpleFragmenter(),
         MyHtmlFormatter())
-    return unicode( excerpt )
+    return unicode( excerpt )
 
 @app.route("/search/<path:term>",methods=["GET"])
 @app.route("/search/", methods=["GET"])
@@ -127,19 +136,27 @@ def do_search(term=None):
     categories = facets.categorize(results)
     searcher.close()
 
-    matches = {}
+    books = {}
     with index_book.reader() as reader:
         for book in categories.items():
             filepath = book[0]
             with index_metadata.searcher() as searcher:
                 docnum = searcher.document_number(path=filepath)
-            matches[docnum] = []
+            with index_metadata.reader() as reader2:
+                title = reader2.stored_fields(docnum).get('title')
+            books[docnum] = {
+                'matches' : [],
+                'title':title,
+                'filename' : os.path.basename(filepath),
+            }
             for match in book[1]:
                 pagenumber = reader.stored_fields(match[0])['pagenumber']
                 match = (match[0], match[1], pagenumber)
-                matches[docnum].append(match)
+                books[docnum]['matches'].append(match)
 
-    return flask.render_template('search.html', matches=matches, term=term)
+    return flask.render_template('search.html',
+        books=books,
+        term=term)
 
 def log_response(sender, response):
     sender.logger.debug('Request context is about to close down. '