From ca6a17203eb77e8c4965e85fa9cba566d23b788e Mon Sep 17 00:00:00 2001
From: yvesf
Date: Sun, 5 Dec 2010 21:59:44 +0100
Subject: use cache in pdf_to_image

---
 web.py | 44 ++++++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 14 deletions(-)

(limited to 'web.py')

diff --git a/web.py b/web.py
index 756d03b..9ce75c4 100644
--- a/web.py
+++ b/web.py
@@ -17,11 +17,13 @@ import whoosh.query as query
 import flask
 import pyPdf
 import werkzeug
+from werkzeug.contrib.cache import SimpleCache
 import subprocess
 from compat import str_format
 
 app = flask.Flask("booksearch")
 app.debug = True
+image_cache = SimpleCache()
 
 index_book = open_dir(u"index", u"book")
 index_metadata = open_dir(u"index", u"metadata")
@@ -47,20 +49,34 @@ def pdf_to_image(filepath, page, size):
     density = 60 + 0.15 * size
     app.logger.debug( str_format("Convert PDF to image page={page} size={size} density={density} filepath={filepath}",
                                  page=page, size=size, density=density, filepath=filepath ))
-    def generator(process, input):
-        input.seek(0)
-        while not process.stdin.closed:
-            stdout, stderr = process.communicate(input.read())
-            if stdout:
-                yield stdout
-    pdffile = StringIO()
-    page = pyPdf.PdfFileReader(file(filepath, "r")).getPage(page)
-    out = pyPdf.PdfFileWriter()
-    out.addPage(page)
-    out.write(pdffile)
-    process = subprocess.Popen(["/usr/bin/convert", "-density", str(density), "-resize", str(size), "pdf:-", "jpeg:-"],
-                               stdin=subprocess.PIPE, stdout=subprocess.PIPE)
-    return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg")
+    if image_cache.get("PDF_IMAGE_"+str(size)+"_"+str(page)+filepath) is None:
+        try:
+            pdffile = StringIO()
+            page = pyPdf.PdfFileReader(file(filepath, "r")).getPage(page)
+            out = pyPdf.PdfFileWriter()
+            out.addPage(page)
+            out.write(pdffile)
+            pdffile.seek(0)
+            process = subprocess.Popen(["/usr/local/bin/convert", "-density", str(density), "-resize", str(size), "pdf:-", "jpeg:-"],
+                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+            process.stdin.write(pdffile.read())
+            image_data = ""
+            while not process.stdout.closed:
+                image_data += process.communicate()[0]
+            image_cache.set("PDF_IMAGE_"+str(size)+"_"+str(page)+filepath, image_data)
+        except Exception,e:
+            process = subprocess.Popen(["/usr/local/bin/convert", "-size", str(size), "-fill", "black", "-pointsize", "23", "caption:@-", "jpeg:-"],
+                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+            message = str_format("Error:\n{error}", error=str(e))
+            if image_cache.get("EXCEPTION_"+ str(size) + message) is None:
+                image_data =""
+                process.stdin.write(message)
+                while not process.stdout.closed:
+                    image_data += process.communicate()[0]
+                image_cache.set("EXCEPTION_" + str(size) + message, image_data, timeout=60*60)
+            return werkzeug.Response(image_cache.get("EXCEPTION_" + str(size) + message), mimetype="image/jpeg")
+
+    return werkzeug.Response(image_cache.get("PDF_IMAGE_"+str(size)+"_"+str(page)+filepath), mimetype="image/jpeg")
 
 @app.route("/page/image/", methods=["GET"])
 @app.route("/page/image//", methods=["GET"])
-- 
cgit v1.2.1