summary refs log tree commit diff
diff options
context:
space:
mode:
author yvesf <yvesf-git@xapek.org> 2010-12-05 21:59:44 +0100
committer yvesf <yvesf-git@xapek.org> 2010-12-05 21:59:44 +0100
commit ca6a17203eb77e8c4965e85fa9cba566d23b788e (patch)
tree 6c3e951db0a66188176f28cd060d67c0d6953904
parent f6e39a928a5fb98399199067c33649fd4a6b64e4 (diff)
download booksearch-ca6a17203eb77e8c4965e85fa9cba566d23b788e.tar.gz
booksearch-ca6a17203eb77e8c4965e85fa9cba566d23b788e.zip
use cache in pdf_to_image (HEAD, master)
-rw-r--r-- web.py 44
1 files changed, 30 insertions, 14 deletions
diff --git a/web.py b/web.py
index 756d03b..9ce75c4 100644
--- a/web.py
+++ b/web.py
@@ -17,11 +17,13 @@ import whoosh.query as query
import flask
import pyPdf
import werkzeug
+from werkzeug.contrib.cache import SimpleCache
import subprocess
from compat import str_format
app = flask.Flask("booksearch")
app.debug = True
+image_cache = SimpleCache()
index_book = open_dir(u"index", u"book")
index_metadata = open_dir(u"index", u"metadata")
@@ -47,20 +49,34 @@ def pdf_to_image(filepath, page, size):
density = 60 + 0.15 * size
app.logger.debug( str_format("Convert PDF to image page={page} size={size} density={density} filepath={filepath}", page=page, size=size, density=density, filepath=filepath ))
- def generator(process, input):
- input.seek(0)
- while not process.stdin.closed:
- stdout, stderr = process.communicate(input.read())
- if stdout:
- yield stdout
- pdffile = StringIO()
- page = pyPdf.PdfFileReader(file(filepath, "r")).getPage(page)
- out = pyPdf.PdfFileWriter()
- out.addPage(page)
- out.write(pdffile)
- process = subprocess.Popen(["/usr/bin/convert", "-density", str(density), "-resize", str(size), "pdf:-", "jpeg:-"],
- stdin=subprocess.PIPE, stdout=subprocess.PIPE)
- return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg")
+ if image_cache.get("PDF_IMAGE_"+str(size)+"_"+str(page)+filepath) is None:
+ try:
+ pdffile = StringIO()
+ page = pyPdf.PdfFileReader(file(filepath, "r")).getPage(page)
+ out = pyPdf.PdfFileWriter()
+ out.addPage(page)
+ out.write(pdffile)
+ pdffile.seek(0)
+ process = subprocess.Popen(["/usr/local/bin/convert", "-density", str(density), "-resize", str(size), "pdf:-", "jpeg:-"],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ process.stdin.write(pdffile.read())
+ image_data = ""
+ while not process.stdout.closed:
+ image_data += process.communicate()[0]
+ image_cache.set("PDF_IMAGE_"+str(size)+"_"+str(page)+filepath, image_data)
+ except Exception,e:
+ process = subprocess.Popen(["/usr/local/bin/convert", "-size", str(size), "-fill", "black", "-pointsize", "23", "caption:@-", "jpeg:-"],
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+ message = str_format("Error:\n{error}", error=str(e))
+ if image_cache.get("EXCEPTION_"+ str(size) + message) is None:
+ image_data =""
+ process.stdin.write(message)
+ while not process.stdout.closed:
+ image_data += process.communicate()[0]
+ image_cache.set("EXCEPTION_" + str(size) + message, image_data, timeout=60*60)
+ return werkzeug.Response(image_cache.get("EXCEPTION_" + str(size) + message), mimetype="image/jpeg")
+
+ return werkzeug.Response(image_cache.get("PDF_IMAGE_"+str(size)+"_"+str(page)+filepath), mimetype="image/jpeg")
@app.route("/page/image/<int:docnum>", methods=["GET"])
@app.route("/page/image/<int:docnum>/<int:size>", methods=["GET"])