summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- templates/search.html | 22
-rw-r--r-- web.py | 43
2 files changed, 46 insertions, 19 deletions
diff --git a/templates/search.html b/templates/search.html
index b99c053..490cf75 100644
--- a/templates/search.html
+++ b/templates/search.html
@@ -34,10 +34,20 @@
</script>
{% endblock %}
{% block content %}
- {% if matches %}
- Matched {{ matches.__len__() }} Book{% if matches.__len__() > 1 %}s{% endif %}
- {% for docnum, matches in matches.items() %}
- <h2 asd="foo"> <a href="{{ url_for("do_book_file", docnum=docnum) }}">book: {{ docnum }}</a> </h2>
+ {% if books %}
+ Matched {{ books.__len__() }} Book{% if books.__len__() > 1 %}s{% endif %}
+ {% for docnum, book in books.items() %}
+ <h2 asd="foo">
+ {{ book['filename'] }}
+ {% if book['title'] %}
+ (<small>
+ {{ book['title'] }}
+ </small>)
+ {% endif %}
+ <a href="{{ url_for("do_book_file", docnum=docnum) }}" title="Download {{docnum}}">
+ ↸
+ </a>
+ </h2>
<div class="book">
<div class="c_frontpage">
<img class="frontpage" src="{{ url_for("do_book_frontpage", docnum=docnum) }}"/>
@@ -47,7 +57,7 @@
</div>
<div class="c_matches">
<div class="matches">
- {% for match in matches %}
+ {% for match in book['matches'] %}
<h3>
<a href="#">Match at page {{ match[2] }}</a>
</h3>
@@ -57,7 +67,7 @@
<div class="toolbar">
<a href="{{ url_for("do_page_file", docnum=match[0]) }}">Download page as PDF</a>
-
- <a href="{{ url_for("do_page_image", docnum=match[0]) }}">Download page as Image</a>
+ <a href="{{ url_for("do_page_image", docnum=match[0], size=1500) }}">Download page as Image</a>
</div>
<div class="excerpt"></div>
</div>
diff --git a/web.py b/web.py
index 858019d..d1e74b7 100644
--- a/web.py
+++ b/web.py
@@ -33,7 +33,13 @@ def do_book_file(docnum):
return r
-def pdf_to_image(filepath, page):
+def pdf_to_image(filepath, page, size):
+ if not type(size) == int or size < 1 or size > 2000:
+ size = 260
+
+ density = 60 + 0.15 * size
+ app.logger.debug("Convert PDF to image page={0} size={1} density={2} filepath={3}".format(page, size, density, filepath))
+
def generator(process, input):
input.seek(0)
while not process.stdin.closed:
@@ -41,35 +47,38 @@ def pdf_to_image(filepath, page):
if stdout:
yield stdout
pdffile = StringIO()
- page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page-1)
+ page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page)
out = pyPdf.PdfFileWriter()
out.addPage(page)
out.write(pdffile)
- process = subprocess.Popen(["/usr/bin/convert", "-resize", "260x", "pdf:-", "jpeg:-"],
+ process = subprocess.Popen(["/usr/bin/convert", "-density", "{0}".format(density), "-resize", "{0}x".format(size), "pdf:-", "jpeg:-"],
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg")
@app.route("/page/image/<int:docnum>", methods=["GET"])
-def do_page_image(docnum):
+@app.route("/page/image/<int:docnum>/<int:size>", methods=["GET"])
+def do_page_image(docnum,size=260):
with index_book.reader() as reader:
document = reader.stored_fields(docnum)
- return pdf_to_image(document['path'], document['pagenumber'])
+ page = document['pagenumber'] - 1
+ return pdf_to_image(document['path'], page, size=size)
@app.route("/book/frontpage/<int:docnum>", methods=["GET"])
def do_book_frontpage(docnum):
with index_metadata.reader() as reader:
document = reader.stored_fields(docnum)
- return pdf_to_image(document['path'], 0)
+ return pdf_to_image(document['path'], 0, 260)
@app.route("/page/file/<int:docnum>", methods=["GET"])
def do_page_file(docnum):
with index_book.reader() as reader:
document = reader.stored_fields(docnum)
filepath = document['path']
- page = document['pagenumber']
+ page = document['pagenumber'] - 1
+ app.logger.debug("Extract page={0} from filepath={1}".format(page, filepath) )
inputfile = pyPdf.PdfFileReader(file(filepath, 'r'))
- pdfpage = inputfile.getPage(page-1)
+ pdfpage = inputfile.getPage(page)
outbuf = StringIO()
outfile = pyPdf.PdfFileWriter()
outfile.addPage(pdfpage)
@@ -112,7 +121,7 @@ def excerpt(docnum, term):
analysis.StandardAnalyzer(),
highlight.SimpleFragmenter(),
MyHtmlFormatter())
- return unicode( excerpt )
+ return unicode( excerpt )
@app.route("/search/<path:term>",methods=["GET"])
@app.route("/search/", methods=["GET"])
@@ -127,19 +136,27 @@ def do_search(term=None):
categories = facets.categorize(results)
searcher.close()
- matches = {}
+ books = {}
with index_book.reader() as reader:
for book in categories.items():
filepath = book[0]
with index_metadata.searcher() as searcher:
docnum = searcher.document_number(path=filepath)
- matches[docnum] = []
+ with index_metadata.reader() as reader2:
+ title = reader2.stored_fields(docnum).get('title')
+ books[docnum] = {
+ 'matches' : [],
+ 'title':title,
+ 'filename' : os.path.basename(filepath),
+ }
for match in book[1]:
pagenumber = reader.stored_fields(match[0])['pagenumber']
match = (match[0], match[1], pagenumber)
- matches[docnum].append(match)
+ books[docnum]['matches'].append(match)
- return flask.render_template('search.html', matches=matches, term=term)
+ return flask.render_template('search.html',
+ books=books,
+ term=term)
def log_response(sender, response):
sender.logger.debug('Request context is about to close down. '