summary refs log tree commit diff
path: root/web.py
diff options
context:
space:
mode:
author yvesf <yvesf-git@xapek.org> 2010-11-22 00:52:52 +0100
committer yvesf <yvesf-git@xapek.org> 2010-11-22 00:52:52 +0100
commit 1f408d58a0853653d9297bd048512c3e4e279512 (patch)
tree 204e2e85fc2cb73d2dc8328d61a6739dacc3d95f /web.py
parent 5b7f0bdf98e4fffca943e408a60f2fe2e289fef6 (diff)
download booksearch-1f408d58a0853653d9297bd048512c3e4e279512.tar.gz
booksearch-1f408d58a0853653d9297bd048512c3e4e279512.zip
index/search/convert works; ugly template
Diffstat (limited to 'web.py')
-rw-r--r-- web.py 131
1 files changed, 59 insertions, 72 deletions
diff --git a/web.py b/web.py
index 077aa22..edb4a34 100644
--- a/web.py
+++ b/web.py
@@ -24,29 +24,25 @@ index_metadata = open_dir(u"index", u"metadata")
def do_index():
return flask.redirect(flask.url_for("do_search",term=""))
-@app.route("/download/file/<int:docnum>")
-def do_download_file(docnum):
- document = index.reader().stored_fields(docnum)
- r = werkzeug.Response(open(document['path'], "r"), mimetype="application/pdf",)
- r.headers.add('Content-Disposition', 'attachment',
- filename=os.path.basename(document['path']))
- return r
-
-@app.route("/download/image/<int:docnum>", methods=["GET"])
-@app.route("/download/image/<int:docnum>/<int:page>", methods=["GET"])
-def do_download_image(docnum,page=0):
+@app.route("/book/file/<int:docnum>")
+def do_book_file(docnum):
+ with index_metadata.reader() as reader:
+ document = reader.stored_fields(docnum)
+ r = werkzeug.Response(open(document['path'], "r"), mimetype="application/pdf",)
+ r.headers.add('Content-Disposition', 'attachment',
+ filename=os.path.basename(document['path']))
+ return r
+
+
+def pdf_to_image(filepath, page):
+ print page
def generator(process, input):
input.seek(0)
while not process.stdin.closed:
stdout, stderr = process.communicate(input.read())
if stdout:
yield stdout
-
- with index_metadata.reader() as reader:
- document = reader.stored_fields(docnum)
- filepath = document['path']
pdffile = StringIO()
-
page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page)
out = pyPdf.PdfFileWriter()
out.addPage(page)
@@ -55,19 +51,35 @@ def do_download_image(docnum,page=0):
stdin=subprocess.PIPE, stdout=subprocess.PIPE)
return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg")
-@app.route("/download/page/<int:docnum>/<int:page>", methods=["GET"])
-def do_download_page(docnum,page):
- document = index.reader().stored_fields(docnum)
- inputfile = pyPdf.PdfFileReader(file(document['path'], 'r'))
- page = inputfile.getPage(page)
+@app.route("/page/image/<int:docnum>", methods=["GET"])
+def do_page_image(docnum):
+ with index_book.reader() as reader:
+ document = reader.stored_fields(docnum)
+ return pdf_to_image(document['path'], document['pagenumber'])
+
+
+@app.route("/book/frontpage/<int:docnum>", methods=["GET"])
+def do_book_frontpage(docnum):
+ with index_metadata.reader() as reader:
+ document = reader.stored_fields(docnum)
+ return pdf_to_image(document['path'], 0)
+
+@app.route("/page/file/<int:docnum>", methods=["GET"])
+def do_page_file(docnum):
+ with index_book.reader() as reader:
+ document = reader.stored_fields(docnum)
+ filepath = document['path']
+ page = document['pagenumber']
+ inputfile = pyPdf.PdfFileReader(file(filepath, 'r'))
+ pdfpage = inputfile.getPage(page)
outbuf = StringIO()
outfile = pyPdf.PdfFileWriter()
- outfile.addPage(page)
+ outfile.addPage(pdfpage)
outfile.write(outbuf)
outbuf.seek(0)
r = werkzeug.Response(outbuf, mimetype="application/pdf")
- client_filename = os.path.basename(document['path'])[:-3]
- client_filename += u".Page-{0}".format(document['pagenumber'])
+ client_filename = os.path.basename(filepath)[:-3]
+ client_filename += u".Page-{0}".format(page)
r.headers.add('Content-Disposition', 'attachment', filename=client_filename)
return r
@@ -78,8 +90,6 @@ class MyHtmlFormatter(highlight.HtmlFormatter):
@app.route("/json/excerpt/<int:docnum>/<path:term>", methods=["GET"])
def json_excerpt(docnum, term):
- searcher = index.searcher()
-
def generator(q):
for result in searcher.search(q, limit=1, sortedby="pagenumber"):
terms = [ text for fieldname, text in q.all_terms()
@@ -94,10 +104,17 @@ def json_excerpt(docnum, term):
'excerpt':excerpt,
'docnum':result.docnum} )
- document = searcher.reader().stored_fields(docnum)
+ with index_book.reader() as reader:
+ document = reader.stored_fields(docnum)
q = QueryParser("content").parse(term)
- q = query.And([ q, query.Term("path", document['path']) ] )
- return werkzeug.Response( generator(q) )
+ terms = [ text for fieldname, text in q.all_terms()
+ if fieldname == "content" ]
+ excerpt = highlight.highlight(document['content'],
+ terms,
+ analysis.FancyAnalyzer(),
+ highlight.SimpleFragmenter(),
+ MyHtmlFormatter())
+ return unicode( excerpt )
@app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
@app.route("/search/<path:term>",methods=["GET"])
@@ -110,54 +127,24 @@ def do_search(skip=0,term=None):
query = QueryParser("content").parse(term)
facets = searching.Facets.from_field(searcher, "path")
results = searcher.search(query, limit=None)
- categories = facets.categorize(results).items()
+ for result in results:
+ print result.get("pagenumber")
+ categories = facets.categorize(results)
searcher.close()
matches = {}
- for book in categories:
- filepath = book[0]
- with index_metadata.searcher() as searcher:
- docnum = searcher.document_number(path=filepath)
- matches[docnum] = []
- for match in book[1]:
- matches[docnum].append(match)
+ with index_book.reader() as reader:
+ for book in categories.items():
+ filepath = book[0]
+ with index_metadata.searcher() as searcher:
+ docnum = searcher.document_number(path=filepath)
+ matches[docnum] = []
+ for match in book[1]:
+ pagenumber = reader.stored_fields(match[0])['pagenumber']
+ match = (match[0], match[1], pagenumber)
+ matches[docnum].append(match)
return flask.render_template('search.html', matches=matches, term=term)
- """
- terms = [text for fieldname, text in query.all_terms()
- if fieldname == "content"]
-
- matches = []
- for result in results:
- title = result.get("title")
- path = result.get("path")
- excerpt = highlight.highlight(result.get("content"),
- terms,
- analysis.StandardAnalyzer(),
- highlight.SimpleFragmenter(),
- MyHtmlFormatter())
- matches.append( {'path':result.get('path'),
- 'excerpt':excerpt,
- 'docnum':result.docnum,
- 'pagenumber':result.get('pagenumber'),
- 'title':title })
- match_groups = {}
- for match in matches:
- print match
- if not match_groups.has_key(match['path']):
- match_groups[match['path']] = { 'matches':[],
- 'title' : match['title'],
- 'first_docnum' : match['docnum'],
- 'filename':os.path.basename(match['path']) }
- match_groups[ match['path'] ]['matches'].append(
- { 'excerpt':match['excerpt'],
- 'docnum':match['docnum'],
- 'pagenumber':match['pagenumber'] })
-
- objects = match_groups.values()[skip:skip+5]
- return flask.render_template('search.html',
- match_groups=objects, term=term, skip=skip, resultlen=len(results))
- """
if __name__ == "__main__":
app.debug = True