index/search/convert works; ugly template

author: yvesf <yvesf-git@xapek.org> 2010-11-22 00:52:52 +0100
committer: yvesf <yvesf-git@xapek.org> 2010-11-22 00:52:52 +0100
commit: 1f408d58a0853653d9297bd048512c3e4e279512 (patch)
tree: 204e2e85fc2cb73d2dc8328d61a6739dacc3d95f /web.py
parent: 5b7f0bdf98e4fffca943e408a60f2fe2e289fef6 (diff)
download: booksearch-1f408d58a0853653d9297bd048512c3e4e279512.tar.gz
booksearch-1f408d58a0853653d9297bd048512c3e4e279512.zip
1 files changed, 59 insertions, 72 deletions
diff --git a/web.py b/web.py
index 077aa22..edb4a34 100644
--- a/web.py
+++ b/web.py
@@ -24,29 +24,25 @@ index_metadata = open_dir(u"index", u"metadata")
 def do_index():
     return flask.redirect(flask.url_for("do_search",term=""))
 
-@app.route("/download/file/<int:docnum>")
-def do_download_file(docnum):
-    document = index.reader().stored_fields(docnum)
-    r = werkzeug.Response(open(document['path'], "r"), mimetype="application/pdf",)
-    r.headers.add('Content-Disposition', 'attachment', 
-        filename=os.path.basename(document['path']))
-    return r
-
-@app.route("/download/image/<int:docnum>", methods=["GET"])
-@app.route("/download/image/<int:docnum>/<int:page>", methods=["GET"])
-def do_download_image(docnum,page=0):
+@app.route("/book/file/<int:docnum>")
+def do_book_file(docnum):
+    with index_metadata.reader() as reader:
+        document = reader.stored_fields(docnum)
+        r = werkzeug.Response(open(document['path'], "r"), mimetype="application/pdf",)
+        r.headers.add('Content-Disposition', 'attachment', 
+            filename=os.path.basename(document['path']))
+        return r
+
+   
+def pdf_to_image(filepath, page):
+    print page
     def generator(process, input):
         input.seek(0)
         while not process.stdin.closed:
             stdout, stderr = process.communicate(input.read())
             if stdout:
                 yield stdout
-
-    with index_metadata.reader() as reader:
-        document = reader.stored_fields(docnum)
-        filepath = document['path']
     pdffile = StringIO()
-
     page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page)
     out = pyPdf.PdfFileWriter()
     out.addPage(page)
@@ -55,19 +51,35 @@ def do_download_image(docnum,page=0):
         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
     return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg")
 
-@app.route("/download/page/<int:docnum>/<int:page>", methods=["GET"])
-def do_download_page(docnum,page):
-    document = index.reader().stored_fields(docnum)
-    inputfile = pyPdf.PdfFileReader(file(document['path'], 'r'))
-    page = inputfile.getPage(page)
+@app.route("/page/image/<int:docnum>", methods=["GET"])
+def do_page_image(docnum):
+    with index_book.reader() as reader:
+        document = reader.stored_fields(docnum)
+        return pdf_to_image(document['path'], document['pagenumber'])
+    
+ 
+@app.route("/book/frontpage/<int:docnum>", methods=["GET"])
+def do_book_frontpage(docnum):
+    with index_metadata.reader() as reader:
+        document = reader.stored_fields(docnum)
+        return pdf_to_image(document['path'], 0)
+ 
+@app.route("/page/file/<int:docnum>", methods=["GET"])
+def do_page_file(docnum):
+    with index_book.reader() as reader:
+        document = reader.stored_fields(docnum)
+        filepath = document['path']
+        page = document['pagenumber']
+    inputfile = pyPdf.PdfFileReader(file(filepath, 'r'))
+    pdfpage = inputfile.getPage(page)
     outbuf = StringIO()
     outfile = pyPdf.PdfFileWriter()
-    outfile.addPage(page)
+    outfile.addPage(pdfpage)
     outfile.write(outbuf)
     outbuf.seek(0)
     r = werkzeug.Response(outbuf, mimetype="application/pdf")
-    client_filename = os.path.basename(document['path'])[:-3]
-    client_filename += u".Page-{0}".format(document['pagenumber'])
+    client_filename = os.path.basename(filepath)[:-3]
+    client_filename += u".Page-{0}".format(page)
     r.headers.add('Content-Disposition', 'attachment', filename=client_filename)
     return r
 
@@ -78,8 +90,6 @@ class MyHtmlFormatter(highlight.HtmlFormatter):
 
 @app.route("/json/excerpt/<int:docnum>/<path:term>", methods=["GET"])
 def json_excerpt(docnum, term):
-    searcher = index.searcher()
-
     def generator(q):
         for result in searcher.search(q, limit=1, sortedby="pagenumber"):
             terms = [ text for fieldname, text in q.all_terms()
@@ -94,10 +104,17 @@ def json_excerpt(docnum, term):
                                 'excerpt':excerpt,
                                 'docnum':result.docnum} )
 
-    document = searcher.reader().stored_fields(docnum)
+    with index_book.reader() as reader:
+        document = reader.stored_fields(docnum)
     q = QueryParser("content").parse(term)
-    q = query.And([ q, query.Term("path", document['path']) ] )
-    return werkzeug.Response( generator(q) )
+    terms = [ text for fieldname, text in q.all_terms()
+        if fieldname == "content" ]
+    excerpt = highlight.highlight(document['content'],
+                terms,
+                analysis.FancyAnalyzer(),
+                highlight.SimpleFragmenter(),
+                MyHtmlFormatter())
+    return unicode(  excerpt  )
 
 @app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
 @app.route("/search/<path:term>",methods=["GET"])
@@ -110,54 +127,24 @@ def do_search(skip=0,term=None):
     query = QueryParser("content").parse(term)
     facets = searching.Facets.from_field(searcher, "path")
     results = searcher.search(query, limit=None)
-    categories = facets.categorize(results).items()
+    for result in results:
+        print result.get("pagenumber")
+    categories = facets.categorize(results)
     searcher.close()
 
     matches = {}
-    for book in categories:
-        filepath = book[0]
-        with index_metadata.searcher() as searcher:
-            docnum = searcher.document_number(path=filepath)
-        matches[docnum] = []
-        for match in book[1]:
-            matches[docnum].append(match)
+    with index_book.reader() as reader:
+        for book in categories.items():
+            filepath = book[0]
+            with index_metadata.searcher() as searcher:
+                docnum = searcher.document_number(path=filepath)
+            matches[docnum] = []
+            for match in book[1]:
+                pagenumber = reader.stored_fields(match[0])['pagenumber']
+                match = (match[0], match[1], pagenumber)
+                matches[docnum].append(match)
 
     return flask.render_template('search.html', matches=matches, term=term)
-    """
-    terms = [text for fieldname, text in query.all_terms()
-                    if fieldname == "content"]
-
-    matches = []
-    for result in results:
-        title = result.get("title")
-        path = result.get("path")
-        excerpt = highlight.highlight(result.get("content"), 
-                    terms, 
-                    analysis.StandardAnalyzer(),
-                    highlight.SimpleFragmenter(),
-                    MyHtmlFormatter())
-        matches.append( {'path':result.get('path'),
-                         'excerpt':excerpt, 
-                         'docnum':result.docnum, 
-                         'pagenumber':result.get('pagenumber'), 
-                         'title':title })
-    match_groups = {}
-    for match in matches:
-        print match
-        if not match_groups.has_key(match['path']):
-            match_groups[match['path']] = { 'matches':[],
-                                            'title' : match['title'],
-                                            'first_docnum' : match['docnum'],
-                                            'filename':os.path.basename(match['path']) }
-        match_groups[ match['path'] ]['matches'].append(
-            { 'excerpt':match['excerpt'],
-              'docnum':match['docnum'],
-              'pagenumber':match['pagenumber'] })
-
-    objects = match_groups.values()[skip:skip+5]
-    return flask.render_template('search.html', 
-        match_groups=objects, term=term, skip=skip, resultlen=len(results))
-    """ 
 
 if __name__ == "__main__":
     app.debug = True
author	yvesf <yvesf-git@xapek.org>	2010-11-22 00:52:52 +0100
committer	yvesf <yvesf-git@xapek.org>	2010-11-22 00:52:52 +0100
commit	1f408d58a0853653d9297bd048512c3e4e279512 (patch)
tree	204e2e85fc2cb73d2dc8328d61a6739dacc3d95f /web.py
parent	5b7f0bdf98e4fffca943e408a60f2fe2e289fef6 (diff)
download	booksearch-1f408d58a0853653d9297bd048512c3e4e279512.tar.gz booksearch-1f408d58a0853653d9297bd048512c3e4e279512.zip