index/search/convert works; ugly template

author: yvesf <yvesf-git@xapek.org> 2010-11-22 00:52:52 +0100
committer: yvesf <yvesf-git@xapek.org> 2010-11-22 00:52:52 +0100
commit: 1f408d58a0853653d9297bd048512c3e4e279512 (patch)
tree: 204e2e85fc2cb73d2dc8328d61a6739dacc3d95f
parent: 5b7f0bdf98e4fffca943e408a60f2fe2e289fef6 (diff)
download: booksearch-1f408d58a0853653d9297bd048512c3e4e279512.tar.gz
booksearch-1f408d58a0853653d9297bd048512c3e4e279512.zip
3 files changed, 69 insertions, 80 deletions
diff --git a/static/style.css b/static/style.css
index 11bcece..15365f6 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1,5 +1,6 @@
 div#header {
     font-size: 150%;
+b
 }
 
 div#search {
@@ -15,10 +16,6 @@ div#content div#navigation {
 }
 
 div#footer {
-    position: fixed;
-    bottom: 0px;
-    padding: 4px;
-    background-color: white;
     left: 0px;
     right: 0px;
 }
diff --git a/templates/search.html b/templates/search.html
index 4afe479..81a1007 100644
--- a/templates/search.html
+++ b/templates/search.html
@@ -13,15 +13,20 @@
         Matched {{ matches.__len__() }} Book{% if matches.__len__() > 1 %}s{% endif %}
         {% for docnum, matches in matches.items() %}
             <div class="book">
-                book: {{ docnum }}
-                <img src="{{ url_for("do_download_image", docnum=docnum, page=0) }}"/>
+                <a href="{{ url_for("do_book_file", docnum=docnum) }}">book: {{ docnum }}</a>
+                <img src="{{ url_for("do_book_frontpage", docnum=docnum) }}"/>
+                <br />
                 {% for match in matches %}
                     <div class="match">
-                        {{ match }}   
+                        Match at page {{ match[2] }} (
+                        <a href="{{ url_for("do_page_image", docnum=match[0]) }}">image</a>,
+                        <a href="{{ url_for("do_page_file", docnum=match[0]) }}">pdf</a>)
+                        score={{ match[0] }}
+                        <a href="{{ url_for("json_excerpt", docnum=match[0], term=term) }}">excerpt</a>
                     </div>
                 {% endfor %}
             </div>
-            match in {{ docnum }}
+            <hr />
         {% endfor %}
     {% endif %}
 {% endblock %}
diff --git a/web.py b/web.py
index 077aa22..edb4a34 100644
--- a/web.py
+++ b/web.py
@@ -24,29 +24,25 @@ index_metadata = open_dir(u"index", u"metadata")
 def do_index():
     return flask.redirect(flask.url_for("do_search",term=""))
 
-@app.route("/download/file/<int:docnum>")
-def do_download_file(docnum):
-    document = index.reader().stored_fields(docnum)
-    r = werkzeug.Response(open(document['path'], "r"), mimetype="application/pdf",)
-    r.headers.add('Content-Disposition', 'attachment', 
-        filename=os.path.basename(document['path']))
-    return r
-
-@app.route("/download/image/<int:docnum>", methods=["GET"])
-@app.route("/download/image/<int:docnum>/<int:page>", methods=["GET"])
-def do_download_image(docnum,page=0):
+@app.route("/book/file/<int:docnum>")
+def do_book_file(docnum):
+    with index_metadata.reader() as reader:
+        document = reader.stored_fields(docnum)
+        r = werkzeug.Response(open(document['path'], "r"), mimetype="application/pdf",)
+        r.headers.add('Content-Disposition', 'attachment', 
+            filename=os.path.basename(document['path']))
+        return r
+
+   
+def pdf_to_image(filepath, page):
+    print page
     def generator(process, input):
         input.seek(0)
         while not process.stdin.closed:
             stdout, stderr = process.communicate(input.read())
             if stdout:
                 yield stdout
-
-    with index_metadata.reader() as reader:
-        document = reader.stored_fields(docnum)
-        filepath = document['path']
     pdffile = StringIO()
-
     page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page)
     out = pyPdf.PdfFileWriter()
     out.addPage(page)
@@ -55,19 +51,35 @@ def do_download_image(docnum,page=0):
         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
     return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg")
 
-@app.route("/download/page/<int:docnum>/<int:page>", methods=["GET"])
-def do_download_page(docnum,page):
-    document = index.reader().stored_fields(docnum)
-    inputfile = pyPdf.PdfFileReader(file(document['path'], 'r'))
-    page = inputfile.getPage(page)
+@app.route("/page/image/<int:docnum>", methods=["GET"])
+def do_page_image(docnum):
+    with index_book.reader() as reader:
+        document = reader.stored_fields(docnum)
+        return pdf_to_image(document['path'], document['pagenumber'])
+    
+ 
+@app.route("/book/frontpage/<int:docnum>", methods=["GET"])
+def do_book_frontpage(docnum):
+    with index_metadata.reader() as reader:
+        document = reader.stored_fields(docnum)
+        return pdf_to_image(document['path'], 0)
+ 
+@app.route("/page/file/<int:docnum>", methods=["GET"])
+def do_page_file(docnum):
+    with index_book.reader() as reader:
+        document = reader.stored_fields(docnum)
+        filepath = document['path']
+        page = document['pagenumber']
+    inputfile = pyPdf.PdfFileReader(file(filepath, 'r'))
+    pdfpage = inputfile.getPage(page)
     outbuf = StringIO()
     outfile = pyPdf.PdfFileWriter()
-    outfile.addPage(page)
+    outfile.addPage(pdfpage)
     outfile.write(outbuf)
     outbuf.seek(0)
     r = werkzeug.Response(outbuf, mimetype="application/pdf")
-    client_filename = os.path.basename(document['path'])[:-3]
-    client_filename += u".Page-{0}".format(document['pagenumber'])
+    client_filename = os.path.basename(filepath)[:-3]
+    client_filename += u".Page-{0}".format(page)
     r.headers.add('Content-Disposition', 'attachment', filename=client_filename)
     return r
 
@@ -78,8 +90,6 @@ class MyHtmlFormatter(highlight.HtmlFormatter):
 
 @app.route("/json/excerpt/<int:docnum>/<path:term>", methods=["GET"])
 def json_excerpt(docnum, term):
-    searcher = index.searcher()
-
     def generator(q):
         for result in searcher.search(q, limit=1, sortedby="pagenumber"):
             terms = [ text for fieldname, text in q.all_terms()
@@ -94,10 +104,17 @@ def json_excerpt(docnum, term):
                                 'excerpt':excerpt,
                                 'docnum':result.docnum} )
 
-    document = searcher.reader().stored_fields(docnum)
+    with index_book.reader() as reader:
+        document = reader.stored_fields(docnum)
     q = QueryParser("content").parse(term)
-    q = query.And([ q, query.Term("path", document['path']) ] )
-    return werkzeug.Response( generator(q) )
+    terms = [ text for fieldname, text in q.all_terms()
+        if fieldname == "content" ]
+    excerpt = highlight.highlight(document['content'],
+                terms,
+                analysis.FancyAnalyzer(),
+                highlight.SimpleFragmenter(),
+                MyHtmlFormatter())
+    return unicode(  excerpt  )
 
 @app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
 @app.route("/search/<path:term>",methods=["GET"])
@@ -110,54 +127,24 @@ def do_search(skip=0,term=None):
     query = QueryParser("content").parse(term)
     facets = searching.Facets.from_field(searcher, "path")
     results = searcher.search(query, limit=None)
-    categories = facets.categorize(results).items()
+    for result in results:
+        print result.get("pagenumber")
+    categories = facets.categorize(results)
     searcher.close()
 
     matches = {}
-    for book in categories:
-        filepath = book[0]
-        with index_metadata.searcher() as searcher:
-            docnum = searcher.document_number(path=filepath)
-        matches[docnum] = []
-        for match in book[1]:
-            matches[docnum].append(match)
+    with index_book.reader() as reader:
+        for book in categories.items():
+            filepath = book[0]
+            with index_metadata.searcher() as searcher:
+                docnum = searcher.document_number(path=filepath)
+            matches[docnum] = []
+            for match in book[1]:
+                pagenumber = reader.stored_fields(match[0])['pagenumber']
+                match = (match[0], match[1], pagenumber)
+                matches[docnum].append(match)
 
     return flask.render_template('search.html', matches=matches, term=term)
-    """
-    terms = [text for fieldname, text in query.all_terms()
-                    if fieldname == "content"]
-
-    matches = []
-    for result in results:
-        title = result.get("title")
-        path = result.get("path")
-        excerpt = highlight.highlight(result.get("content"), 
-                    terms, 
-                    analysis.StandardAnalyzer(),
-                    highlight.SimpleFragmenter(),
-                    MyHtmlFormatter())
-        matches.append( {'path':result.get('path'),
-                         'excerpt':excerpt, 
-                         'docnum':result.docnum, 
-                         'pagenumber':result.get('pagenumber'), 
-                         'title':title })
-    match_groups = {}
-    for match in matches:
-        print match
-        if not match_groups.has_key(match['path']):
-            match_groups[match['path']] = { 'matches':[],
-                                            'title' : match['title'],
-                                            'first_docnum' : match['docnum'],
-                                            'filename':os.path.basename(match['path']) }
-        match_groups[ match['path'] ]['matches'].append(
-            { 'excerpt':match['excerpt'],
-              'docnum':match['docnum'],
-              'pagenumber':match['pagenumber'] })
-
-    objects = match_groups.values()[skip:skip+5]
-    return flask.render_template('search.html', 
-        match_groups=objects, term=term, skip=skip, resultlen=len(results))
-    """ 
 
 if __name__ == "__main__":
     app.debug = True
author	yvesf <yvesf-git@xapek.org>	2010-11-22 00:52:52 +0100
committer	yvesf <yvesf-git@xapek.org>	2010-11-22 00:52:52 +0100
commit	1f408d58a0853653d9297bd048512c3e4e279512 (patch)
tree	204e2e85fc2cb73d2dc8328d61a6739dacc3d95f
parent	5b7f0bdf98e4fffca943e408a60f2fe2e289fef6 (diff)
download	booksearch-1f408d58a0853653d9297bd048512c3e4e279512.tar.gz booksearch-1f408d58a0853653d9297bd048512c3e4e279512.zip