summaryrefslogtreecommitdiff
path: root/web.py
blob: babc9ddd6bf4fa7ae50795a36ce800965ac12e57 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/python2.6
# coding: utf-8
from StringIO import StringIO
from whoosh.index import open_dir
from whoosh.qparser import QueryParser
import whoosh.fields as fields
import whoosh.analysis as analysis
import whoosh.highlight as highlight
import flask
import pyPdf
import werkzeug

# The Flask application object; the route decorators below register on it.
app = flask.Flask("booksearch")

# Open the Whoosh index stored in the "index" directory.  The path is
# relative, so it is resolved against the process's working directory.
# NOTE(review): the effect of mapped=False is not visible from this file --
# confirm against the Whoosh version in use.
index = open_dir(u"index", mapped=False)
# A single searcher is created at import time and used by do_search.
# NOTE(review): presumably it will not see index changes made after
# start-up -- verify.
searcher = index.searcher()

@app.route("/")
def do_index():
    """Redirect the site root to the search view with an empty term."""
    # Resolve the target through the endpoint name rather than a literal URL.
    search_url = flask.url_for("do_search", term="")
    return flask.redirect(search_url)

@app.route("/download/file/<int:docnum>")
def do_download_file(docnum):
    """Serve the complete PDF that index document `docnum` points to.

    docnum -- index document number, taken from the URL.

    Returns a werkzeug.Response that streams the file named by the
    document's stored 'path' field with the application/pdf mimetype.
    """
    reader = index.reader()
    try:
        filepath = reader.stored_fields(docnum)['path']
    finally:
        # Release the reader once the stored fields have been read instead
        # of leaving one open per request.
        reader.close()
    # PDFs are binary data: open with "rb" so no newline translation can
    # alter the bytes.  The handle is intentionally left open here because
    # the response reads from it after this function has returned.
    return werkzeug.Response(open(filepath, "rb"), mimetype="application/pdf")

@app.route("/download/page/<int:docnum>", methods=["GET"])
def do_download_page(docnum):
    """Serve a one-page PDF extracted for index document `docnum`.

    docnum -- index document number, taken from the URL.

    The document's stored 'path' field names the source PDF and its stored
    'pagenumber' field is passed to pyPdf's getPage() to select the page.
    Returns a werkzeug.Response holding the new single-page PDF with the
    application/pdf mimetype.
    """
    reader = index.reader()
    try:
        document = reader.stored_fields(docnum)
    finally:
        # Release the reader once the stored fields have been read instead
        # of leaving one open per request.
        reader.close()
    filepath = document['path']
    pagenumber = document['pagenumber']

    outbuf = StringIO()
    # PDFs are binary data, so open with 'rb'; the with-block guarantees the
    # source file is closed even if pyPdf raises.
    with open(filepath, 'rb') as pdffile:
        page = pyPdf.PdfFileReader(pdffile).getPage(pagenumber)
        outfile = pyPdf.PdfFileWriter()
        outfile.addPage(page)
        # Write while the source is still open: the page's data may only be
        # pulled from the source stream at this point.
        outfile.write(outbuf)
    # Rewind so the response starts reading from the first byte.
    outbuf.seek(0)
    return werkzeug.Response(outbuf, mimetype="application/pdf")

@app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
@app.route("/search/<path:term>",methods=["GET"])
@app.route("/search/", methods=["GET"])
def do_search(skip=0,term=None):
    """Render search.html with up to five hits for `term`.

    skip -- number of leading hits to leave out (taken from the URL).
    term -- query string, parsed against the "content" field.

    When there is no term and nothing is skipped, the template is rendered
    with an empty result list.  Otherwise each hit is handed to the template
    as a dict with the keys 'title', 'path', 'excerpt' (highlighted HTML
    from the stored content) and 'docnum'.
    """
    if not term and skip == 0:
        return flask.render_template('search.html', objects=[], term="", skip=0)

    parsed = QueryParser("content").parse(term)
    # Ask the searcher for just enough hits to fill the requested window.
    window_end = skip + 5
    hits = searcher.search(parsed, limit=window_end)

    # Only the query terms aimed at the "content" field get highlighted.
    content_terms = []
    for field, text in parsed.all_terms():
        if field == "content":
            content_terms.append(text)

    objects = []
    for hit in hits[skip:window_end]:
        entry = {'title': hit.get("title"), 'path': hit.get("path")}
        print(entry['path'])
        # Fresh analyzer, fragmenter and formatter for every hit.
        entry['excerpt'] = highlight.highlight(
            hit.get("content"),
            content_terms,
            analysis.StandardAnalyzer(),
            highlight.SimpleFragmenter(),
            highlight.HtmlFormatter())
        entry['docnum'] = hit.docnum
        objects.append(entry)

    return flask.render_template('search.html', objects=objects, term=term, skip=skip)

if __name__ == "__main__":
    # Running the file directly starts Flask's built-in server in debug
    # mode, listening on all network interfaces.
    # NOTE(review): debug mode combined with host 0.0.0.0 presumably makes
    # the interactive debugger reachable from other machines -- confirm this
    # entry point is only used on a trusted network.
    app.run(host="0.0.0.0", debug=True)