# cgit navigation: summary | refs | log | tree | commit | diff
# path: root/web.py
# blob: b262e1d6f063c60e2ba2309ffaae0a2b73ff6763 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/python2.6
# coding: utf-8
import os
from StringIO import StringIO
from whoosh.index import open_dir
from whoosh.qparser import QueryParser
import whoosh.fields as fields
import whoosh.analysis as analysis
import whoosh.highlight as highlight
import flask
import pyPdf
import werkzeug

# Flask application object; the route decorators below register on it.
app = flask.Flask("booksearch")

# Open the Whoosh index stored in the "index" directory at import time. The
# path is relative, so it depends on the process's working directory.
# NOTE(review): mapped=False presumably turns off memory-mapped file access --
# confirm against the Whoosh version in use.
index = open_dir(u"index", mapped=False)
# A single searcher is created at import time and reused by do_search().
# NOTE(review): it probably will not see index changes made after start-up --
# verify whether the index is ever rebuilt while the server is running.
searcher = index.searcher()

@app.route("/")
def do_index():
    """Redirect the site root to the search view with an empty search term."""
    search_url = flask.url_for("do_search", term="")
    return flask.redirect(search_url)

@app.route("/download/file/<int:docnum>")
def do_download_file(docnum):
    """Send the complete PDF file that indexed document *docnum* points to.

    The document's stored ``path`` field names the file on disk. It is
    returned as an ``application/pdf`` attachment whose download name is the
    basename of that path.

    docnum -- Whoosh document number taken from the URL.
    """
    reader = index.reader()
    try:
        document = reader.stored_fields(docnum)
    finally:
        # A new reader is opened for every request; close it as soon as the
        # stored fields have been fetched so readers do not pile up.
        reader.close()

    pdf_path = document['path']
    # PDFs are binary: text mode ("r") can corrupt the payload on platforms
    # that translate line endings, so open with "rb".
    response = werkzeug.Response(open(pdf_path, "rb"),
        mimetype="application/pdf")
    response.headers.add('Content-Disposition', 'attachment',
        filename=os.path.basename(pdf_path))
    return response

@app.route("/download/page/<int:docnum>", methods=["GET"])
def do_download_page(docnum):
    """Send a one-page PDF extracted for indexed document *docnum*.

    The document's stored ``path`` field names the source PDF and its stored
    ``pagenumber`` field selects the page, which is copied into a new
    in-memory PDF and returned as an ``application/pdf`` attachment.
    The download name is ``<source name without extension>.Page-<n>.pdf``.

    docnum -- Whoosh document number taken from the URL.

    NOTE(review): ``pagenumber`` is passed to ``getPage`` unchanged, so it is
    presumably stored as a 0-based index -- confirm against the indexer.
    """
    reader = index.reader()
    try:
        document = reader.stored_fields(docnum)
    finally:
        # A new reader is opened for every request; close it once the stored
        # fields have been fetched.
        reader.close()

    pdf_path = document['path']
    pagenumber = document['pagenumber']

    outbuf = StringIO()
    # Open in binary mode (PDF is not text) and let ``with`` close the source
    # file; the original left the handle open.
    with open(pdf_path, 'rb') as source:
        inputfile = pyPdf.PdfFileReader(source)
        outfile = pyPdf.PdfFileWriter()
        outfile.addPage(inputfile.getPage(pagenumber))
        # Write while the source is still open: the page's objects may only
        # be read from the source stream when the writer serialises them.
        outfile.write(outbuf)
    outbuf.seek(0)

    response = werkzeug.Response(outbuf, mimetype="application/pdf")
    # splitext drops the whole extension; the previous [:-3] slice kept the
    # dot and produced names like "book..Page-3" with no ".pdf" suffix.
    base_name = os.path.splitext(os.path.basename(pdf_path))[0]
    client_filename = u"{0}.Page-{1}.pdf".format(base_name, pagenumber)
    response.headers.add('Content-Disposition', 'attachment',
        filename=client_filename)
    return response

@app.route("/search/skip=<int:skip>/<path:term>",methods=["GET"])
@app.route("/search/<path:term>",methods=["GET"])
@app.route("/search/", methods=["GET"])
def do_search(skip=0,term=None):
    """Render one page of search results for *term*.

    skip -- number of leading hits to skip; the page shows up to five hits
            starting at that offset.
    term -- query string, parsed against the "content" field. With no term
            and no skip, the empty search form is rendered instead.

    Each hit is passed to the ``search.html`` template as a dict with the
    keys ``title``, ``path``, ``excerpt`` (HTML with the matched terms
    highlighted) and ``docnum``.
    """
    if skip == 0 and not term:
        return flask.render_template('search.html', objects=[], term="", skip=0)

    page_size = 5
    query = QueryParser("content").parse(term)
    # Only the hits up to the end of the requested page are needed.
    results = searcher.search(query, limit=skip + page_size)

    terms = [text for fieldname, text in query.all_terms()
                    if fieldname == "content"]

    # Built once and shared by every hit instead of once per hit.
    analyzer = analysis.StandardAnalyzer()
    fragmenter = highlight.SimpleFragmenter()

    objects = []
    for result in results[skip:skip + page_size]:
        # A fresh formatter per hit, as before.
        # NOTE(review): some Whoosh versions appear to keep per-instance
        # state in HtmlFormatter, so it is deliberately not shared.
        excerpt = highlight.highlight(result.get("content"),
            terms,
            analyzer,
            fragmenter,
            highlight.HtmlFormatter())
        objects.append({
            'title': result.get("title"),
            'path': result.get("path"),
            'excerpt': excerpt,
            'docnum': result.docnum,
        })

    return flask.render_template('search.html', objects=objects, term=term, skip=skip)

# Script entry point: start Flask's built-in server when run directly.
if __name__ == "__main__":
    # NOTE(review): debug mode enables Flask/Werkzeug's interactive debugger,
    # and host "0.0.0.0" listens on all network interfaces. Together they let
    # anyone who can reach the port run code on this machine -- confirm this
    # is only ever used for local development.
    app.debug = True
    app.run(host="0.0.0.0")