#!/usr/bin/python2.6
# coding: utf-8
"""Flask front end for searching a Whoosh index and downloading PDF hits.

The handlers read the stored fields ``title``, ``path``, ``content`` and
``pagenumber`` from the index opened from the ``index`` directory.
"""
from StringIO import StringIO

import flask
from flask import Flask
import pyPdf
import werkzeug
from whoosh import highlight
import whoosh.analysis as analysis
import whoosh.fields as fields
from whoosh.index import open_dir
from whoosh.qparser import QueryParser

# Number of hits rendered per results page.
RESULTS_PER_PAGE = 5

app = Flask("booksearch")
index = open_dir(u"index", mapped=False)
searcher = index.searcher()


def _stored_fields(docnum):
    """Return the stored fields of document ``docnum``.

    A reader is opened for the lookup and always closed again, so repeated
    requests do not accumulate open readers.
    """
    reader = index.reader()
    try:
        return reader.stored_fields(docnum)
    finally:
        reader.close()


@app.route("/")
def do_index():
    """Redirect the site root to the empty search page."""
    # No ``term`` is passed so that the bare "/search/" rule is the one built.
    return flask.redirect(flask.url_for("do_search"))


@app.route("/download/file/<int:docnum>")
def do_download_file(docnum):
    """Send the whole PDF file that document ``docnum`` was indexed from.

    ``docnum`` is taken from the URL; without the ``<int:docnum>`` converter
    the view would be called with no argument and fail.
    """
    filepath = _stored_fields(docnum)['path']
    # PDFs are binary: open in "rb" so no newline translation can corrupt them.
    return werkzeug.Response(open(filepath, "rb"), mimetype="application/pdf")


@app.route("/download/page/<int:docnum>", methods=["GET"])
def do_download_page(docnum):
    """Send a one-page PDF holding only the page referenced by ``docnum``.

    The page index is read from the document's stored ``pagenumber`` field
    and the page is copied from the source file into an in-memory PDF.
    """
    document = _stored_fields(docnum)
    outbuf = StringIO()
    # The output is written while the source is still open (the page object
    # comes from the reader bound to this file), and the file is closed
    # afterwards instead of being leaked.
    with open(document['path'], "rb") as source:
        page = pyPdf.PdfFileReader(source).getPage(document['pagenumber'])
        outfile = pyPdf.PdfFileWriter()
        outfile.addPage(page)
        outfile.write(outbuf)
    return werkzeug.Response(outbuf.getvalue(), mimetype="application/pdf")


@app.route("/search/skip=<int:skip>/<term>", methods=["GET"])
@app.route("/search/<term>", methods=["GET"])
@app.route("/search/", methods=["GET"])
def do_search(skip=0, term=None):
    """Render one page of search results for ``term``.

    ``skip`` is the number of leading hits to omit; at most
    ``RESULTS_PER_PAGE`` hits after that offset are rendered. Without a term
    the empty search form is rendered.
    """
    if not term:
        return flask.render_template('search.html', objects=[], term="",
                                     skip=0)

    query = QueryParser("content").parse(term)
    page_end = skip + RESULTS_PER_PAGE
    results = searcher.search(query, limit=page_end)
    # Only terms aimed at the "content" field are highlighted in the excerpt.
    terms = [text for fieldname, text in query.all_terms()
             if fieldname == "content"]

    objects = []
    for result in results[skip:page_end]:
        path = result.get("path")
        app.logger.debug("search hit: %s", path)
        excerpt = highlight.highlight(result.get("content"),
                                      terms,
                                      analysis.StandardAnalyzer(),
                                      highlight.SimpleFragmenter(),
                                      highlight.HtmlFormatter())
        objects.append({
            'title': result.get("title"),
            'path': path,
            'excerpt': excerpt,
            'docnum': result.docnum,
        })
    return flask.render_template('search.html', objects=objects, term=term,
                                 skip=skip)


if __name__ == "__main__":
    # NOTE(review): debug mode together with host 0.0.0.0 exposes the
    # development server (and its debugger) on every network interface;
    # confirm this is only ever run on a trusted network.
    app.debug = True
    app.run(host="0.0.0.0")