From 5b7f0bdf98e4fffca943e408a60f2fe2e289fef6 Mon Sep 17 00:00:00 2001 From: yvesf Date: Mon, 22 Nov 2010 00:03:26 +0100 Subject: save progress --- web.py | 57 ++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 23 deletions(-) (limited to 'web.py') diff --git a/web.py b/web.py index dc1632e..077aa22 100644 --- a/web.py +++ b/web.py @@ -32,26 +32,28 @@ def do_download_file(docnum): filename=os.path.basename(document['path'])) return r -@app.route("/test") -def test(): - input = pyPdf.PdfFileReader(file("test.pdf", 'r')) - page = input.getPage(0) +@app.route("/download/image/<int:docnum>", methods=["GET"]) +@app.route("/download/image/<int:docnum>/<int:page>", methods=["GET"]) +def do_download_image(docnum,page=0): + def generator(process, input): + input.seek(0) + while not process.stdin.closed: + stdout, stderr = process.communicate(input.read()) + if stdout: + yield stdout + + with index_metadata.reader() as reader: + document = reader.stored_fields(docnum) + filepath = document['path'] + pdffile = StringIO() + + page = pyPdf.PdfFileReader(file(filepath, 'r')).getPage(page) out = pyPdf.PdfFileWriter() out.addPage(page) - pdffile = StringIO() out.write(pdffile) - process = subprocess.Popen(["/usr/bin/convert", "pdf:-", "jpeg:-"], stdin=subprocess.PIPE, stdout=subprocess.PIPE) - outjpg = StringIO() - pdffile.seek(0) - while not process.stdin.closed: - print "." 
- stdout, stderr = process.communicate(pdffile.read()) - if not stdout: - break - outjpg.write(stdout) - outjpg.seek(0) - r = werkzeug.Response(outjpg, mimetype="image/jpeg") - return r + process = subprocess.Popen(["/usr/bin/convert", "pdf:-", "jpeg:-"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + return werkzeug.Response(generator(process,pdffile), mimetype="image/jpeg") @app.route("/download/page/<int:docnum>/<int:page>", methods=["GET"]) def do_download_page(docnum,page): @@ -104,14 +106,23 @@ def do_search(skip=0,term=None): if skip == 0 and not term: return flask.render_template('search.html', objects=[], term="", skip=0) + searcher = index_book.searcher() query = QueryParser("content").parse(term) facets = searching.Facets.from_field(searcher, "path") - results = searcher.search(query, limit=None, sortedby="path") - categories = {} - for key, value in facets.categorize(results).items(): - categories[key] = map(lambda v: v[0], value) - - return flask.jsonify(categories) + results = searcher.search(query, limit=None) + categories = facets.categorize(results).items() + searcher.close() + + matches = {} + for book in categories: + filepath = book[0] + with index_metadata.searcher() as searcher: + docnum = searcher.document_number(path=filepath) + matches[docnum] = [] + for match in book[1]: + matches[docnum].append(match) + + return flask.render_template('search.html', matches=matches, term=term) """ terms = [text for fieldname, text in query.all_terms() if fieldname == "content"] -- cgit v1.2.1