summary refs log tree commit diff
diff options
context:
space:
mode:
author yvesf <yvesf-git@xapek.org> 2010-11-21 13:00:01 +0100
committer yvesf <yvesf-git@xapek.org> 2010-11-21 13:00:01 +0100
commit 1c34f8b6824b38c5a2698bbbd27a7aa7df51da9c (patch)
tree 51a8a3f3278862fbe8cb3932d74a2941aae1bf84
parent 7385d9f5ba2cba49db24ee9c874f08fff641cb20 (diff)
download booksearch-1c34f8b6824b38c5a2698bbbd27a7aa7df51da9c.tar.gz
booksearch-1c34f8b6824b38c5a2698bbbd27a7aa7df51da9c.zip
save
-rw-r--r-- indexer.py 4
-rw-r--r-- templates/search.html 36
-rw-r--r-- web.py 6
3 files changed, 6 insertions, 40 deletions
diff --git a/indexer.py b/indexer.py
index 57e1c0f..1508bd4 100644
--- a/indexer.py
+++ b/indexer.py
@@ -77,6 +77,7 @@ def process_file(filepath):
print u"{0} processing {1}".format(os.getpid(), filepath)
inputfile = pyPdf.PdfFileReader(file(filepath, 'r'))
title = inputfile.getDocumentInfo().title
+
writer_metadata = index_metadata.writer()
writer_metadata.add_document(title=title, path=filepath, createtime=time.time())
writer_metadata.commit()
@@ -110,7 +111,8 @@ except KeyboardInterrupt:
pool.terminate()
except ImportError:
for filepath in filepaths:
- process_file(filepath)
+ #if process_file(filepath) == "KeyboardInterrupt":
+ break
print u"optimize indexes"
index_metadata.optimize()
index_metadata.close()
diff --git a/templates/search.html b/templates/search.html
index e2773ae..8e0a206 100644
--- a/templates/search.html
+++ b/templates/search.html
@@ -8,39 +8,5 @@
{% endblock %}
{% block searchValue %}{{ term }}{% endblock %}
{% block content %}
- {% if match_groups.__len__() == 0 %}
- No Matches
- {% else %}
- {{ resultlen }} Matches in {{ match_groups.__len__() }} files
- {% for match_group in match_groups %}
- <hr />
- <h2>
- {{ match_group['title'] }} - {{ match_group['filename'] }}
- (<a href="{{ url_for("do_download_file", docnum=match_group['first_docnum']) }}">
- Download
- </a>)
- </h2>
- {% for match in match_group['matches'] %}
- <h3>
- Page {{ match['pagenumber'] }}
- (<a href="{{ url_for("do_download_page", docnum=match_group['first_docnum'], page=match['pagenumber']) }}">Download</a>)
- </h3>
- {% autoescape false %}
- <div>{{ match['excerpt'] }}</div>
- {% endautoescape %}
- {% endfor %}
- {% endfor %}
- {% endif %}
-
- <div id="navigation">
- {% if term != "" %}
- {% if skip > 0 %}
- <a href="{{ url_for("do_search", term=term, skip=skip-5) }}">Previous 5</a>
- {% else %}
- Previous 5
- {% endif %}
- -
- <a href="{{ url_for("do_search", term=term, skip=skip+5) }}">Next 5</a>
- {% endif %}
- </div>
+content
{% endblock %}
diff --git a/web.py b/web.py
index 38a7dbb..577e56f 100644
--- a/web.py
+++ b/web.py
@@ -16,8 +16,8 @@ import werkzeug
app = flask.Flask("booksearch")
-index = open_dir(u"index", mapped=False)
-searcher = index.searcher()
+index_book = open_dir(u"index", u"book")
+index_metadata = open_dir(u"index", u"metadata")
@app.route("/")
def do_index():
@@ -89,8 +89,6 @@ def do_search(skip=0,term=None):
for key, value in facets.categorize(results).items():
categories[key] = map(lambda v: v[0], value)
- print searcher.key_terms(categories[categories.keys()[0]], "content")
-
return flask.jsonify(categories)
"""
terms = [text for fieldname, text in query.all_terms()