summary | refs | log | tree | commit | diff
path: root/watchnews/fetch.py
diff options
context:
space:
mode:
author: Yves Fischer <yvesf-git@xapek.org> 2016-12-09 21:19:19 +0100
committer: Yves Fischer <yvesf-git@xapek.org> 2016-12-09 21:31:44 +0100
commit: 65a19f4c5fdeea6b4afa12d138f9319a063d618f (patch)
tree: b0a421f695349641aaababc2544447e0ea510108 /watchnews/fetch.py
parent: d1fd42b946734843415cd0bab3083679123153d0 (diff)
download: watchnews-65a19f4c5fdeea6b4afa12d138f9319a063d618f.tar.gz
download: watchnews-65a19f4c5fdeea6b4afa12d138f9319a063d618f.zip
add --test to test newspaper3k text extraction (HEAD, master)
Diffstat (limited to 'watchnews/fetch.py')
-rwxr-xr-x watchnews/fetch.py 14
1 files changed, 14 insertions, 0 deletions
diff --git a/watchnews/fetch.py b/watchnews/fetch.py
index e6c5c39..c476be7 100755
--- a/watchnews/fetch.py
+++ b/watchnews/fetch.py
@@ -8,6 +8,20 @@ from newspaper.cleaners import DocumentCleaner
import logging
+def dump_paper(link):
+    """Download and parse the article at *link*, then print a dump of it.
+
+    The printed dump lists the title, url, authors and text fields taken
+    from the parsed ``Article`` instance's attributes.
+    """
+    article = Article(link)
+    article.download()
+    article.parse()
+    template = """\
+Article Dump:
+Title: {title}
+Url: {url}
+Authors: {authors}
+Text: {text}
+-----
+
+"""
+    # All instance attributes are offered to format(); the template only
+    # consumes the four fields it names.
+    print(template.format(**vars(article)))
+
def update(feed):
logging.info('Update %s', feed.url)
result = feedparser.parse(feed.url)