From 65a19f4c5fdeea6b4afa12d138f9319a063d618f Mon Sep 17 00:00:00 2001
From: Yves Fischer
Date: Fri, 9 Dec 2016 21:19:19 +0100
Subject: add --test to test newspaper3k text extraction

---
 watchnews/fetch.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'watchnews')

diff --git a/watchnews/fetch.py b/watchnews/fetch.py
index e6c5c39..c476be7 100755
--- a/watchnews/fetch.py
+++ b/watchnews/fetch.py
@@ -8,6 +8,20 @@ from newspaper.cleaners import DocumentCleaner
 
 import logging
 
+def dump_paper(link):
+    paper = Article(link)
+    paper.download()
+    paper.parse()
+    print("""\
+Article Dump:
+Title: {title}
+Url: {url}
+Authors: {authors}
+Text: {text}
+-----
+
+""".format(**paper.__dict__))
+
 def update(feed):
     logging.info('Update %s', feed.url)
     result = feedparser.parse(feed.url)
-- 
cgit v1.2.1