diff options
-rwxr-xr-x | watchnews-cli | 5 | ||||
-rwxr-xr-x | watchnews/fetch.py | 14 |
2 files changed, 18 insertions, 1 deletion
diff --git a/watchnews-cli b/watchnews-cli
index f3c080a..b43d1c2 100755
--- a/watchnews-cli
+++ b/watchnews-cli
@@ -17,6 +17,8 @@ if __name__ == '__main__':
                         help='Update all known feeds')
     parser.add_argument('--update-feed', type=int, metavar='ID',
                         help='Update feed with ID')
+    parser.add_argument('--test', type=str, metavar='URL',
+                        help='Fetch URL with newspaper library and dump result for testing')
     parser.add_argument('--remove', type=int, metavar='ID',
                         help='Remove feed with ID')
     parser.add_argument('--list', action='store_true',
@@ -60,5 +62,6 @@ if __name__ == '__main__':
         web.get_app().run(debug=args.web_debug)
     if args.rss:
         print(rss.rss())
-
+    if args.test:
+        fetch.dump_paper(args.test)
 
diff --git a/watchnews/fetch.py b/watchnews/fetch.py
index e6c5c39..c476be7 100755
--- a/watchnews/fetch.py
+++ b/watchnews/fetch.py
@@ -8,6 +8,20 @@ from newspaper.cleaners import DocumentCleaner
 
 import logging
 
+def dump_paper(link):
+    paper = Article(link)
+    paper.download()
+    paper.parse()
+    print("""\
+Article Dump:
+Title: {title}
+Url: {url}
+Authors: {authors}
+Text: {text}
+-----
+
+""".format(**paper.__dict__))
+
 def update(feed):
     logging.info('Update %s', feed.url)
     result = feedparser.parse(feed.url)