From d9a668312c152d4f64318010ec84340015d896bb Mon Sep 17 00:00:00 2001
From: Yves Fischer
Date: Wed, 17 Feb 2016 00:10:30 +0100
Subject: Import to git

---
 .gitignore            |  2 ++
 README.md             | 10 ++++++++
 setup.py              | 12 ++++++++++
 wikifolio-rss         | 18 +++++++++++++++
 wikifolio/__init__.py | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++
 wikifolio/model.py    | 25 ++++++++++++++++++++
 wikifolio/rss.py      | 30 ++++++++++++++++++++++++
 7 files changed, 161 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 setup.py
 create mode 100755 wikifolio-rss
 create mode 100644 wikifolio/__init__.py
 create mode 100644 wikifolio/model.py
 create mode 100644 wikifolio/rss.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..16f7637
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.idea
+/*.iml
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..289f5cf
--- /dev/null
+++ b/README.md
@@ -0,0 +1,10 @@
+# Usage
+
+Take the wikifolio name (for example 'dach-trading-invest') from the URL of its wikifolio.com page; the output file must be named after it (everything before the first dot) and must already exist:
+
+```
+# touch ./dach-trading-invest.rss.xml
+# wikifolio-rss ./dach-trading-invest.rss.xml
+```
+
+The RSS feed of comments is written into that file.
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ec22a2c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+"""Packaging script for wikifolio-rss."""
+from distutils.core import setup
+
+# NOTE(review): install_requires is a setuptools keyword; plain distutils
+# does not act on it, so the list below is informational only -- confirm
+# whether this script should move to setuptools.
+setup(name='wikifolio-rss',
+      version='0.0.1',
+      description='RSS Feed transformation for wikifolio',
+      author='Yves Fischer',
+      author_email='yvesf+wikifolio@xapek.org',
+      url='https://www.xapek.org/git/yvesf/wikifolio-rss',
+      packages=['wikifolio'],
+      scripts=['wikifolio-rss'],
+      # lxml is imported by wikifolio/__init__.py, ll.xist by wikifolio/rss.py.
+      install_requires=['ll-xist', 'lxml'])
diff --git a/wikifolio-rss b/wikifolio-rss
new file mode 100755
index 0000000..e285f06
--- /dev/null
+++ b/wikifolio-rss
@@ -0,0 +1,34 @@
+#!/usr/bin/python3
+"""Write the comment feed of one wikifolio into an existing file.
+
+Usage: wikifolio-rss NAME.rss.xml
+
+NAME, the part of the file's basename before the first dot, is the
+wikifolio name.  The file itself must already exist.
+"""
+import os
+import sys
+import logging
+
+import wikifolio
+import wikifolio.rss
+
+logging.basicConfig(level=logging.INFO)
+
+if len(sys.argv) != 2 or not os.path.exists(sys.argv[1]):
+    # Report misuse instead of silently doing nothing.
+    sys.exit("usage: wikifolio-rss NAME.rss.xml (file must already exist)")
+
+filename = sys.argv[1]
+# partition() keeps the whole basename when it contains no dot, whereas
+# name[:name.find('.')] would cut off its last character.
+name = os.path.basename(filename).partition('.')[0]
+cert = wikifolio.get_id_from_name(name)
+# get_comments() is a generator that downloads lazily: render the feed
+# completely before opening the file for writing, so a failed download
+# does not leave the existing feed truncated.
+feed = wikifolio.rss.dump(cert, wikifolio.get_comments(cert))
+# dump() serializes with 'utf-8'; write with the same encoding rather
+# than the locale default.
+with open(filename, "w", encoding="utf-8") as f:
+    f.write(feed)
diff --git a/wikifolio/__init__.py b/wikifolio/__init__.py
new file mode 100644
index 0000000..06559e1
--- /dev/null
+++ b/wikifolio/__init__.py
@@ -0,0 +1,76 @@
+"""Scrape certificate data and trader comments from wikifolio.com."""
+import logging
+import codecs
+import time
+import urllib.request
+
+from lxml.html import parse
+
+from . import model
+
+logger = logging.getLogger(__name__)
+
+COMMENT_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \
+              "getpagedmessagesforwikifolio/{name}?Id={id}" \
+              "&tv=False" \
+              "&id={id}" \
+              "&page=1" \
+              "&pageSize=5" \
+              "&_={timestamp}"
+USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; " \
+             "Windows NT 5.1; FSL 7.0.6.01001)"
+
+
+def make_request(url):
+    """Return a urllib Request for *url* that sends USER_AGENT."""
+    request = urllib.request.Request(url)
+    request.add_header("User-Agent", USER_AGENT)
+    return request
+
+
+def get_id_from_name(name):
+    """Fetch the wikifolio page for *name* and extract its metadata.
+
+    :param name: sanitized name of the certificate (as used in the url)
+    :rtype: model.Certificate
+    :raises Exception: if an expected element is missing from the page
+    """
+    request = make_request(model.BASE_URL + name)
+    with urllib.request.urlopen(request) as input_raw:
+        document = parse(codecs.getreader('utf-8')(input_raw))
+    try:
+        return model.Certificate(
+            document.find('//input[@id="wikifolio"]').value,
+            document.find('//input[@id="wikifolio-shortdesc"]').value,
+            document.find('//input[@id="wikifolio-isin"]').value,
+            document.find('//div[@data-trader]').get('data-trader'))
+    except AttributeError as err:
+        # find() returns None for a missing element, so the attribute
+        # access fails.  Catch only that instead of a bare except, which
+        # would also swallow KeyboardInterrupt and real programming errors.
+        raise Exception("Failed to find wikifolio infos (id,name,isin) "
+                        "in html page") from err
+
+
+def get_comments(cert):
+    """Yield the comments returned by COMMENT_URL (page 1) for *cert*.
+
+    This is a generator: the download only happens once iteration starts.
+
+    :type cert: model.Certificate
+    :rtype: iterator of model.Comment
+    """
+    logger.info("Fetch comments of %s", cert.name)
+    request = make_request(COMMENT_URL.format(
+        id=cert.id, name=cert.name, timestamp=int(time.time())))
+    with urllib.request.urlopen(request) as input_raw:
+        document = parse(codecs.getreader('utf-8')(input_raw))
+    # The response is parsed and closed before the first comment is yielded.
+    for div_comment in document.findall('//div[@class="user-comment"]'):
+        pub_date = div_comment.find('div/time').get('datetime')
+        yield model.Comment(
+            time.strptime(pub_date, "%d.%m.%Y %H:%M:%S"),
+            "{trader} <{trader}@localhost>".format(trader=cert.trader),
+            div_comment.find('div[@class="message-item-content"]').text,
+            div_comment.get('id'),
+            cert.make_url())
diff --git a/wikifolio/model.py b/wikifolio/model.py
new file mode 100644
index 0000000..7315782
--- /dev/null
+++ b/wikifolio/model.py
@@ -0,0
+1,37 @@
+"""Plain data holders for certificates and their comments."""
+
+BASE_URL = "https://www.wikifolio.com/de/de/wikifolio/"
+
+
+class Certificate:
+    """Metadata of one wikifolio certificate."""
+
+    def __init__(self, id, shortdesc, isin, trader):
+        self.id = id
+        # The short description is what make_url() appends to BASE_URL.
+        self.name = shortdesc
+        self.isin = isin
+        self.trader = trader
+
+    def make_url(self):
+        """Return BASE_URL followed by this certificate's name."""
+        return BASE_URL + self.name
+
+    def __repr__(self):
+        return "<{} id={} shortdesc=\"{}\" isin={}>".format(
+            self.__class__.__name__, self.id, self.name, self.isin)
+
+
+class Comment:
+    """One comment; attribute names follow the RSS item elements."""
+
+    def __init__(self, date, author, text, guid, link):
+        self.pubDate = date
+        self.author = author
+        self.description = text
+        self.guid = guid
+        self.link = link
+
+    def __repr__(self):
+        return "<{} guid={} link={}>".format(
+            self.__class__.__name__, self.guid, self.link)
diff --git a/wikifolio/rss.py b/wikifolio/rss.py
new file mode 100644
index 0000000..e56c143
--- /dev/null
+++ b/wikifolio/rss.py
@@ -0,0 +1,46 @@
+"""Serialize wikifolio comments as an RSS 2.0 document."""
+import email.utils
+import time
+
+from . import model
+
+from ll.xist import xsc
+from ll.xist.ns import xml, rss20
+
+
+def _rfc822_date(pub_date):
+    """Format the struct_time *pub_date* as an RFC 822 date string.
+
+    NOTE(review): the scraped timestamp carries no zone information; it
+    is interpreted in the local zone of this machine -- confirm that this
+    matches what wikifolio displays.
+    """
+    # formatdate() always emits English day/month names and a numeric
+    # offset.  strftime("%a, %d %b %Y %T %z") depended on the locale and
+    # on the platform-specific %T directive.
+    return email.utils.formatdate(time.mktime(pub_date), localtime=True)
+
+
+def dump(cert, comments):
+    """Return the RSS 2.0 document for *comments*, serialized as 'utf-8'.
+
+    :type cert: model.Certificate
+    :type comments: iterable of model.Comment
+    """
+    title = "Kommentare zu {.name} / {.isin}".format(cert, cert)
+    items = [
+        rss20.item(
+            rss20.title(title),
+            rss20.author(comment.author),
+            rss20.pubDate(_rfc822_date(comment.pubDate)),
+            rss20.guid(comment.guid),
+            rss20.link(comment.link),
+            rss20.description(comment.description),
+        )
+        for comment in comments
+    ]
+    return xsc.Frag(xml.XML(),
+                    rss20.rss(rss20.channel(
+                        rss20.title(title),
+                        *items
+                    ))).string('utf-8')
-- 
cgit v1.2.1