From 05fc177b2986556df92aa0f506b1a48ef964fe19 Mon Sep 17 00:00:00 2001 From: Yves Fischer Date: Tue, 22 Nov 2016 19:33:30 +0100 Subject: some refactoring before moving it to github --- .gitignore | 3 +- README.md | 14 ++++++--- setup.py | 8 ++--- wikifolio-comments-rss | 12 ++++++++ wikifolio-plot-trades | 16 ---------- wikifolio-rss | 19 ------------ wikifolio-trades-rss | 12 ++++++++ wikifolio/__init__.py | 84 +++++++++++++++++++++++++++----------------------- wikifolio/model.py | 23 +++++++------- wikifolio/rss.py | 52 +++++++++++++++++++++---------- 10 files changed, 133 insertions(+), 110 deletions(-) create mode 100755 wikifolio-comments-rss delete mode 100755 wikifolio-plot-trades delete mode 100755 wikifolio-rss create mode 100755 wikifolio-trades-rss diff --git a/.gitignore b/.gitignore index 5ba4b60..ea48823 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /.idea /*.iml -*swp \ No newline at end of file +*swp +__pycache__ diff --git a/README.md b/README.md index 289f5cf..3443fee 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,16 @@ # Usage -get the name 'dach-trading-invest' from web-url +- use python 3.5+ (typing) +- take a name like 'dach-trading-invest' from the web page url ``` -# touch ./dach-trading-invest.rss.xml -# wikifolio-rss ./dach-trading-invest.rss.xml +# wikifolio-comments-rss dach-trading-invest > dach-trading-invest.comments.rss.xml +# wikifolio-trades-rss dach-trading-invest > dach-trading-invest.trades.rss.xml ``` -The rss feed of comments is written into that file. \ No newline at end of file + +# Note + +The source code provided here is not affiliated with wikifolio.com. +wikifolio is most likely a registered brand and owned by somebody else. +No warranty, use at your own risk. diff --git a/setup.py b/setup.py index b2c4c61..ceb8f37 100644 --- a/setup.py +++ b/setup.py @@ -2,11 +2,11 @@ from distutils.core import setup setup(name='wikifolio-rss', - version='0.0.1', + version='0.0.2', description='RSS Feed transformation for wikifolio', author='Yves Fischer', author_email='yvesf+wikifolio@xapek.org', - url='https://www.xapek.org/git/yvesf/wikifolio-rss', + url='https://github.com/yvesf/wikifolio-rss', packages=['wikifolio'], - scripts=['wikifolio-rss', 'wikifolio-plot-trades'], - install_requires=['ll-xist']) + scripts=['wikifolio-comments-rss', 'wikifolio-trades-rss'], + install_requires=['ll-xist', 'lxml']) diff --git a/wikifolio-comments-rss b/wikifolio-comments-rss new file mode 100755 index 0000000..9f1cc6f --- /dev/null +++ b/wikifolio-comments-rss @@ -0,0 +1,12 @@ +#!/usr/bin/python3 +import sys +import logging +import wikifolio.rss + +logging.basicConfig(level=logging.INFO) + +if len(sys.argv) == 2: + name = sys.argv[1] + zertifikat_id = wikifolio.get_id_from_name(name) + comments = wikifolio.get_comments(zertifikat_id) + print(wikifolio.rss.dump_comments(zertifikat_id, comments)) diff --git a/wikifolio-plot-trades b/wikifolio-plot-trades deleted file mode 100755 index 3bd53dc..0000000 --- a/wikifolio-plot-trades +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/python3 -import os -import sys -import logging - -import wikifolio - -logging.basicConfig(level=logging.INFO) - -if len(sys.argv) == 2 and os.path.exists(sys.argv[1]): - filename = sys.argv[1] - name, _ = os.path.splitext(filename) - cert = wikifolio.get_id_from_name(name) - trades = wikifolio.get_trades(cert) - for trade in trades: - print(repr(trade)) diff --git a/wikifolio-rss b/wikifolio-rss deleted file mode 100755 index 5034dd7..0000000 --- a/wikifolio-rss +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/python3 -import os -import sys -import logging - -import wikifolio -import wikifolio.rss -import os - -logging.basicConfig(level=logging.INFO) - -if len(sys.argv) == 2 and os.path.exists(sys.argv[1]): - filename = sys.argv[1] - name = os.path.basename(filename) - name, _ = os.path.splitext(name) - cert = wikifolio.get_id_from_name(name) - comments = wikifolio.get_comments(cert) - with open(filename, "w") as f: - f.write(wikifolio.rss.dump(cert, comments)) diff --git a/wikifolio-trades-rss b/wikifolio-trades-rss new file mode 100755 index 0000000..24f98f0 --- /dev/null +++ b/wikifolio-trades-rss @@ -0,0 +1,12 @@ +#!/usr/bin/python3 +import sys +import logging +import wikifolio.rss + +logging.basicConfig(level=logging.INFO) + +if len(sys.argv) == 2: + name = sys.argv[1] + zertifikat_id = wikifolio.get_id_from_name(name) + trades = wikifolio.get_trades(zertifikat_id) + print(wikifolio.rss.dump_trades(zertifikat_id, trades)) diff --git a/wikifolio/__init__.py b/wikifolio/__init__.py index 667ca98..6537b86 100644 --- a/wikifolio/__init__.py +++ b/wikifolio/__init__.py @@ -1,6 +1,8 @@ +import typing import logging import codecs import time +import datetime from urllib.request import urlopen, Request from lxml.html import parse @@ -14,7 +16,7 @@ COMMENT_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ "&tv=False" \ "&id={id}" \ "&page=1" \ - "&pageSize=5" \ + "&pageSize=15" \ "&_={timestamp}" TRADES_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ "getpagedtradesforwikifolio/{name}?id={id}" \ @@ -23,49 +25,44 @@ USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; " \ "Windows NT 5.1; FSL 7.0.6.01001)" -def make_request(url): - """:rtype: Request""" +def make_request(url) -> Request: logging.info("Make request: {}".format(url)) request = Request(url) request.add_header("User-Agent", USER_AGENT) return request -def get_id_from_name(name): - """ - :param name: sanitized name of the certificate (line in url) - :rtype: model.Certificate - """ +def get_id_from_name(name: str) -> model.Certificate: request = make_request(model.BASE_URL + name) with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) try: return model.Certificate( - name, - document.find('//input[@id="wikifolio"]').value, - document.find('//input[@id="wikifolio-shortdesc"]').value, - document.find('//input[@id="wikifolio-isin"]').value, - document.find('//div[@data-trader]').get('data-trader')) + name, + document.find('//input[@id="wikifolio"]').value, + document.find('//input[@id="wikifolio-shortdesc"]').value, + document.find('//input[@id="wikifolio-isin"]').value, + document.find('//div[@data-trader]').get('data-trader')) except: raise Exception("Failed to find wikifolio infos (id,name,isin) in html page") -def get_comments(cert): - """:type cert: model.Certificate""" +def get_comments(cert: model.Certificate) -> typing.Iterable[model.Comment]: logger.info("Fetch comments of {.name}".format(cert)) request = make_request(COMMENT_URL.format( - id=cert.guid, name=cert.name, timestamp=int(time.time()))) + id=cert.guid, name=cert.name, timestamp=int(time.time()))) with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) comments = document.findall('//div[@class="user-comment"]') for div_comment in comments: - pubDate = div_comment.find('div/time').get('datetime') + pub_date = div_comment.find('div/time').get('datetime') yield model.Comment( - time.strptime(pubDate, "%d.%m.%Y %H:%M:%S"), - "{trader} <{trader}@localhost>".format(trader=cert.trader), - div_comment.find('div[@class="message-item-content"]').text, - div_comment.get('id'), - cert.make_url()) + time.strptime(pub_date, "%d.%m.%Y %H:%M:%S"), + "{trader} <{trader}@localhost>".format(trader=cert.trader), + div_comment.find('div[@class="message-item-content"]').text, + div_comment.get('id'), + cert.make_url()) + def get_trades(cert): """:type cert: model.Certificate""" @@ -74,21 +71,30 @@ def get_trades(cert): document = parse(codecs.getreader('utf-8')(input_raw)) trade_blocks = document.findall('//table/tr') - share_name = share_isin = None + share_name = share_isin = kurs = None + trades = [] for trade_block in trade_blocks: - typ = trade_block.find('td[2]').text.strip() - if typ != "": # not a continuation - share_name = trade_block.find('td[1]/div/a/span').text.strip() - share_isin = trade_block.find('td[1]/div/div').text.strip() - else: # a continuaton, read type from first column - typ = trade_block.find('td[1]/span').text.strip() - timestamp = trade_block.find('td[3]/div[2]').text.strip() - timestamp = timestamp.replace('\xa0', ' ') - timestamp = time.strptime(timestamp, "%d.%m.%Y %H:%M") - yield model.Trade(share_name, - share_isin, - typ, - trade_block.find('td[3]/div[1]').text.strip(), #status - timestamp, - trade_block.find('td[4]').text.strip(), #quote - trade_block.find('td[5]').text.strip()) # kurs \ No newline at end of file + try: + typ = trade_block.find('td[2]').text.strip() + if typ != "" and trade_block.find('td[1]/div/a/span') is not None: # not a continuation + share_name = trade_block.find('td[1]/div/a/span').text.strip() + share_isin = trade_block.find('td[1]/div/div').text.strip() + else: # a continuaton, read type from first column + typ = trade_block.find('td[1]/span').text.strip() + if trade_block.find('td[4]').text and trade_block.find('td[4]').text.strip() != "": + kurs = trade_block.find('td[4]').text.strip().replace('.', '').replace(',', '.') + volume = trade_block.find('td[5]').text.strip().replace('.', '').replace(',', '.') + timestamp = trade_block.find('td[3]/div[2]').text.strip() + timestamp = timestamp.replace('\xa0', ' ') + timestamp = datetime.datetime.strptime(timestamp, "%d.%m.%Y %H:%M") + status = trade_block.find('td[3]/div[1]').text.strip() + if status != model.Trade.STATUS_AUSGEFUEHRT: + continue # skip pending ones + + trades.append(model.Trade( + share_name, share_isin, typ, status, timestamp, float(kurs), float(volume) + )) + except: + raise Exception("failed to decode trade") # to do: where? + + return trades diff --git a/wikifolio/model.py b/wikifolio/model.py index ce8feba..658f8ef 100644 --- a/wikifolio/model.py +++ b/wikifolio/model.py @@ -1,7 +1,13 @@ BASE_URL = "https://www.wikifolio.com/de/de/wikifolio/" -class Certificate: +class _WithRepr: + def __repr__(self): + return "<{} {}".format(type(self).__name__, + " ".join(map(lambda kv: "{}={}".format(*kv), self.__dict__.items()))) + + +class Certificate(_WithRepr): def __init__(self, name, guid, shortdesc, isin, trader): self.name = name self.guid = guid @@ -12,12 +18,8 @@ class Certificate: def make_url(self): return BASE_URL + self.name - def __repr__(self): - return "<{} id={} shortdesc=\"{}\" isin={}>".format( - self.__class__.__name__, self.guid, self.name, self.isin) - -class Comment: +class Comment(_WithRepr): def __init__(self, date, author, text, guid, link): self.pubDate = date self.author = author @@ -26,9 +28,12 @@ class Comment: self.link = link -class Trade: +class Trade(_WithRepr): TYPE_KAUF = 'Quote Kauf' TYPE_VERKAUF = 'Quote Verkauf' + TYPES_KAUF = ('Quote Kauf', 'Limit Kauf', 'Stop-Limit Kauf', 'Rückabwicklung Verkauf') + TYPES_VERKAUF = ('Quote Verkauf', 'Limit Verkauf', 'Stop-Limit Verkauf', 'Rückabwicklung Kauf') + STATUS_AUSGEFUEHRT = 'Ausgeführt' def __init__(self, share_name, share_isin, typ, status, timestamp, quote, volume): self.share_name = share_name @@ -38,7 +43,3 @@ class Trade: self.timestamp = timestamp self.quote = quote self.volume = volume - - def __repr__(self): - return "<{} {}".format(type(self).__name__, - " ".join(map(lambda kv: "{}={}".format(*kv), self.__dict__.items()))) diff --git a/wikifolio/rss.py b/wikifolio/rss.py index a00191c..81e6122 100644 --- a/wikifolio/rss.py +++ b/wikifolio/rss.py @@ -1,30 +1,50 @@ -import time - from . import model +import time +import typing + from ll.xist import xsc from ll.xist.ns import xml, rss20 -def dump(cert, comments): - """ - :type cert: model.Certificate - :type comments: list[model.Comment] - """ +def dump_comments(cert: model.Certificate, comments: typing.Iterable[model.Comment]): title = "{.name} / {.isin}".format(cert, cert) items = [] for comment in comments: items.append(rss20.item( - rss20.title("Kommentar " + title), - rss20.author(comment.author), - rss20.pubDate(time.strftime("%a, %d %b %Y %T %z", - comment.pubDate)), - rss20.guid(comment.guid), - rss20.link(comment.link), - rss20.description(comment.description) + rss20.title("Kommentar: " + title), + rss20.author(comment.author), + rss20.pubDate(time.strftime("%a, %d %b %Y %T %z", + comment.pubDate)), + rss20.guid(comment.guid), + rss20.link(comment.link), + rss20.description(comment.description) + )) + return xsc.Frag(xml.XML(), + rss20.rss(rss20.channel( + rss20.title(title), + *items + ))).string('utf-8') + + +def dump_trades(cert: model.Certificate, trades: typing.Iterable[model.Trade]): + title = "{.name} / {.isin}".format(cert, cert) + items = [] + for trade in trades: + trade_title = trade.typ + " " + str(trade.volume) + "@" + str(trade.quote) + " " + trade.share_name + description = trade.typ + " " + str(trade.volume) + "@" + str(trade.quote) + " " + trade.share_name + description += " ( " + trade.share_isin + " ) " + + items.append(rss20.item( + rss20.title(trade_title), + rss20.author(cert.name), + rss20.pubDate(trade.timestamp.strftime("%a, %d %b %Y %T %z")), + rss20.guid(trade.timestamp.strftime("%a, %d %b %Y %T %z")), + rss20.link(cert.make_url()), + rss20.description(description) )) return xsc.Frag(xml.XML(), rss20.rss(rss20.channel( - rss20.title(title), - *items + rss20.title(title), + *items ))).string('utf-8') -- cgit v1.2.1