diff options
Diffstat (limited to 'wikifolio')
-rw-r--r-- | wikifolio/__init__.py | 84 | ||||
-rw-r--r-- | wikifolio/model.py | 23 | ||||
-rw-r--r-- | wikifolio/rss.py | 52 |
3 files changed, 93 insertions, 66 deletions
diff --git a/wikifolio/__init__.py b/wikifolio/__init__.py index 667ca98..6537b86 100644 --- a/wikifolio/__init__.py +++ b/wikifolio/__init__.py @@ -1,6 +1,8 @@ +import typing import logging import codecs import time +import datetime from urllib.request import urlopen, Request from lxml.html import parse @@ -14,7 +16,7 @@ COMMENT_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ "&tv=False" \ "&id={id}" \ "&page=1" \ - "&pageSize=5" \ + "&pageSize=15" \ "&_={timestamp}" TRADES_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ "getpagedtradesforwikifolio/{name}?id={id}" \ @@ -23,49 +25,44 @@ USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; " \ "Windows NT 5.1; FSL 7.0.6.01001)" -def make_request(url): - """:rtype: Request""" +def make_request(url) -> Request: logging.info("Make request: {}".format(url)) request = Request(url) request.add_header("User-Agent", USER_AGENT) return request -def get_id_from_name(name): - """ - :param name: sanitized name of the certificate (line in url) - :rtype: model.Certificate - """ +def get_id_from_name(name: str) -> model.Certificate: request = make_request(model.BASE_URL + name) with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) try: return model.Certificate( - name, - document.find('//input[@id="wikifolio"]').value, - document.find('//input[@id="wikifolio-shortdesc"]').value, - document.find('//input[@id="wikifolio-isin"]').value, - document.find('//div[@data-trader]').get('data-trader')) + name, + document.find('//input[@id="wikifolio"]').value, + document.find('//input[@id="wikifolio-shortdesc"]').value, + document.find('//input[@id="wikifolio-isin"]').value, + document.find('//div[@data-trader]').get('data-trader')) except: raise Exception("Failed to find wikifolio infos (id,name,isin) in html page") -def get_comments(cert): - """:type cert: model.Certificate""" +def get_comments(cert: model.Certificate) -> typing.Iterable[model.Comment]: logger.info("Fetch comments of {.name}".format(cert)) request = make_request(COMMENT_URL.format( - id=cert.guid, name=cert.name, timestamp=int(time.time()))) + id=cert.guid, name=cert.name, timestamp=int(time.time()))) with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) comments = document.findall('//div[@class="user-comment"]') for div_comment in comments: - pubDate = div_comment.find('div/time').get('datetime') + pub_date = div_comment.find('div/time').get('datetime') yield model.Comment( - time.strptime(pubDate, "%d.%m.%Y %H:%M:%S"), - "{trader} <{trader}@localhost>".format(trader=cert.trader), - div_comment.find('div[@class="message-item-content"]').text, - div_comment.get('id'), - cert.make_url()) + time.strptime(pub_date, "%d.%m.%Y %H:%M:%S"), + "{trader} <{trader}@localhost>".format(trader=cert.trader), + div_comment.find('div[@class="message-item-content"]').text, + div_comment.get('id'), + cert.make_url()) + def get_trades(cert): """:type cert: model.Certificate""" @@ -74,21 +71,30 @@ def get_trades(cert): document = parse(codecs.getreader('utf-8')(input_raw)) trade_blocks = document.findall('//table/tr') - share_name = share_isin = None + share_name = share_isin = kurs = None + trades = [] for trade_block in trade_blocks: - typ = trade_block.find('td[2]').text.strip() - if typ != "": # not a continuation - share_name = trade_block.find('td[1]/div/a/span').text.strip() - share_isin = trade_block.find('td[1]/div/div').text.strip() - else: # a continuaton, read type from first column - typ = trade_block.find('td[1]/span').text.strip() - timestamp = trade_block.find('td[3]/div[2]').text.strip() - timestamp = timestamp.replace('\xa0', ' ') - timestamp = time.strptime(timestamp, "%d.%m.%Y %H:%M") - yield model.Trade(share_name, - share_isin, - typ, - trade_block.find('td[3]/div[1]').text.strip(), #status - timestamp, - trade_block.find('td[4]').text.strip(), #quote - trade_block.find('td[5]').text.strip()) # kurs
\ No newline at end of file + try: + typ = trade_block.find('td[2]').text.strip() + if typ != "" and trade_block.find('td[1]/div/a/span') is not None: # not a continuation + share_name = trade_block.find('td[1]/div/a/span').text.strip() + share_isin = trade_block.find('td[1]/div/div').text.strip() + else: # a continuaton, read type from first column + typ = trade_block.find('td[1]/span').text.strip() + if trade_block.find('td[4]').text and trade_block.find('td[4]').text.strip() != "": + kurs = trade_block.find('td[4]').text.strip().replace('.', '').replace(',', '.') + volume = trade_block.find('td[5]').text.strip().replace('.', '').replace(',', '.') + timestamp = trade_block.find('td[3]/div[2]').text.strip() + timestamp = timestamp.replace('\xa0', ' ') + timestamp = datetime.datetime.strptime(timestamp, "%d.%m.%Y %H:%M") + status = trade_block.find('td[3]/div[1]').text.strip() + if status != model.Trade.STATUS_AUSGEFUEHRT: + continue # skip pending ones + + trades.append(model.Trade( + share_name, share_isin, typ, status, timestamp, float(kurs), float(volume) + )) + except: + raise Exception("failed to decode trade") # to do: where? + + return trades diff --git a/wikifolio/model.py b/wikifolio/model.py index ce8feba..658f8ef 100644 --- a/wikifolio/model.py +++ b/wikifolio/model.py @@ -1,7 +1,13 @@ BASE_URL = "https://www.wikifolio.com/de/de/wikifolio/" -class Certificate: +class _WithRepr: + def __repr__(self): + return "<{} {}".format(type(self).__name__, + " ".join(map(lambda kv: "{}={}".format(*kv), self.__dict__.items()))) + + +class Certificate(_WithRepr): def __init__(self, name, guid, shortdesc, isin, trader): self.name = name self.guid = guid @@ -12,12 +18,8 @@ class Certificate: def make_url(self): return BASE_URL + self.name - def __repr__(self): - return "<{} id={} shortdesc=\"{}\" isin={}>".format( - self.__class__.__name__, self.guid, self.name, self.isin) - -class Comment: +class Comment(_WithRepr): def __init__(self, date, author, text, guid, link): self.pubDate = date self.author = author @@ -26,9 +28,12 @@ class Comment: self.link = link -class Trade: +class Trade(_WithRepr): TYPE_KAUF = 'Quote Kauf' TYPE_VERKAUF = 'Quote Verkauf' + TYPES_KAUF = ('Quote Kauf', 'Limit Kauf', 'Stop-Limit Kauf', 'Rückabwicklung Verkauf') + TYPES_VERKAUF = ('Quote Verkauf', 'Limit Verkauf', 'Stop-Limit Verkauf', 'Rückabwicklung Kauf') + STATUS_AUSGEFUEHRT = 'Ausgeführt' def __init__(self, share_name, share_isin, typ, status, timestamp, quote, volume): self.share_name = share_name @@ -38,7 +43,3 @@ class Trade: self.timestamp = timestamp self.quote = quote self.volume = volume - - def __repr__(self): - return "<{} {}".format(type(self).__name__, - " ".join(map(lambda kv: "{}={}".format(*kv), self.__dict__.items()))) diff --git a/wikifolio/rss.py b/wikifolio/rss.py index a00191c..81e6122 100644 --- a/wikifolio/rss.py +++ b/wikifolio/rss.py @@ -1,30 +1,50 @@ -import time - from . import model +import time +import typing + from ll.xist import xsc from ll.xist.ns import xml, rss20 -def dump(cert, comments): - """ - :type cert: model.Certificate - :type comments: list[model.Comment] - """ +def dump_comments(cert: model.Certificate, comments: typing.Iterable[model.Comment]): title = "{.name} / {.isin}".format(cert, cert) items = [] for comment in comments: items.append(rss20.item( - rss20.title("Kommentar " + title), - rss20.author(comment.author), - rss20.pubDate(time.strftime("%a, %d %b %Y %T %z", - comment.pubDate)), - rss20.guid(comment.guid), - rss20.link(comment.link), - rss20.description(comment.description) + rss20.title("Kommentar: " + title), + rss20.author(comment.author), + rss20.pubDate(time.strftime("%a, %d %b %Y %T %z", + comment.pubDate)), + rss20.guid(comment.guid), + rss20.link(comment.link), + rss20.description(comment.description) + )) + return xsc.Frag(xml.XML(), + rss20.rss(rss20.channel( + rss20.title(title), + *items + ))).string('utf-8') + + +def dump_trades(cert: model.Certificate, trades: typing.Iterable[model.Trade]): + title = "{.name} / {.isin}".format(cert, cert) + items = [] + for trade in trades: + trade_title = trade.typ + " " + str(trade.volume) + "@" + str(trade.quote) + " " + trade.share_name + description = trade.typ + " " + str(trade.volume) + "@" + str(trade.quote) + " " + trade.share_name + description += " ( " + trade.share_isin + " ) " + + items.append(rss20.item( + rss20.title(trade_title), + rss20.author(cert.name), + rss20.pubDate(trade.timestamp.strftime("%a, %d %b %Y %T %z")), + rss20.guid(trade.timestamp.strftime("%a, %d %b %Y %T %z")), + rss20.link(cert.make_url()), + rss20.description(description) )) return xsc.Frag(xml.XML(), rss20.rss(rss20.channel( - rss20.title(title), - *items + rss20.title(title), + *items ))).string('utf-8') |