From 413051037749da90f03055bcd305c82b1ee90afd Mon Sep 17 00:00:00 2001 From: Yves Fischer Date: Tue, 1 Mar 2016 23:34:05 +0100 Subject: Add script to read trades --- wikifolio/__init__.py | 48 +++++++++++++++++++++++++++++++++++++++--------- wikifolio/model.py | 27 +++++++++++++++++++++++---- 2 files changed, 62 insertions(+), 13 deletions(-) (limited to 'wikifolio') diff --git a/wikifolio/__init__.py b/wikifolio/__init__.py index 06559e1..667ca98 100644 --- a/wikifolio/__init__.py +++ b/wikifolio/__init__.py @@ -1,7 +1,7 @@ import logging import codecs import time -import urllib.request +from urllib.request import urlopen, Request from lxml.html import parse @@ -16,12 +16,17 @@ COMMENT_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ "&page=1" \ "&pageSize=5" \ "&_={timestamp}" +TRADES_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ + "getpagedtradesforwikifolio/{name}?id={id}" \ + "&page=1&pageSize=100" USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; " \ "Windows NT 5.1; FSL 7.0.6.01001)" def make_request(url): - request = urllib.request.Request(url) + """:rtype: Request""" + logging.info("Make request: {}".format(url)) + request = Request(url) request.add_header("User-Agent", USER_AGENT) return request @@ -29,13 +34,14 @@ def make_request(url): def get_id_from_name(name): """ :param name: sanitized name of the certificate (line in url) - :rtype: Certificate + :rtype: model.Certificate """ request = make_request(model.BASE_URL + name) - with urllib.request.urlopen(request) as input_raw: + with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) try: return model.Certificate( + name, document.find('//input[@id="wikifolio"]').value, document.find('//input[@id="wikifolio-shortdesc"]').value, document.find('//input[@id="wikifolio-isin"]').value, @@ -45,13 +51,11 @@ def get_id_from_name(name): def get_comments(cert): - """ - :type cert: Certificate instance - """ + """:type cert: model.Certificate""" logger.info("Fetch comments of {.name}".format(cert)) request = make_request(COMMENT_URL.format( - id=cert.id, name=cert.name, timestamp=int(time.time()))) - with urllib.request.urlopen(request) as input_raw: + id=cert.guid, name=cert.name, timestamp=int(time.time()))) + with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) comments = document.findall('//div[@class="user-comment"]') for div_comment in comments: @@ -62,3 +66,29 @@ def get_comments(cert): div_comment.find('div[@class="message-item-content"]').text, div_comment.get('id'), cert.make_url()) + +def get_trades(cert): + """:type cert: model.Certificate""" + request = make_request(TRADES_URL.format(name=cert.name, id=cert.guid)) + with urlopen(request) as input_raw: + document = parse(codecs.getreader('utf-8')(input_raw)) + trade_blocks = document.findall('//table/tr') + + share_name = share_isin = None + for trade_block in trade_blocks: + typ = trade_block.find('td[2]').text.strip() + if typ != "": # not a continuation + share_name = trade_block.find('td[1]/div/a/span').text.strip() + share_isin = trade_block.find('td[1]/div/div').text.strip() + else: # a continuaton, read type from first column + typ = trade_block.find('td[1]/span').text.strip() + timestamp = trade_block.find('td[3]/div[2]').text.strip() + timestamp = timestamp.replace('\xa0', ' ') + timestamp = time.strptime(timestamp, "%d.%m.%Y %H:%M") + yield model.Trade(share_name, + share_isin, + typ, + trade_block.find('td[3]/div[1]').text.strip(), #status + timestamp, + trade_block.find('td[4]').text.strip(), #quote + trade_block.find('td[5]').text.strip()) # kurs \ No newline at end of file diff --git a/wikifolio/model.py b/wikifolio/model.py index 7315782..ce8feba 100644 --- a/wikifolio/model.py +++ b/wikifolio/model.py @@ -2,9 +2,10 @@ BASE_URL = "https://www.wikifolio.com/de/de/wikifolio/" class Certificate: - def __init__(self, id, shortdesc, isin, trader): - self.id = id - self.name = shortdesc + def __init__(self, name, guid, shortdesc, isin, trader): + self.name = name + self.guid = guid + self.shortdesc = shortdesc self.isin = isin self.trader = trader @@ -13,7 +14,7 @@ class Certificate: def __repr__(self): return "<{} id={} shortdesc=\"{}\" isin={}>".format( - self.__class__.__name__, self.id, self.name, self.isin) + self.__class__.__name__, self.guid, self.name, self.isin) class Comment: @@ -23,3 +24,21 @@ class Comment: self.description = text self.guid = guid self.link = link + + +class Trade: + TYPE_KAUF = 'Quote Kauf' + TYPE_VERKAUF = 'Quote Verkauf' + + def __init__(self, share_name, share_isin, typ, status, timestamp, quote, volume): + self.share_name = share_name + self.share_isin = share_isin + self.typ = typ + self.status = status + self.timestamp = timestamp + self.quote = quote + self.volume = volume + + def __repr__(self): + return "<{} {}".format(type(self).__name__, + " ".join(map(lambda kv: "{}={}".format(*kv), self.__dict__.items()))) -- cgit v1.2.1