From 413051037749da90f03055bcd305c82b1ee90afd Mon Sep 17 00:00:00 2001 From: Yves Fischer Date: Tue, 1 Mar 2016 23:34:05 +0100 Subject: Add script to read trades --- wikifolio/__init__.py | 48 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) (limited to 'wikifolio/__init__.py') diff --git a/wikifolio/__init__.py b/wikifolio/__init__.py index 06559e1..667ca98 100644 --- a/wikifolio/__init__.py +++ b/wikifolio/__init__.py @@ -1,7 +1,7 @@ import logging import codecs import time -import urllib.request +from urllib.request import urlopen, Request from lxml.html import parse @@ -16,12 +16,17 @@ COMMENT_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ "&page=1" \ "&pageSize=5" \ "&_={timestamp}" +TRADES_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \ + "getpagedtradesforwikifolio/{name}?id={id}" \ + "&page=1&pageSize=100" USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; " \ "Windows NT 5.1; FSL 7.0.6.01001)" def make_request(url): - request = urllib.request.Request(url) + """:rtype: Request""" + logging.info("Make request: {}".format(url)) + request = Request(url) request.add_header("User-Agent", USER_AGENT) return request @@ -29,13 +34,14 @@ def make_request(url): def get_id_from_name(name): """ :param name: sanitized name of the certificate (line in url) - :rtype: Certificate + :rtype: model.Certificate """ request = make_request(model.BASE_URL + name) - with urllib.request.urlopen(request) as input_raw: + with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) try: return model.Certificate( + name, document.find('//input[@id="wikifolio"]').value, document.find('//input[@id="wikifolio-shortdesc"]').value, document.find('//input[@id="wikifolio-isin"]').value, @@ -45,13 +51,11 @@ def get_id_from_name(name): def get_comments(cert): - """ - :type cert: Certificate instance - """ + """:type cert: model.Certificate""" logger.info("Fetch comments of {.name}".format(cert)) request = make_request(COMMENT_URL.format( - id=cert.id, name=cert.name, timestamp=int(time.time()))) - with urllib.request.urlopen(request) as input_raw: + id=cert.guid, name=cert.name, timestamp=int(time.time()))) + with urlopen(request) as input_raw: document = parse(codecs.getreader('utf-8')(input_raw)) comments = document.findall('//div[@class="user-comment"]') for div_comment in comments: @@ -62,3 +66,29 @@ def get_comments(cert): div_comment.find('div[@class="message-item-content"]').text, div_comment.get('id'), cert.make_url()) + +def get_trades(cert): + """:type cert: model.Certificate""" + request = make_request(TRADES_URL.format(name=cert.name, id=cert.guid)) + with urlopen(request) as input_raw: + document = parse(codecs.getreader('utf-8')(input_raw)) + trade_blocks = document.findall('//table/tr') + + share_name = share_isin = None + for trade_block in trade_blocks: + typ = trade_block.find('td[2]').text.strip() + if typ != "": # not a continuation + share_name = trade_block.find('td[1]/div/a/span').text.strip() + share_isin = trade_block.find('td[1]/div/div').text.strip() + else: # a continuaton, read type from first column + typ = trade_block.find('td[1]/span').text.strip() + timestamp = trade_block.find('td[3]/div[2]').text.strip() + timestamp = timestamp.replace('\xa0', ' ') + timestamp = time.strptime(timestamp, "%d.%m.%Y %H:%M") + yield model.Trade(share_name, + share_isin, + typ, + trade_block.find('td[3]/div[1]').text.strip(), #status + timestamp, + trade_block.find('td[4]').text.strip(), #quote + trade_block.find('td[5]').text.strip()) # kurs \ No newline at end of file -- cgit v1.2.1