1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
import logging
import codecs
import time
from urllib.request import urlopen, Request
from lxml.html import parse
from . import model
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Endpoint template for a certificate's comments. Placeholders: {name},
# {id} (used twice, as "Id" and "id") and {timestamp}.
COMMENT_URL = (
    "https://www.wikifolio.com/dynamic/de/de/invest/"
    "getpagedmessagesforwikifolio/{name}"
    "?Id={id}"
    "&tv=False"
    "&id={id}"
    "&page=1"
    "&pageSize=5"
    "&_={timestamp}"
)

# Endpoint template for a certificate's trades. Placeholders: {name}, {id}.
TRADES_URL = (
    "https://www.wikifolio.com/dynamic/de/de/invest/"
    "getpagedtradesforwikifolio/{name}"
    "?id={id}"
    "&page=1&pageSize=100"
)

# User-Agent header value attached to every outgoing request.
USER_AGENT = (
    "Mozilla/4.0 (compatible; MSIE 6.0; "
    "Windows NT 5.1; FSL 7.0.6.01001)"
)
def make_request(url):
    """Build a urllib ``Request`` for *url* with the module's User-Agent set.

    The URL is logged at INFO level. Nothing is sent over the network here;
    the returned object is meant to be handed to ``urlopen``.

    :param url: address the request is created for
    :rtype: Request
    """
    # Log through the module-level logger rather than the root logger so the
    # record carries this module's name; %-args defer formatting until the
    # record is actually emitted.
    logger.info("Make request: %s", url)
    request = Request(url)
    request.add_header("User-Agent", USER_AGENT)
    return request
def get_id_from_name(name):
    """Load a certificate's page and build a ``model.Certificate`` from it.

    The page at ``model.BASE_URL + name`` is fetched and parsed as UTF-8
    HTML. The certificate is constructed, in this order, from *name*, the
    values of the ``input`` elements with ids ``wikifolio``,
    ``wikifolio-shortdesc`` and ``wikifolio-isin``, and the ``data-trader``
    attribute of the first ``div`` that carries one.

    :param name: sanitized name of the certificate, appended to
        ``model.BASE_URL`` to form the page url
    :rtype: model.Certificate
    :raises Exception: if the expected elements cannot be read from the
        page; the underlying error is attached as ``__cause__``
    """
    request = make_request(model.BASE_URL + name)
    with urlopen(request) as input_raw:
        document = parse(codecs.getreader('utf-8')(input_raw))
    try:
        return model.Certificate(
            name,
            document.find('//input[@id="wikifolio"]').value,
            document.find('//input[@id="wikifolio-shortdesc"]').value,
            document.find('//input[@id="wikifolio-isin"]').value,
            document.find('//div[@data-trader]').get('data-trader'))
    except Exception as err:
        # ``except Exception`` instead of a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not converted into a scraping
        # failure; ``from err`` keeps the original error (typically an
        # AttributeError from a missing element) visible in the traceback.
        raise Exception(
            "Failed to find wikifolio infos (id,name,isin) in html page"
        ) from err
def get_comments(cert):
    """Yield one ``model.Comment`` per ``div.user-comment`` of the comment page.

    The page is requested from ``COMMENT_URL`` filled with the certificate's
    ``guid`` and ``name`` plus the current Unix time, then parsed as UTF-8
    HTML.

    Each comment is built from: the ``datetime`` attribute of the entry's
    ``div/time`` element parsed with ``%d.%m.%Y %H:%M:%S``, an author string
    derived from ``cert.trader``, the text of the ``message-item-content``
    div, the entry's ``id`` attribute and ``cert.make_url()``.

    :type cert: model.Certificate
    """
    logger.info("Fetch comments of {.name}".format(cert))
    url = COMMENT_URL.format(
        id=cert.guid, name=cert.name, timestamp=int(time.time()))
    with urlopen(make_request(url)) as response:
        page = parse(codecs.getreader('utf-8')(response))

    for entry in page.findall('//div[@class="user-comment"]'):
        raw_date = entry.find('div/time').get('datetime')
        published = time.strptime(raw_date, "%d.%m.%Y %H:%M:%S")
        author = "{trader} <{trader}@localhost>".format(trader=cert.trader)
        content = entry.find('div[@class="message-item-content"]').text
        yield model.Comment(
            published, author, content, entry.get('id'), cert.make_url())
def get_trades(cert):
    """Yield one ``model.Trade`` per ``table/tr`` row of the trades page.

    The page is requested from ``TRADES_URL`` filled with the certificate's
    ``name`` and ``guid`` and parsed as UTF-8 HTML.

    A row whose second cell has text starts a new share: the share name and
    ISIN are read from the first cell and the second cell's text is the
    trade type. A row whose second cell is empty is a continuation: it
    reuses the name and ISIN of the most recent non-continuation row and
    takes its type from the ``span`` in the first cell.

    The timestamp in the third cell has non-breaking spaces replaced by
    plain spaces and is parsed with ``%d.%m.%Y %H:%M``.

    :type cert: model.Certificate
    """
    request = make_request(TRADES_URL.format(name=cert.name, id=cert.guid))
    with urlopen(request) as input_raw:
        document = parse(codecs.getreader('utf-8')(input_raw))

    share_name = share_isin = None
    for trade_block in document.findall('//table/tr'):
        # ``.text`` is None (not "") for a cell without any text, which is
        # exactly what a continuation row may have here; treat it as empty
        # instead of failing on ``None.strip()``.
        typ = (trade_block.find('td[2]').text or "").strip()
        if typ:  # not a continuation
            share_name = trade_block.find('td[1]/div/a/span').text.strip()
            share_isin = trade_block.find('td[1]/div/div').text.strip()
        else:  # a continuation, read type from first column
            typ = trade_block.find('td[1]/span').text.strip()

        timestamp = trade_block.find('td[3]/div[2]').text.strip()
        timestamp = timestamp.replace('\xa0', ' ')
        timestamp = time.strptime(timestamp, "%d.%m.%Y %H:%M")

        yield model.Trade(
            share_name,
            share_isin,
            typ,
            trade_block.find('td[3]/div[1]').text.strip(),  # status
            timestamp,
            trade_block.find('td[4]').text.strip(),  # quote
            trade_block.find('td[5]').text.strip())  # price ("Kurs")
|