From d9a668312c152d4f64318010ec84340015d896bb Mon Sep 17 00:00:00 2001
From: Yves Fischer
Date: Wed, 17 Feb 2016 00:10:30 +0100
Subject: Import to git

---
Review notes (the payload below was revised during review, so the blob
ids on the "index" lines still refer to the originally imported files):
 - wikifolio/__init__.py: no bare "except:", the original error is chained
   as the cause, log arguments are passed lazily, HTTP responses are
   closed before their parsed content is used, docstrings added
 - wikifolio/model.py: docstrings added
 - wikifolio/rss.py: portable "%H:%M:%S" instead of "%T" in the pubDate
   format, explicit format indices, list comprehension for the items

 wikifolio/__init__.py | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++
 wikifolio/model.py    | 30 +++++++++++++++++++
 wikifolio/rss.py      | 36 +++++++++++++++++++++++
 3 files changed, 147 insertions(+)
 create mode 100644 wikifolio/__init__.py
 create mode 100644 wikifolio/model.py
 create mode 100644 wikifolio/rss.py

(limited to 'wikifolio')

diff --git a/wikifolio/__init__.py b/wikifolio/__init__.py
new file mode 100644
index 0000000..06559e1
--- /dev/null
+++ b/wikifolio/__init__.py
@@ -0,0 +1,81 @@
+import logging
+import codecs
+import time
+import urllib.request
+
+from lxml.html import parse
+
+from . import model
+
+logger = logging.getLogger(__name__)
+
+COMMENT_URL = "https://www.wikifolio.com/dynamic/de/de/invest/" \
+              "getpagedmessagesforwikifolio/{name}?Id={id}" \
+              "&tv=False" \
+              "&id={id}" \
+              "&page=1" \
+              "&pageSize=5" \
+              "&_={timestamp}"
+USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; " \
+             "Windows NT 5.1; FSL 7.0.6.01001)"
+
+
+def make_request(url):
+    """Build a request for *url* that carries the module's User-Agent.
+
+    :param url: absolute URL to fetch
+    :rtype: urllib.request.Request
+    """
+    request = urllib.request.Request(url)
+    request.add_header("User-Agent", USER_AGENT)
+    return request
+
+
+def get_id_from_name(name):
+    """Fetch the wikifolio page for *name* and extract its metadata.
+
+    :param name: sanitized name of the certificate (line in url)
+    :rtype: Certificate
+    :raises Exception: if the expected elements cannot be read from the page
+    """
+    request = make_request(model.BASE_URL + name)
+    with urllib.request.urlopen(request) as input_raw:
+        document = parse(codecs.getreader('utf-8')(input_raw))
+    try:
+        return model.Certificate(
+            document.find('//input[@id="wikifolio"]').value,
+            document.find('//input[@id="wikifolio-shortdesc"]').value,
+            document.find('//input[@id="wikifolio-isin"]').value,
+            document.find('//div[@data-trader]').get('data-trader'))
+    except Exception as err:
+        # find() returns None for a missing element, so the lookups above
+        # fail with AttributeError; keep the cause attached for debugging.
+        raise Exception("Failed to find wikifolio infos (id,name,isin) "
+                        "in html page") from err
+
+
+def get_comments(cert):
+    """Yield the comments on the first page of a wikifolio's message feed.
+
+    This is a generator, so the HTTP request is only sent once iteration
+    starts.
+
+    :type cert: Certificate instance
+    :rtype: iterator of model.Comment
+    """
+    logger.info("Fetch comments of %s", cert.name)
+    request = make_request(COMMENT_URL.format(
+        id=cert.id, name=cert.name, timestamp=int(time.time())))
+    with urllib.request.urlopen(request) as input_raw:
+        document = parse(codecs.getreader('utf-8')(input_raw))
+    # parse() has consumed the whole response, so the connection is closed
+    # before any comment is handed to the consumer.
+    author = "{trader} <{trader}@localhost>".format(trader=cert.trader)
+    for div_comment in document.findall('//div[@class="user-comment"]'):
+        pubDate = div_comment.find('div/time').get('datetime')
+        yield model.Comment(
+            time.strptime(pubDate, "%d.%m.%Y %H:%M:%S"),
+            author,
+            div_comment.find('div[@class="message-item-content"]').text,
+            div_comment.get('id'),
+            cert.make_url())
diff --git a/wikifolio/model.py b/wikifolio/model.py
new file mode 100644
index 0000000..7315782
--- /dev/null
+++ b/wikifolio/model.py
@@ -0,0 +1,30 @@
+BASE_URL = "https://www.wikifolio.com/de/de/wikifolio/"
+
+
+class Certificate:
+    """Metadata of one wikifolio certificate."""
+
+    def __init__(self, id, shortdesc, isin, trader):
+        self.id = id
+        self.name = shortdesc
+        self.isin = isin
+        self.trader = trader
+
+    def make_url(self):
+        """Return the URL of this certificate's page below BASE_URL."""
+        return BASE_URL + self.name
+
+    def __repr__(self):
+        return "<{} id={} shortdesc=\"{}\" isin={}>".format(
+            self.__class__.__name__, self.id, self.name, self.isin)
+
+
+class Comment:
+    """One comment, with attributes named after RSS 2.0 item elements."""
+
+    def __init__(self, date, author, text, guid, link):
+        self.pubDate = date
+        self.author = author
+        self.description = text
+        self.guid = guid
+        self.link = link
diff --git a/wikifolio/rss.py b/wikifolio/rss.py
new file mode 100644
index 0000000..e56c143
--- /dev/null
+++ b/wikifolio/rss.py
@@ -0,0 +1,36 @@
+import time
+
+from . import model
+
+from ll.xist import xsc
+from ll.xist.ns import xml, rss20
+
+# "%T" is a platform-specific strftime directive; spell it out so the
+# feed can also be generated where the C library does not know it.
+_PUB_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %z"
+
+
+def dump(cert, comments):
+    """Render *comments* as an RSS 2.0 document.
+
+    :type cert: model.Certificate
+    :type comments: list[model.Comment]
+    :return: the serialized feed as produced by ``string('utf-8')``
+    """
+    title = "Kommentare zu {0.name} / {0.isin}".format(cert)
+    items = [
+        rss20.item(
+            rss20.title(title),
+            rss20.author(comment.author),
+            rss20.pubDate(time.strftime(_PUB_DATE_FORMAT, comment.pubDate)),
+            rss20.guid(comment.guid),
+            rss20.link(comment.link),
+            rss20.description(comment.description)
+        )
+        for comment in comments
+    ]
+    return xsc.Frag(xml.XML(),
+                    rss20.rss(rss20.channel(
+                        rss20.title(title),
+                        *items
+                    ))).string('utf-8')
-- 
cgit v1.2.1