summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Fischer <yvesf-git@xapek.org>2016-02-17 00:10:30 +0100
committerYves Fischer <yvesf-git@xapek.org>2016-02-17 00:10:30 +0100
commitd9a668312c152d4f64318010ec84340015d896bb (patch)
treebe85c9bc2da8d2f286f6ca680fdaa768934805b3
downloadwikifolio-rss-d9a668312c152d4f64318010ec84340015d896bb.tar.gz
wikifolio-rss-d9a668312c152d4f64318010ec84340015d896bb.zip
Import to git
-rw-r--r--.gitignore2
-rw-r--r--README.md10
-rw-r--r--setup.py12
-rwxr-xr-xwikifolio-rss18
-rw-r--r--wikifolio/__init__.py64
-rw-r--r--wikifolio/model.py25
-rw-r--r--wikifolio/rss.py30
7 files changed, 161 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..16f7637
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/.idea
+/*.iml
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..289f5cf
--- /dev/null
+++ b/README.md
@@ -0,0 +1,10 @@
+# Usage
+
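+Take the wikifolio name (e.g. 'dach-trading-invest') from its web URL and use it as the file name up to the first dot; the file must already exist: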
+
+```
+# touch ./dach-trading-invest.rss.xml
+# wikifolio-rss ./dach-trading-invest.rss.xml
+```
+
+The RSS feed of comments is written into that file.
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ec22a2c
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+"""Packaging script for the wikifolio-rss command line tool."""
+# Prefer setuptools: plain distutils does not understand ``install_requires``
+# (it only warns about an unknown option), so the dependencies would never
+# be installed. Fall back to distutils when setuptools is unavailable.
+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
+setup(
+    name='wikifolio-rss',
+    version='0.0.1',
+    description='RSS Feed transformation for wikifolio',
+    author='Yves Fischer',
+    author_email='yvesf+wikifolio@xapek.org',
+    url='https://www.xapek.org/git/yvesf/wikifolio-rss',
+    packages=['wikifolio'],
+    scripts=['wikifolio-rss'],
+    # wikifolio/__init__.py imports lxml.html; wikifolio/rss.py imports ll.xist.
+    install_requires=['ll-xist', 'lxml'],
+)
diff --git a/wikifolio-rss b/wikifolio-rss
new file mode 100755
index 0000000..e285f06
--- /dev/null
+++ b/wikifolio-rss
@@ -0,0 +1,18 @@
+#!/usr/bin/python3
+"""Write the comment feed of a wikifolio into an existing RSS file.
+
+Usage: wikifolio-rss NAME.rss.xml
+
+The wikifolio name is the file's base name up to its first dot. The target
+file must already exist; it is overwritten with the generated feed.
+"""
+import os
+import sys
+import logging
+
+import wikifolio
+import wikifolio.rss
+
+logging.basicConfig(level=logging.INFO)
+
+if len(sys.argv) != 2 or not os.path.exists(sys.argv[1]):
+    # Report misuse instead of silently doing nothing.
+    sys.stderr.write(
+        "usage: {} EXISTING_FILE (named NAME.rss.xml)\n".format(sys.argv[0]))
+    sys.exit(1)
+
+filename = sys.argv[1]
+# partition() keeps the whole base name when it contains no dot; slicing with
+# str.find() would cut off the last character in that case (find() == -1).
+name = os.path.basename(filename).partition('.')[0]
+cert = wikifolio.get_id_from_name(name)
+comments = wikifolio.get_comments(cert)
+# Render the feed before opening the file: opening with "w" truncates it, and
+# the comments are only fetched while the feed is being built.
+feed = wikifolio.rss.dump(cert, comments)
+# The feed is serialized as UTF-8, so do not rely on the locale's encoding.
+with open(filename, "w", encoding="utf-8") as f:
+    f.write(feed)
diff --git a/wikifolio/__init__.py b/wikifolio/__init__.py
new file mode 100644
index 0000000..06559e1
--- /dev/null
+++ b/wikifolio/__init__.py
@@ -0,0 +1,64 @@
+import logging
+import codecs
+import time
+import urllib.request
+
+from lxml.html import parse
+
+from . import model
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# URL template of the paged comment endpoint; get_comments() fills in the
+# wikifolio's name and id and the current Unix timestamp.
+COMMENT_URL = (
+    "https://www.wikifolio.com/dynamic/de/de/invest/"
+    "getpagedmessagesforwikifolio/{name}?Id={id}"
+    "&tv=False"
+    "&id={id}"
+    "&page=1"
+    "&pageSize=5"
+    "&_={timestamp}"
+)
+# User-Agent header value that make_request() attaches to each request.
+USER_AGENT = (
+    "Mozilla/4.0 (compatible; MSIE 6.0; "
+    "Windows NT 5.1; FSL 7.0.6.01001)"
+)
+
+
+def make_request(url):
+    """
+    Build a request for ``url`` that carries the module's User-Agent header.
+
+    :param url: address to request
+    :rtype: urllib.request.Request
+    """
+    return urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
+
+
+def get_id_from_name(name):
+    """
+    Fetch the wikifolio page for ``name`` and read its metadata from the HTML.
+
+    :param name: sanitized name of the certificate (as it appears in the url)
+    :rtype: Certificate
+    :raises Exception: if the id, short description, ISIN or trader cannot
+        be read from the page
+    """
+    request = make_request(model.BASE_URL + name)
+    with urllib.request.urlopen(request) as input_raw:
+        document = parse(codecs.getreader('utf-8')(input_raw))
+    try:
+        return model.Certificate(
+            document.find('//input[@id="wikifolio"]').value,
+            document.find('//input[@id="wikifolio-shortdesc"]').value,
+            document.find('//input[@id="wikifolio-isin"]').value,
+            document.find('//div[@data-trader]').get('data-trader'))
+    except Exception as err:
+        # Not a bare ``except``: KeyboardInterrupt and SystemExit must pass
+        # through. Chaining keeps the underlying failure (e.g. a lookup that
+        # returned None) visible in the traceback.
+        raise Exception(
+            "Failed to find wikifolio infos (id,name,isin) in html page"
+        ) from err
+
+
+def get_comments(cert):
+    """
+    Lazily yield the comments found on the first comment page of ``cert``.
+
+    :type cert: Certificate instance
+    :return: generator of model.Comment objects
+    """
+    logger.info("Fetch comments of {.name}".format(cert))
+    url = COMMENT_URL.format(
+        id=cert.id, name=cert.name, timestamp=int(time.time()))
+    with urllib.request.urlopen(make_request(url)) as response:
+        page = parse(codecs.getreader('utf-8')(response))
+    # Every comment is attributed to the certificate's trader.
+    author = "{trader} <{trader}@localhost>".format(trader=cert.trader)
+    for entry in page.findall('//div[@class="user-comment"]'):
+        published = entry.find('div/time').get('datetime')
+        yield model.Comment(
+            time.strptime(published, "%d.%m.%Y %H:%M:%S"),
+            author,
+            entry.find('div[@class="message-item-content"]').text,
+            entry.get('id'),
+            cert.make_url())
diff --git a/wikifolio/model.py b/wikifolio/model.py
new file mode 100644
index 0000000..7315782
--- /dev/null
+++ b/wikifolio/model.py
@@ -0,0 +1,25 @@
+# Common URL prefix of wikifolio pages; the wikifolio name is appended to it.
+BASE_URL = "https://www.wikifolio.com/de/de/wikifolio/"
+
+
+class Certificate:
+    """Metadata of one wikifolio certificate."""
+
+    def __init__(self, id, shortdesc, isin, trader):
+        """Store the id, the short description (as ``name``), ISIN and trader."""
+        self.id, self.name = id, shortdesc
+        self.isin, self.trader = isin, trader
+
+    def make_url(self):
+        """Return the page URL: ``BASE_URL`` followed by the name."""
+        return "".join((BASE_URL, self.name))
+
+    def __repr__(self):
+        """Return a debug representation showing id, name and ISIN."""
+        template = '<{} id={} shortdesc="{}" isin={}>'
+        return template.format(
+            type(self).__name__, self.id, self.name, self.isin)
+
+
+class Comment:
+    """One comment entry; attributes are named like the RSS item fields."""
+
+    def __init__(self, date, author, text, guid, link):
+        """Store ``date`` as ``pubDate`` and ``text`` as ``description``."""
+        self.guid, self.link = guid, link
+        self.author = author
+        self.pubDate, self.description = date, text
diff --git a/wikifolio/rss.py b/wikifolio/rss.py
new file mode 100644
index 0000000..e56c143
--- /dev/null
+++ b/wikifolio/rss.py
@@ -0,0 +1,30 @@
+import time
+
+from . import model
+
+from ll.xist import xsc
+from ll.xist.ns import xml, rss20
+
+
+def format_pub_date(pub_date):
+    """
+    Format a ``time.struct_time`` as an RFC 822 date for ``<pubDate>``.
+
+    Weekday and month names are always English and no platform-specific
+    strftime directive (such as ``%T``) is used. Only the first six fields
+    of ``pub_date`` are read; the zone is always written as ``+0000``.
+
+    :type pub_date: time.struct_time
+    :rtype: str
+    """
+    # Imported locally so this helper needs no new file-level imports.
+    import datetime
+    import email.utils
+
+    stamp = datetime.datetime(*pub_date[:6], tzinfo=datetime.timezone.utc)
+    return email.utils.format_datetime(stamp)
+
+
+def dump(cert, comments):
+    """
+    Render the comments of a certificate as an RSS 2.0 document.
+
+    Every item uses the feed title as its own title.
+
+    :type cert: model.Certificate
+    :type comments: iterable of model.Comment
+    :return: the serialized feed, including the XML declaration
+    """
+    title = "Kommentare zu {0.name} / {0.isin}".format(cert)
+    items = [
+        rss20.item(
+            rss20.title(title),
+            rss20.author(comment.author),
+            rss20.pubDate(format_pub_date(comment.pubDate)),
+            rss20.guid(comment.guid),
+            rss20.link(comment.link),
+            rss20.description(comment.description)
+        )
+        for comment in comments
+    ]
+    feed = rss20.rss(rss20.channel(rss20.title(title), *items))
+    return xsc.Frag(xml.XML(), feed).string('utf-8')