diff options
author | Yves Fischer <yvesf-git@xapek.org> | 2018-07-17 12:34:04 +0200 |
---|---|---|
committer | Yves Fischer <yvesf-git@xapek.org> | 2018-07-17 12:49:49 +0200 |
commit | 69c220cda3d8c0a95327630f5752dad36cb82261 (patch) | |
tree | fad9fbe78cf717f4dd17b8e9d996ab9a54b7d3e2 /jobs | |
download | datasources-69c220cda3d8c0a95327630f5752dad36cb82261.tar.gz datasources-69c220cda3d8c0a95327630f5752dad36cb82261.zip |
Squashed commit
Diffstat (limited to 'jobs')
-rw-r--r-- | jobs/__init__.py | 1 | ||||
-rwxr-xr-x | jobs/clever_tanken.py | 114 | ||||
-rwxr-xr-x | jobs/davis_vantage.py | 21 | ||||
-rwxr-xr-x | jobs/esg.py | 88 | ||||
-rwxr-xr-x | jobs/hplq1300n.py | 26 | ||||
-rwxr-xr-x | jobs/prix_carburant.py | 107 | ||||
-rwxr-xr-x | jobs/swr_wetter.py | 50 | ||||
-rwxr-xr-x | jobs/sys_network.py | 15 | ||||
-rwxr-xr-x | jobs/sys_network_rate.py | 22 | ||||
-rwxr-xr-x | jobs/tankerkoenig.py | 36 | ||||
-rwxr-xr-x | jobs/telexoo.py | 55 | ||||
-rwxr-xr-x | jobs/transferwise.py | 55 |
12 files changed, 590 insertions, 0 deletions
# NOTE: The sections below are the twelve independent modules added under
# jobs/ by this commit, listed one after another.  Each section is its own
# file (path given in its banner); names such as ``Parser``, ``State``,
# ``URL``, ``Data``, ``execute`` and ``job`` are reused across the files and
# are not meant to share one namespace.
# NOTE(review): the listing this was restored from had its line breaks and
# indentation collapsed; block nesting was reconstructed from statement order
# and should be confirmed against the repository.

# ---------------------------------------------------------------------------
# jobs/__init__.py  (new file, mode 100644) -- empty package marker
# ---------------------------------------------------------------------------


# ---------------------------------------------------------------------------
# jobs/clever_tanken.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import logging
from enum import Enum
from html.parser import HTMLParser
from urllib import request

# Parser states: which piece of text handle_data() is currently collecting.
State = Enum('State', 'fuel_name fuel_price station_name idle')


class Tankstelle:
    """Scraped data of one filling station: name, price per fuel, and id."""

    def __init__(self):
        self.name = ""
        # fuel name -> price; a str while being collected, a float once the
        # price element has been closed.
        self.preise = {}
        # Filled in by execute() with the requested station id.
        self.id = None

    def __repr__(self):
        return "{}: {} {}".format(type(self).__name__, self.name, self.preise)


class Parser(HTMLParser):
    """State machine collecting station name and fuel prices of one page.

    Results accumulate in ``self.tankstelle`` while HTML is fed in.
    """

    def error(self, message):
        logging.error("Parser error: %s", message)

    def __init__(self):
        super().__init__()
        self.tankstelle = Tankstelle()
        # None means: no fuel name pending (none seen yet, or its price was
        # already stored).
        self._current_fuel_name = None
        self._state = State.idle

    def get_prix(self):
        """Return the station with every remaining price converted to float.

        Fuels whose price text is still empty (a name was seen but no price
        followed) are removed instead of raising ``ValueError``.
        """
        preise = self.tankstelle.preise
        # Iterate over a snapshot because entries may be deleted in the loop.
        for fuel, value in list(preise.items()):
            if isinstance(value, str) and not value.strip():
                del preise[fuel]
            else:
                preise[fuel] = float(value)
        return self.tankstelle

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        # NOTE(review): all three transitions are assumed to be nested under
        # the idle check (the collapsed listing does not show the nesting).
        if self._state == State.idle:
            if tag == "div" and attrs.get('class') == 'fuel-price-type':
                self._state = State.fuel_name
                self._current_fuel_name = ""
            if tag == "span" and (
                    attrs.get('id') == "main-content-fuel-station-header-name"
                    or attrs.get('itemprop') == "http://schema.org/addressCountry"):
                self._state = State.station_name
            elif (self._current_fuel_name is not None and tag == "span"
                  and attrs.get('ng-bind') == "display_preis"):
                self._state = State.fuel_price

    def handle_endtag(self, tag):
        if self._state == State.fuel_name and tag in ('span', 'div'):
            self._state = State.idle
        # Was ``tag in ('span')``: a substring test against the *string*
        # 'span', which also matched tags such as 'a', 'p' or 's'.
        elif self._state == State.station_name and tag == 'span':
            self._state = State.idle
        elif self._state == State.fuel_price and tag == 'span':
            self._state = State.idle
            preis = self.tankstelle.preise[self._current_fuel_name].strip()
            if preis == "":
                del self.tankstelle.preise[self._current_fuel_name]
            else:
                self.tankstelle.preise[self._current_fuel_name] = float(preis)
            self._current_fuel_name = None

    def handle_data(self, data: str):
        if self._state == State.fuel_name:
            self._current_fuel_name += data.strip().replace(':', '')
            self.tankstelle.preise[self._current_fuel_name] = ""
        elif self._state == State.fuel_price:
            self.tankstelle.preise[self._current_fuel_name] += data
        elif self._state == State.station_name:
            text = data.strip()
            if text:
                # Name fragments are joined with single spaces.
                if self.tankstelle.name:
                    self.tankstelle.name += " "
                self.tankstelle.name += text


URL = "http://www.clever-tanken.de/tankstelle_details/"


def execute(station_id: str):
    """Download and parse the detail page of *station_id*.

    Returns the parsed ``Tankstelle`` with ``id`` set to *station_id*.  Any
    exception is logged together with the station id and re-raised unchanged.
    """
    parser = Parser()
    r = request.Request(URL + station_id)
    r.add_header('Host', 'www.clever-tanken.de')
    r.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0')
    try:
        with request.urlopen(r) as f:
            # Undecodable bytes are dropped instead of aborting the scrape.
            f2 = codecs.getreader('utf-8')(f, errors='ignore')
            for line in f2.readlines():
                parser.feed(line)

        tankstelle = parser.tankstelle
        tankstelle.id = station_id
        return tankstelle
    except Exception:
        logging.error("Failed for station: %s", station_id)
        raise  # bare raise keeps the original traceback


if __name__ == "__main__":
    from pprint import pprint

    pprint(list(map(execute, [
        '20219', '11985', '17004',
        '19715',  # Kaiserst. Mineralölvertrieb Schwärzle
        '54296',  # ESSO Endingen
        '10355',  # ARAL Tiengen
        '20144',  # bft Rankackerweg
        '27534',  # EXTROL Freiburg
        '55690',  # Rheinmünster
        '15220',  # Esso Achern
        '5853',  # JET Rastatt
        '24048',  # Bodersweier
        '27534',
        '3819'])  # JET Freiburg
    ))


# ---------------------------------------------------------------------------
# jobs/davis_vantage.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import json
from urllib.request import urlopen, Request


def load(url: str):
    """Fetch the JSON object at *url* and return only its numeric fields.

    Top-level entries whose value is an ``int`` or ``float`` are kept;
    everything else (strings, booleans, nested structures) is discarded.
    """
    request = Request(url)
    request.add_header(
        "User-Agent",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; FSL 7.0.6.01001)")
    with urlopen(request) as f:
        data = json.load(codecs.getreader('utf-8')(f))
    # bool is a subclass of int, so it has to be excluded explicitly to keep
    # the exact-type filtering of the original ``type(v) in (int, float)``.
    return {key: value for key, value in data.items()
            if isinstance(value, (int, float)) and not isinstance(value, bool)}


if __name__ == "__main__":
    from pprint import pprint

    # Print the whole mapping (wrapping it in list() showed only the keys).
    pprint(load('http://wettermichel.de/davis/con_davis.php'))


# ---------------------------------------------------------------------------
# jobs/esg.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import logging
import urllib.parse
import urllib.request
from enum import Enum
from html.parser import HTMLParser


class Product:
    """One catalog row: SKU, name and price."""

    def __init__(self):
        self.price = ""  # raw text while parsing, float once the row is complete
        self.name = ""
        self.sku = None

    def __repr__(self):
        # Class *name* rather than the class object, like Tankstelle.__repr__.
        return "<{} name={} price={} sku={}>".format(
            type(self).__name__, self.name, self.price, self.sku)


State = Enum('State', 'parsing product product_name price idle')


class Parser(HTMLParser):
    """Collect a ``Product`` for every ``<tr data-sku=...>`` row fed in.

    Finished products are appended to ``self.products``.
    """

    def error(self, message):
        logging.error("Parser error: %s", message)

    def __init__(self):
        super().__init__()
        self.products = []
        self.current = None  # product of the row currently being parsed
        self.state = State.idle

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.state == State.idle and tag == "tr" and "data-sku" in attrs:
            self.current = Product()
            self.current.sku = attrs["data-sku"]
            self.state = State.parsing
        elif self.state == State.parsing and tag == 'h3' and \
                attrs.get('class') == 'product-name':
            self.state = State.product_name
        elif self.state == State.parsing and tag == 'span' and \
                attrs.get('class') == "price":
            self.state = State.price

    def handle_endtag(self, tag):
        if self.state == State.product_name and tag == 'a':
            self.state = State.parsing
        elif self.state == State.price and tag == 'span':
            self.state = State.parsing

        # NOTE(review): assumed to run on every end tag (method level), not
        # only inside the price branch; the collapsed listing is ambiguous.
        if self.current and self.current.name and \
                self.current.price and self.current.sku:
            self.current.name = self.current.name.strip()
            # "1.234,56\xa0<currency>" -> "1234.56": drop thousands dots, use
            # a decimal point, cut at the no-break space.
            price = self.current.price
            price = price.replace(".", "").replace(",", ".").split("\xa0")[0]
            self.current.price = float(price)
            self.products.append(self.current)
            self.current = None
            self.state = State.idle

    def handle_data(self, data):
        if self.state == State.product_name:
            self.current.name += data
        if self.state == State.price:
            self.current.price += data


URL = "http://www.edelmetall-handel.de/quickbuy/twozero/"


def execute():
    """Always fetches full catalog and returns the list of parsed products."""
    request = urllib.request.Request(URL)
    # For offline debugging the urlopen() call can be replaced by
    # open("index.html", 'rb').
    with urllib.request.urlopen(request) as f:
        # Undecodable bytes are dropped instead of aborting the scrape.
        f2 = codecs.getreader('utf-8')(f, errors='ignore')
        parser = Parser()
        for line in f2.readlines():
            parser.feed(line)
        return parser.products


if __name__ == "__main__":
    from pprint import pprint

    pprint(execute())


# ---------------------------------------------------------------------------
# jobs/hplq1300n.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import re
import typing
import urllib.request
from collections import namedtuple

Data = namedtuple('Data', ['hostname', 'value'])

URL = "http://{}/hp/device/info_suppliesStatus.html"

# First ">NN%<br" occurrence on a line; at least one digit is required so
# that int() never sees an empty string.
_HPLQ_PERCENT_RE = re.compile(r".*>([0-9]+)%<br")


def job(host: str) -> typing.Optional[Data]:
    """Read the first percentage shown on the supplies-status page of *host*.

    Returns ``Data(hostname, value)`` where hostname is *host* with dots
    replaced by underscores, or ``None`` when no line contains a percentage.
    """
    url = URL.format(host)
    name = host.replace(".", "_")
    request = urllib.request.Request(url)
    with urllib.request.urlopen(request) as f:
        f2 = codecs.getreader('utf-8')(f)
        for line in f2.readlines():
            m = _HPLQ_PERCENT_RE.match(line)
            if m:
                return Data(name, int(m.group(1)))
    return None


if __name__ == "__main__":
    from pprint import pprint

    pprint(job("10.1.0.10"))


# ---------------------------------------------------------------------------
# jobs/prix_carburant.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import logging
import typing
from enum import Enum
from html.parser import HTMLParser
from urllib import request


class Station:
    """Scraped data of one station: name, price per fuel, and id."""

    def __init__(self):
        self.station_name = ""
        self.prices = {}  # fuel name -> price text; floats after clean()
        self.id = None

    def clean(self):
        """Drop fuels without a price and convert the remaining ones to float."""
        self.prices = {fuel: float(price)
                       for fuel, price in self.prices.items()
                       if price != ''}

    def __repr__(self):
        return "Prix: {} {}".format(self.station_name, self.prices)


State = Enum('State', 'pricelist fuel_name fuel_price station_name idle')


class Parser(HTMLParser):
    """State machine reading the station name (``div#colg``) and the
    ``<strong>fuel</strong> price`` pairs inside ``div#prix``."""

    def error(self, message):
        logging.error("Parser error: %s", message)

    def __init__(self):
        super().__init__()
        self._prix = Station()
        self._current_fuel_name = ""
        self._state = State.idle

    def get_prix(self):
        """Return the collected ``Station`` with its prices cleaned."""
        self._prix.clean()
        return self._prix

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self._state == State.idle and tag == "div" and attrs.get('id') == 'prix':
            self._state = State.pricelist
        elif self._state in (State.pricelist, State.fuel_price) and tag == 'strong':
            # Every <strong> inside the price list starts a new fuel name.
            self._state = State.fuel_name
            self._current_fuel_name = ''
        elif self._state == State.idle and tag == 'div' and attrs.get('id') == 'colg':
            self._state = State.station_name

    def handle_endtag(self, tag):
        if self._state == State.pricelist and tag == 'div':
            self._state = State.idle
        elif self._state == State.fuel_name and tag == 'strong':
            self._state = State.fuel_price
        elif self._state == State.fuel_price and tag == 'div':
            self._state = State.idle
        elif self._state == State.station_name and tag == 'p':
            self._state = State.idle

    def handle_data(self, data: str):
        if self._state == State.fuel_name:
            self._current_fuel_name += data.strip().replace(':', '')
            self._prix.prices[self._current_fuel_name] = ""
        elif self._state == State.fuel_price:
            # "0.000" is skipped, so that fuel keeps an empty price and is
            # removed by Station.clean().
            if data.strip() != "0.000":
                self._prix.prices[self._current_fuel_name] += data.strip()
        elif self._state == State.station_name:
            if len(data.strip()) > 0:
                self._prix.station_name += data.strip() + ". "


URL = "http://www.prix-carburants.gouv.fr/map/recupererInfosPdv/"


def _execute(station_id: str):
    """Download and parse the info fragment of one station.

    Returns the parsed ``Station`` with ``id`` set.  A failure while cleaning
    the parsed prices is re-raised as ``Exception`` naming the station, with
    the original error as second argument and as ``__cause__``.
    """
    parser = Parser()
    r = request.Request(URL + station_id)
    r.add_header('Host', 'www.prix-carburants.gouv.fr')
    r.add_header('Referer', 'http://www.prix-carburants.gouv.fr/recherche/map')
    r.add_header('User-Agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0')
    r.add_header('X-Requested-With', 'XMLHttpRequest')
    r.add_header('X-Prototype-Version', '1.7')
    r.add_header('Connection', 'close')
    # For offline debugging the urlopen() call can be replaced by
    # open("info.html", 'rb').
    with request.urlopen(r) as f:
        # Undecodable bytes are dropped instead of aborting the scrape.
        f2 = codecs.getreader('utf-8')(f, errors='ignore')
        for line in f2.readlines():
            parser.feed(line)

    try:
        prix = parser.get_prix()
        prix.id = station_id
        return prix
    except Exception as e:
        raise Exception("Failed for station: {}".format(station_id), e) from e


def execute(*ids) -> typing.Iterable[Station]:
    """Lazily yield the parsed ``Station`` for each id in *ids*."""
    for station_id in ids:
        yield _execute(station_id)


if __name__ == "__main__":
    from pprint import pprint

    pprint(list(execute('1630001', '67760001', '1210003', '1630003', '1210002', '1710001')))


# ---------------------------------------------------------------------------
# jobs/swr_wetter.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import json
import re
import urllib.parse
import urllib.request

URL = "http://www.swr.de/-/id=5491998/cf=42/did=13968954/format=json/nid=5491998/17ag7cb/index.json"

_SWR_INT_RE = re.compile(r"^-?[1-9][0-9]*$")
# The decimal point is escaped and a leading "0" is accepted, so "0.5" is
# converted and strings like "12a3" are no longer passed to float().
_SWR_FLOAT_RE = re.compile(r"^-?(0|[1-9][0-9]*)\.[0-9]*$")


def _swr_coerce(value):
    """Turn a numeric string into ``int``/``float``; return anything else as is."""
    if not isinstance(value, str):
        return value
    if value == "0" or _SWR_INT_RE.match(value):
        return int(value)
    if _SWR_FLOAT_RE.match(value):
        return float(value)
    return value


def job(cc):
    """
    cc: id of the region. See webpage: http://www.swr.de/wetter

    Generator yielding one ``{'name': ..., 'value': ...}`` dict per field of
    the current conditions and of every forecast day for that region.  Names
    have the form ``swr_wetter.<stateCode>.<regionCode>.<name>.<section>.<key>``.
    """
    params = urllib.parse.urlencode({'cc': cc})
    request = urllib.request.Request(URL + "?" + params)
    request.add_header(
        "User-Agent",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; FSL 7.0.6.01001)")

    def transformDict(name, value_dict):
        for (key, value) in value_dict.items():
            if key in ['timestamp', 'dayForecast']:
                continue
            # Placeholder string; presumably "keine Angabe" (not available).
            if value == "k. A.":
                continue
            yield {'name': "{}.{}.{}".format(basename, name, key),
                   'value': _swr_coerce(value)}

    with urllib.request.urlopen(request) as f:
        response = json.load(codecs.getreader('utf-8')(f))
    basename = "swr_wetter.{stateCode}.{regionCode}.{name}".format(
        **response['availableLocations'][cc])

    yield from transformDict("current", response['current'][cc])

    for (day, value) in response['forecast'].items():
        yield from transformDict("forecast." + day, value[cc])


if __name__ == "__main__":
    from pprint import pprint

    pprint(list(job("DE0008834")))


# ---------------------------------------------------------------------------
# jobs/sys_network.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import socket


def job(device, stat_type):
    """Read one counter from ``/sys/class/net/<device>/statistics/<stat_type>``.

    Returns a dict with the local hostname, *device*, *stat_type* and the
    counter as ``int`` under ``'value'``.
    """
    # ``with`` closes the file even when int() raises.
    with open('/sys/class/net/' + device + '/statistics/' + stat_type, 'r') as f:
        ivalue = int(f.read().replace("\n", ""))
    return {'hostname': socket.gethostname(), 'device': device, 'stat_type': stat_type, 'value': ivalue}


if __name__ == "__main__":
    from pprint import pprint

    pprint(job("eth1", "rx_bytes"))


# ---------------------------------------------------------------------------
# jobs/sys_network_rate.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
from collections import namedtuple

Data = namedtuple('Data', ['hostname', 'device', 'entry', 'value'])
# Counter value seen by the previous job() call, per (device, entry).
temp = {}


def job(device, entry, intervals):
    """Return the change rate of one network counter since the previous call.

    The counter is read from ``/sys/class/net/<device>/statistics/<entry>``
    and the difference to the previously seen value is divided by
    *intervals* (presumably the seconds between two calls).

    Returns a one-element list with a ``Data`` tuple, or an empty list on the
    first call for a counter and whenever the rate is not positive.
    """
    with open('/sys/class/net/' + device + '/statistics/' + entry, 'r') as f:
        ivalue = int(f.read().replace("\n", ""))

    # Keyed by device *and* entry: keying by entry alone mixed up the
    # counters of different devices.
    key = (device, entry)
    return_value = []
    if key in temp:
        rate = (ivalue - temp[key]) / intervals  # bytes/s
        if rate > 0:
            # prevent counter overflows
            # NOTE(review): the hostname is hard-coded, whereas
            # jobs/sys_network.py uses socket.gethostname().
            return_value = [Data('laer.2.localnet.cc', device, entry, rate)]

    temp[key] = ivalue

    return return_value


# ---------------------------------------------------------------------------
# jobs/tankerkoenig.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import json
import logging
import typing
from collections import namedtuple
from urllib import request

Data = namedtuple('Data', ['name', 'id', 'type', 'price'])


URL = "https://creativecommons.tankerkoenig.de/json/list.php?lat={lat}&lng={lng}&rad={rad}&sort=dist&type=all&apikey={api_key}"

# (key in the station record, label used in Data.type)
_TANKERKOENIG_FUELS = (
    ('diesel', 'Diesel'),
    ('e5', 'SP95-E5'),
    ('e10', 'SP95-E10'),
)


def execute(api_key: str, lat: float, lng: float, rad: float) -> typing.Iterable[Data]:
    """Yield one ``Data`` per fuel of every open station around a position.

    This is a generator: the request is only sent once iteration starts.
    Raises ``Exception`` when the response status is not ``'ok'``; every
    failure is logged with the query coordinates and re-raised.
    """
    url = URL.format(api_key=api_key, rad=rad, lat=lat, lng=lng)
    r = request.Request(url)
    try:
        with request.urlopen(r) as f:
            data = json.load(codecs.getreader('utf-8')(f))
        if data['status'] != 'ok':
            # Interpolate the message; it used to be passed as a second,
            # never formatted, exception argument.
            raise Exception("Error {}".format(data.get('message')))
        for station in data['stations']:
            if not station['isOpen']:
                continue
            name = "{} - {} - {}".format(station['place'], station['brand'], station['name'])
            for key, label in _TANKERKOENIG_FUELS:
                if key in station:
                    yield Data(name, station['id'], label, station[key])
    except Exception:
        logging.error("Failed for: %f %f %f", lat, lng, rad)
        raise  # bare raise keeps the original traceback


# ---------------------------------------------------------------------------
# jobs/telexoo.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import json
import random
import re
import urllib.parse
import urllib.request
from collections import namedtuple
from decimal import Decimal

URL = "https://telexoo.tegona.com/convert/"

Quote = namedtuple('Quote', ['curr_from', 'curr_to', 'rate'])


def execute(curr_from, curr_to):
    """Query the conversion of a random amount and derive the exchange rate.

    *curr_from* and *curr_to* are ``MONEY_CURRENCY_*`` constants of
    ``currencies.config``.  Returns ``Quote(curr_from, curr_to, rate)`` with
    the currencies given as three-letter codes.  Raises ``Exception`` when
    the ``result`` field of the response has an unexpected format.
    """
    # Explicit import at call time replaces the former module-level
    # ``from currencies.config import *``; only these constants are used.
    from currencies.config import (
        MONEY_CURRENCY_CHF,
        MONEY_CURRENCY_EUR,
        MONEY_CURRENCY_GBP,
        MONEY_CURRENCY_PLN,
        MONEY_CURRENCY_USD,
    )

    MULT = random.randint(1000, 9999)
    CURRENCY = {
        MONEY_CURRENCY_EUR: "EUR",
        MONEY_CURRENCY_CHF: "CHF",
        MONEY_CURRENCY_USD: "USD",
        MONEY_CURRENCY_GBP: "GBP",
        MONEY_CURRENCY_PLN: "PLN"
    }
    curr_from = CURRENCY[curr_from]
    curr_to = CURRENCY[curr_to]
    params = urllib.parse.urlencode({
        's1': curr_from,
        's2': curr_to,
        'amount': str(MULT),
        'action': 'sell',
        'language': 'en',
        'verbose': '0',
    })
    request = urllib.request.Request(URL + "?" + params)
    request.add_header(
        "User-Agent",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; FSL 7.0.6.01001)")
    with urllib.request.urlopen(request) as f:
        response = json.load(codecs.getreader('utf-8')(f))
    # Expected form after removing thousands separators: "<CUR> <amount>".
    result_raw = response[0]['result'].replace(",", "")
    # Raw string, and at least one character required for the amount.
    match = re.match(r"^{} ([0-9.]+)$".format(re.escape(curr_to)), result_raw)
    if not match:
        raise Exception("Invalid response in 'result' field")
    result = Decimal(match.group(1)) / MULT
    # The first field is the *source* currency (it used to repeat curr_to).
    return Quote(curr_from, curr_to, float(result))


if __name__ == "__main__":
    from pprint import pprint

    pprint(execute("CHF", "EUR"))
    pprint(execute("CHF", "GBP"))


# ---------------------------------------------------------------------------
# jobs/transferwise.py  (new file, mode 100755)
# ---------------------------------------------------------------------------
import codecs
import json
import random
import re
import urllib.parse
import urllib.request
from collections import namedtuple

APP_URL = "https://transferwise.com/fr/"
URL = "https://transferwise.com/api/v1/payment/calculate"
UA = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0"

Data = namedtuple('Data', ['curr_from', 'curr_to', 'rate'])


def get_token():
    """Return the ``config.appToken`` value found in the page at ``APP_URL``.

    The token is the quoted text on the first line mentioning
    ``config.appToken``; ``None`` is returned when no line matches.
    """
    request = urllib.request.Request(APP_URL)
    request.add_header("User-Agent", UA)

    with urllib.request.urlopen(request) as f:
        f2 = codecs.getreader('utf-8')(f)
        for line in f2.readlines():
            m = re.match(r".*config.appToken.*'(.+)'.*", line)
            if m:
                return m.group(1)
    return None


def job(currFrom, currTo):
    """Return ``Data(currFrom, currTo, rate)`` from the calculate endpoint.

    Raises ``Exception`` when no application token could be obtained.
    """
    token = get_token()
    if token is None:
        # Fail with a clear message instead of sending a None header value.
        raise Exception("Could not find config.appToken on {}".format(APP_URL))
    MULT = random.randint(100, 100000)
    data = urllib.parse.urlencode({
        'amount': str(MULT),
        'amountCurrency': 'source',
        'hasDiscount': 'false',
        'isFixedRate': 'false',
        'isGuaranteedFixedTarget': 'false',
        'sourceCurrency': currFrom,
        'targetCurrency': currTo,
    })
    request = urllib.request.Request(URL + "?" + data)
    request.add_header("X-Authorization-key", token)
    request.add_header("X-Authorization-token", "")
    request.add_header("User-Agent", UA)
    with urllib.request.urlopen(request) as f:
        response = json.load(codecs.getreader('utf-8')(f))
    return Data(currFrom, currTo, float(response['transferwiseRate']))


if __name__ == "__main__":
    from pprint import pprint

    pprint(job("CHF", "EUR"))
    pprint(job("CHF", "GBP"))