# jobs/swr_wetter.py

import codecs
import json
import re
import urllib.parse
import urllib.request

# JSON endpoint queried by job(); the region id is appended as the ``cc``
# query parameter.
URL = "http://www.swr.de/-/id=5491998/cf=42/did=13968954/format=json/nid=5491998/17ag7cb/index.json"


# Whole-string patterns for numeric values that arrive as strings.
# A leading zero is only accepted for "0" itself (or "0.x"), so strings such
# as "007" stay strings.
_INT_RE = re.compile(r"-?(?:0|[1-9][0-9]*)\Z")
_FLOAT_RE = re.compile(r"-?(?:0|[1-9][0-9]*)\.[0-9]*\Z")

# Entries with these keys are never emitted.
_SKIPPED_KEYS = frozenset(['timestamp', 'dayForecast'])

# Placeholder value whose entries are dropped (presumably the German
# abbreviation for "no data" -- confirm against the feed).
_NO_DATA = "k. A."


def _coerce_value(value):
    """Return int/float for a numeric-looking string, anything else unchanged."""
    if not isinstance(value, str):
        # JSON numbers, null, nested objects: nothing to parse.
        return value
    if _INT_RE.match(value):
        return int(value)
    if _FLOAT_RE.match(value):
        return float(value)
    return value


def _metrics(prefix, values):
    """Yield one ``{'name': ..., 'value': ...}`` dict per usable entry of *values*."""
    for key, value in values.items():
        if key in _SKIPPED_KEYS or value == _NO_DATA:
            continue
        yield {'name': "{}.{}".format(prefix, key),
               'value': _coerce_value(value)}


def job(cc, timeout=30):
    """
    Fetch the SWR weather feed and yield its values for one region.

    cc: id of the region. See webpage: http://www.swr.de/wetter
    timeout: seconds to wait on the HTTP request before giving up.

    Yields dicts of the form ``{'name': ..., 'value': ...}``. Names are built
    as ``swr_wetter.<stateCode>.<regionCode>.<name>.current.<key>`` and
    ``swr_wetter.<stateCode>.<regionCode>.<name>.forecast.<day>.<key>``.
    Values that look like integers or decimals are converted to int/float;
    everything else is passed through as received.

    Raises KeyError if the response has no entry for *cc*; errors raised by
    urllib for network failures propagate unchanged.
    """
    params = urllib.parse.urlencode({'cc': cc})
    request = urllib.request.Request(URL + "?" + params)
    # Browser-like User-Agent kept from the original implementation.
    request.add_header(
        "User-Agent",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; FSL 7.0.6.01001)")

    # Parse the whole response first so the connection is closed before any
    # value is handed to the (possibly slow) consumer.
    with urllib.request.urlopen(request, timeout=timeout) as f:
        response = json.load(codecs.getreader('utf-8')(f))

    basename = "swr_wetter.{stateCode}.{regionCode}.{name}".format(
        **response['availableLocations'][cc])

    for d in _metrics(basename + ".current", response['current'][cc]):
        yield d

    for (day, per_region) in response['forecast'].items():
        prefix = "{}.forecast.{}".format(basename, day)
        for d in _metrics(prefix, per_region[cc]):
            yield d


if __name__ == "__main__":
    # Manual check: fetch one region and pretty-print everything job() yields.
    import pprint

    results = list(job("DE0008834"))
    pprint.pprint(results)