From 059bed7574351310a022a8f3ae911bafe524380b Mon Sep 17 00:00:00 2001 From: Yves Fischer Date: Sun, 5 Jul 2015 02:42:04 +0200 Subject: with funcparserlib --- pyinfluxtools/__init__.py | 162 +++++++++++++++++++++++++++++++++++++--------- setup.py | 2 +- 2 files changed, 133 insertions(+), 31 deletions(-) diff --git a/pyinfluxtools/__init__.py b/pyinfluxtools/__init__.py index 6e10914..7654051 100644 --- a/pyinfluxtools/__init__.py +++ b/pyinfluxtools/__init__.py @@ -1,5 +1,12 @@ #!/usr/bin/env python3 import re +import sys + +from pprint import pprint +from funcparserlib.lexer import make_tokenizer, Token, LexerError +from funcparserlib.parser import (some, a, maybe, many, finished, skip) + + class WriteRequest(object): @@ -8,12 +15,12 @@ class WriteRequest(object): """ Parse multiple Write objects separeted by new-line character. - >>> lines = [] - >>> lines += ['cpu'] - >>> lines += ['cpu,host=serverA,region=us-west'] - >>> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2'] - >>> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2 1234'] - >>> print("\\n".join(map(str, WriteRequest.parse("\\n".join(lines))))) + >> lines = [] + >> lines += ['cpu'] + >> lines += ['cpu,host=serverA,region=us-west'] + >> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2'] + >> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2 1234'] + >> print("\\n".join(map(str, WriteRequest.parse("\\n".join(lines))))) cpu cpu,host="serverA",region="us-west" cpu,host="serverA",region="us-west" field1=1,field2=2 @@ -41,51 +48,146 @@ class Write(object): if isinstance(self.fields, dict): self.fields = self.fields.items() + @staticmethod + def tokenize(str): + specs = [ + ('Comma', (r',',)), + ('Space', (r' ',)), + ('Equal', (r'=',)), + ('Quote', (r'"',)), + ('Escape', (r'\\',)), + ('Int', (r'[0-9]+',)), + ('Float', (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)), + ('Text', (r'[A-Za-z\200-\377_0-9-\.]+',)), + ] + useless = [] #'Comma', 'NL', 'Space', 'Header', 'Footer'] + t = make_tokenizer(specs) + return [x for x in t(str) if x.type not in useless] + + @staticmethod def parse(line): """ Parse a line from the POST request into a Write object. - >>> Write.parse('cpu') - - >>> print(Write.parse('cpu')) - cpu + >>> Write.parse('cpu a=1') + - >>> Write.parse('cpu,host=serverA,region=us-west') - - >>> print(Write.parse('cpu,host=serverA,region=us-west')) - cpu,host="serverA",region="us-west" + >>> print(Write.parse('cpu a=1')) + cpu a=1 + + >>> Write.parse('cpu,host=serverA,region=us-west foo=bar') + + + >>> print(Write.parse('cpu host=serverA,region=us-west')) + cpu host="serverA",region="us-west" + + >>> Write.parse('cpu\\,01 host=serverA,region=us-west') + + + >>> print(Write.parse('cpu\,01 host=serverA,region=us-west')) + cpu\,01 host="serverA",region="us-west" - >>> Write.parse('cpu\\,01,host=serverA,region=us-west') - - >>> print(Write.parse('cpu\,01,host=serverA,region=us-west')) - cpu\,01,host="serverA",region="us-west" + >>> Write.parse('cpu host=server\\ A,region=us\\ west') + - >>> Write.parse('cpu,host=server\\ A,region=us\\ west') - - >>> print(Write.parse('cpu,host=server\\ A,region=us\\ west')) - cpu,host="server A",region="us west" + >>> Write.parse('cpu ho\=st=server\ A,region=us\ west') + - >>> Write.parse('cpu,ho\=st=server\ A,region=us\ west') - - >>> print(Write.parse('cpu,ho\=st=server\ A,region=us\ west')) - cpu,ho\=st="server A",region="us west" + >>> print(Write.parse('cpu ho\=st=server\ A,region=us\ west')) + cpu ho\=st="server A",region="us west" >>> print(Write.parse('cpu,ho\=st=server\ A field=123')) cpu,ho\=st="server A" field=123 + >>> print(Write.parse('cpu,foo=bar,foo=bar field=123,field=123')) # error: double name is accepted cpu,foo="bar",foo="bar" field=123,field=123 + >>> print(Write.parse('cpu field12=12')) cpu field12=12 + >>> print(Write.parse('cpu field12=12 123123123')) cpu field12=12 123123123 - >>> print(Write.parse('cpu field12=12 1231abcdef123')) + + >> print(Write.parse('cpu field12=12 1231abcdef123')) Traceback (most recent call last): ... - ValueError: invalid literal for int() with base 10: '1231abcdef123' - >>> print(Write.parse('cpu field="hello World"')) - null + funcparserlib.parser.NoParseError: should have reached : 1,20-1,28: Text 'abcdef123' + + >>> print(Write.parse("cpu,x=3,y=4,z=6 field\ name=\\"HH \\\\\\"World\\",x=asdf\\\\ foo")) + cpu,x=3,y=4,z=6 field\\ name="HH \\"World",x="asdf foo" + + >>> print(Write.parse("cpu,x=3 field\ name=\\"HH \\\\\\"World\\",x=asdf\\\\ foo")) + cpu,x=3 field\\ name="HH \\"World",x="asdf foo" + + >>> print(Write.parse("cpu foo=bar 12345")) + cpu foo="bar" 12345 + + >>> print(Write.parse('"measurement\ with\ quotes",tag\ key\ with\ spaces=tag\,value\,with field_key\\\\\\="string field value, only \\\\" need be quoted"')) + "measurement\ with\ quotes",tag\ key\ with\ spaces="tag,value,with" field_key\\\\="string field value, only \\" need be quoted" + + >>> Write.parse('"measurement\ with\ quotes",tag\ key\ with\ spaces=tag\,value\,with"commas" field_key\\\\\\\\="string field value, only \\\\" need be quoted"') + + + #>>> Write.parse('disk_free value=442221834240,working\ directories="C:\My Documents\Stuff for examples,C:\My Documents"') + #Fails.... this format is just crazy """ + + tokval = lambda t: t.value + toksval = lambda x: "".join(x) + token = lambda type: some(lambda t: t.type == type) + + space = token('Space') >> tokval + comma = token('Comma') >> tokval + quote = token('Quote') >> tokval + escape_space = token('Escape') + token('Space') >> (lambda x: " ") + escape_comma = token('Escape') + token('Comma') >> (lambda x: ",") + escape_equal = token('Escape') + token('Equal') >> (lambda x: "=") + escape_quote = token('Escape') + token('Quote') >> (lambda x: "\"") + escape_escape = token('Escape') + token('Escape') >> (lambda x: "\\") + plain_int = token('Int') >> (lambda t: int(tokval(t))) + plain_int_text = token('Int') >> tokval + plain_float = token('Float') >> (lambda t: float(tokval(t))) + plain_float_text = token('Float') >> tokval + plain_bool = some( lambda t: t.type == 'Text' and t.value.lower() in ["t", "true"]) >> (lambda t: True) | \ + some( lambda t: t.type == 'Text' and t.value.lower() in ["f", "false"]) >> (lambda t: False) + plain_text = token("Text") >> tokval + + identifier = many( plain_text | escape_space | escape_comma | escape_escape | plain_int_text | token('Quote') >> tokval ) >> toksval + quoted_text = many( escape_escape | escape_quote | plain_text | space | comma | plain_int_text | plain_float_text) >> (lambda x: "".join(x)) + unquoted_text = many( escape_space | escape_comma | escape_equal | escape_escape | quote | plain_text | plain_int_text ) >> toksval + string_value = ( skip(token('Quote')) + quoted_text + skip(token('Quote')) ) | unquoted_text + + kv_value = plain_int | plain_float | plain_bool | string_value + kv = string_value + skip(token('Equal')) + kv_value >> (lambda x: (x[0],x[1])) + + def setter(obj, propert): + def r(val): + setattr(obj, propert, val) + return (propert, val) + return r + + key = identifier + tags = many( skip(token('Comma')) + kv) >> (lambda x: x) # (lambda x: [x[0]] + x[1]) + fields = ( kv + many( skip(token('Comma')) + kv ) ) >> (lambda x: [x[0]] + x[1]) + timestamp = plain_int + + write = Write(None, None, None, None) + toplevel = (key >> setter(write, "key")) + \ + maybe( tags >> setter(write, "tags") ) + \ + ( skip(token('Space')) + (fields >> setter(write, "fields")) ) + \ + maybe( skip(token('Space')) + timestamp >> setter(write, "timestamp") ) + \ + skip(finished) >> (lambda x: x) + try: + result = toplevel.parse(Write.tokenize(line)) + except: + pprint(line, stream=sys.stderr) + pprint(write, stream=sys.stderr) + pprint(Write.tokenize(line), stream=sys.stderr) + raise + #pprint({line : result}, stream=sys.stderr) + return write + def unescape(string): return re.sub(r'(?