-rw-r--r-- | pyinfluxtools/__init__.py | 240 |
1 files changed, 109 insertions, 131 deletions
diff --git a/pyinfluxtools/__init__.py b/pyinfluxtools/__init__.py
index 7654051..7ce39aa 100644
--- a/pyinfluxtools/__init__.py
+++ b/pyinfluxtools/__init__.py
@@ -2,40 +2,35 @@
 import re
 import sys
-from pprint import pprint
-from funcparserlib.lexer import make_tokenizer, Token, LexerError
-from funcparserlib.parser import (some, a, maybe, many, finished, skip)
-
-
+from funcparserlib.lexer import make_tokenizer
+from funcparserlib.parser import (some, maybe, many, finished, skip, NoParseError)
 
 
 class WriteRequest(object):
+
     @staticmethod
     def parse(lines):
         """
         Parse multiple Write objects separeted by new-line character.
 
-        >> lines = []
-        >> lines += ['cpu']
-        >> lines += ['cpu,host=serverA,region=us-west']
-        >> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2']
-        >> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2 1234']
-        >> print("\\n".join(map(str, WriteRequest.parse("\\n".join(lines)))))
-        cpu
-        cpu,host="serverA",region="us-west"
+        >>> print(Write.parse("foo b=1"))
+        foo b=1
+
+        >>> lines = []
+        >>> lines += ['cpu field=123']
+        >>> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2']
+        >>> lines += ['cpu,host=serverA,region=us-west field1=1,field2=2 1234']
+        >>> print("\\n".join(map(str, WriteRequest.parse("\\n".join(lines)))))
+        cpu field=123
         cpu,host="serverA",region="us-west" field1=1,field2=2
         cpu,host="serverA",region="us-west" field1=1,field2=2 1234
         """
         writes = map(Write.parse, lines.split("\n"))
         return list(writes)
 
-    @staticmethod
-    def parseFile(file):
-        for line in file.readlines():
-            yield Write.parse(line)
-
 
 class Write(object):
+
     def __init__(self, key, tags, fields, timestamp=None):
         self.key = key
         self.tags = tags
@@ -48,22 +43,21 @@ class Write(object):
         if isinstance(self.fields, dict):
             self.fields = self.fields.items()
 
-    @staticmethod
-    def tokenize(str):
-        specs = [
-            ('Comma', (r',',)),
-            ('Space', (r' ',)),
-            ('Equal', (r'=',)),
-            ('Quote', (r'"',)),
-            ('Escape', (r'\\',)),
-            ('Int', (r'[0-9]+',)),
-            ('Float', (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),
-            ('Text', (r'[A-Za-z\200-\377_0-9-\.]+',)),
-        ]
-        useless = [] #'Comma', 'NL', 'Space', 'Header', 'Footer']
-        t = make_tokenizer(specs)
-        return [x for x in t(str) if x.type not in useless]
+    specs = [
+        ('Comma', (r',',)),
+        ('Space', (r' ',)),
+        ('Equal', (r'=',)),
+        ('Quote', (r'"',)),
+        ('Escape', (r'\\',)),
+        ('Int', (r'[0-9]+',)),
+        ('Float', (r'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)),
+        ('Char', (r'.',)),
+    ]
+
+    @staticmethod
+    def tokenize(line):
+        tokenizer = make_tokenizer(Write.specs)
+        return list(tokenizer(line))
 
     @staticmethod
     def parse(line):
@@ -91,7 +85,7 @@ class Write(object):
         >>> Write.parse('cpu host=server\\ A,region=us\\ west')
         <Write key=cpu tags=[] fields=[('host', 'server A'), ('region', 'us west')] timestamp=None>
 
-        >>> Write.parse('cpu ho\=st=server\ A,region=us\ west')
+        >>> Write.parse('cpu ho\\=st=server\ A,region=us\ west')
         <Write key=cpu tags=[] fields=[('ho=st', 'server A'), ('region', 'us west')] timestamp=None>
 
         >>> print(Write.parse('cpu ho\=st=server\ A,region=us\ west'))
@@ -109,10 +103,10 @@ class Write(object):
         >>> print(Write.parse('cpu field12=12 123123123'))
         cpu field12=12 123123123
 
-        >> print(Write.parse('cpu field12=12 1231abcdef123'))
-        Traceback (most recent call last):
-        ...
-        funcparserlib.parser.NoParseError: should have reached <EOF>: 1,20-1,28: Text 'abcdef123'
+        >>> try:
+        ...     print(Write.parse('cpu field12=12 1231abcdef123'))
+        ... except NoParseError:
+        ...     pass
 
         >>> print(Write.parse("cpu,x=3,y=4,z=6 field\ name=\\"HH \\\\\\"World\\",x=asdf\\\\ foo"))
        cpu,x=3,y=4,z=6 field\\ name="HH \\"World",x="asdf foo"
@@ -129,37 +123,72 @@ class Write(object):
         >>> Write.parse('"measurement\ with\ quotes",tag\ key\ with\ spaces=tag\,value\,with"commas" field_key\\\\\\\\="string field value, only \\\\" need be quoted"')
         <Write key="measurement with quotes" tags=[('tag key with spaces', 'tag,value,with"commas"')] fields=[('field_key\\\\', 'string field value, only " need be quoted')] timestamp=None>
 
-        #>>> Write.parse('disk_free value=442221834240,working\ directories="C:\My Documents\Stuff for examples,C:\My Documents"')
-        #Fails.... this format is just crazy
+        >>> Write.parse('disk_free value=442221834240,working\ directories="C:\My Documents\Stuff for examples,C:\My Documents"')
+        <Write key=disk_free tags=[] fields=[('value', 442221834240), ('working directories', 'C:\\\\My Documents\\\\Stuff for examples,C:\\\\My Documents')] timestamp=None>
+
+        >>> Write.parse('disk_free value=442221834240,working\ directories="C:\My Documents\Stuff for examples,C:\My Documents" 123')
+        <Write key=disk_free tags=[] fields=[('value', 442221834240), ('working directories', 'C:\\\\My Documents\\\\Stuff for examples,C:\\\\My Documents')] timestamp=123>
+
+        >>> print(Write.parse('foo,foo=2 field_key\\\\\\="string field"'))
+        foo,foo=2 field_key\\\\="string field"
+
+        >>> print(Write.parse('foo,foo=2 field_key="string\\\\" field"'))
+        foo,foo=2 field_key="string\\" field"
+
+        >>> print(Write.parse('foo field0=tag,field1=t,field2=true,field3=True,field4=TRUE'))
+        foo field0="tag",field1=True,field2=True,field3=True,field4=True
+
+        >>> print(Write.parse('foo field1=f,field2=false,field3=False,field4=FALSE,field5=fag'))
+        foo field1=False,field2=False,field3=False,field4=False,field5="fag"
+
+        >>> print(Write.parse('"measurement\ with\ quotes",tag\ key\ with\ spaces=tag\,value\,with"commas" field_key\\\\\\="string field value, only \\\\" need be quoted"'))
+        "measurement\ with\ quotes",tag\ key\ with\ spaces="tag,value,with\\"commas\\"" field_key\\\\="string field value, only \\" need be quoted"
+
+        >>> Write.parse('"measurement\ with\ quotes" foo=1')
+        <Write key="measurement with quotes" tags=[] fields=[('foo', 1)] timestamp=None>
         """
         tokval = lambda t: t.value
-        toksval = lambda x: "".join(x)
-        token = lambda type: some(lambda t: t.type == type)
-
-        space = token('Space') >> tokval
-        comma = token('Comma') >> tokval
-        quote = token('Quote') >> tokval
-        escape_space = token('Escape') + token('Space') >> (lambda x: " ")
-        escape_comma = token('Escape') + token('Comma') >> (lambda x: ",")
-        escape_equal = token('Escape') + token('Equal') >> (lambda x: "=")
-        escape_quote = token('Escape') + token('Quote') >> (lambda x: "\"")
-        escape_escape = token('Escape') + token('Escape') >> (lambda x: "\\")
-        plain_int = token('Int') >> (lambda t: int(tokval(t)))
-        plain_int_text = token('Int') >> tokval
-        plain_float = token('Float') >> (lambda t: float(tokval(t)))
-        plain_float_text = token('Float') >> tokval
-        plain_bool = some( lambda t: t.type == 'Text' and t.value.lower() in ["t", "true"]) >> (lambda t: True) | \
-                     some( lambda t: t.type == 'Text' and t.value.lower() in ["f", "false"]) >> (lambda t: False)
-        plain_text = token("Text") >> tokval
-
-        identifier = many( plain_text | escape_space | escape_comma | escape_escape | plain_int_text | token('Quote') >> tokval ) >> toksval
-        quoted_text = many( escape_escape | escape_quote | plain_text | space | comma | plain_int_text | plain_float_text) >> (lambda x: "".join(x))
-        unquoted_text = many( escape_space | escape_comma | escape_equal | escape_escape | quote | plain_text | plain_int_text ) >> toksval
-        string_value = ( skip(token('Quote')) + quoted_text + skip(token('Quote')) ) | unquoted_text
-
-        kv_value = plain_int | plain_float | plain_bool | string_value
-        kv = string_value + skip(token('Equal')) + kv_value >> (lambda x: (x[0],x[1]))
+        joinval = "".join
+        someToken = lambda type: some(lambda t: t.type == type)
+        true_values = ["t", "true", "True", "TRUE"]
+        false_values = ["f", "false", "False", "FALSE"]
+
+        char = someToken('Char') >> tokval
+        space = someToken('Space') >> tokval
+        comma = someToken('Comma') >> tokval
+        quote = someToken('Quote') >> tokval
+        escape = someToken('Escape') >> tokval
+        equal = someToken('Equal') >> tokval
+
+        escape_space = skip(escape) + space >> joinval
+        escape_comma = skip(escape) + comma >> joinval
+        escape_equal = skip(escape) + equal >> joinval
+        escape_quote = skip(escape) + quote >> joinval
+        escape_escape = skip(escape) + escape >> joinval
+
+        plain_int_text = someToken('Int') >> tokval
+        plain_int = plain_int_text >> (lambda v: int(v))
+        plain_float_text = someToken('Float') >> tokval
+        plain_float = plain_float_text >> (lambda v: float(v))
+
+        identifier = many(char | escape_space | escape_comma |
+                          escape_escape | plain_int_text | quote) >> joinval
+        quoted_text_ = many(escape_quote | space | plain_int_text |
+                            plain_float_text | char | comma |
+                            escape) >> joinval
+        quoted_text = skip(quote) + quoted_text_ + skip(quote)
+        unquoted_text = many(escape_space | escape_comma |
+                             escape_equal | escape_escape |
+                             plain_int_text | char | quote) >> joinval
+        string_value = quoted_text | unquoted_text >> \
+            (lambda s: s in true_values and True
+             or s in false_values and False
+             or s not in false_values and s)
+
+        kv_value = plain_int | plain_float | string_value
+        kv = string_value + \
+            skip(equal) + kv_value >> (lambda x: (x[0], x[1]))
 
         def setter(obj, propert):
             def r(val):
@@ -167,70 +196,21 @@ class Write(object):
                 return (propert, val)
             return r
 
-        key = identifier
-        tags = many( skip(token('Comma')) + kv) >> (lambda x: x) # (lambda x: [x[0]] + x[1])
-        fields = ( kv + many( skip(token('Comma')) + kv ) ) >> (lambda x: [x[0]] + x[1])
-        timestamp = plain_int
+        tags = many(skip(comma) + kv) >> (lambda x: x)
+        fields = (kv + many(skip(comma) + kv)) >> \
+            (lambda x: [x[0]] + x[1])
 
         write = Write(None, None, None, None)
-        toplevel = (key >> setter(write, "key")) + \
-            maybe( tags >> setter(write, "tags") ) + \
-            ( skip(token('Space')) + (fields >> setter(write, "fields")) ) + \
-            maybe( skip(token('Space')) + timestamp >> setter(write, "timestamp") ) + \
-            skip(finished) >> (lambda x: x)
-        try:
-            result = toplevel.parse(Write.tokenize(line))
-        except:
-            pprint(line, stream=sys.stderr)
-            pprint(write, stream=sys.stderr)
-            pprint(Write.tokenize(line), stream=sys.stderr)
-            raise
-        #pprint({line : result}, stream=sys.stderr)
+        toplevel = (identifier >> setter(write, "key")) + \
+            maybe(tags >> setter(write, "tags")) + \
+            (skip(space) + (fields >> setter(write, "fields"))) + \
+            maybe(skip(space) + plain_int >> setter(write, "timestamp")) + \
+            skip(finished) >> (lambda x: x)
+
+        result = toplevel.parse(Write.tokenize(line))
+        #pprint(result)
 
         return write
 
-        def unescape(string):
-            return re.sub(r'(?<!\\)([\\,=])', '', string)
-
-        def unescape_value(string):
-            if string.startswith("\"") and string.endswith("\""):
-                string = re.sub(r'(?<!\\)(["])', '', string)
-            else:
-                string = unescape(string)
-            if re.match("^[0-9]+$", string):
-                return int(string)
-            elif re.match("^[0-9]*\.[0-9]*$", string):
-                return float(string)
-            elif string.lower() in ["t", "true", "f", "false"]:
-                return string.lower in ["t", "true"]
-            else:
-                return string
-
-        args = re.split(r"(?<!\\) ", line)
-        key, *tags = re.split(r"(?<!\\),", args[0])
-        key = unescape(key)
-
-        if tags:
-            tags = map(lambda tag: re.split(r"(?<!\\)=", tag), tags)
-            tags = map(lambda tag: (unescape(tag[0]), unescape_value(tag[1])), tags)
-            tags = list(tags)
-        else:
-            tags = None
-
-        if len(args) > 1:
-            fields = re.split(r"(?<!\\),", args[1])
-            fields = map(lambda field: re.split(r"(?<!\\)=", field), fields)
-            fields = map(lambda field: (unescape(field[0]), unescape_value(field[1])), fields)
-            fields = list(fields)
-        else:
-            fields = None
-
-        if len(args) > 2:
-            timestamp = int(args[2])
-        else:
-            timestamp = None
-
-        return Write(key, tags, fields, timestamp)
-
     def __repr__(self):
         return "<{} key={} tags={} fields={} timestamp={}>".format(
             self.__class__.__name__, self.key, self.tags, self.fields, self.timestamp)
@@ -244,13 +224,12 @@ class Write(object):
                 return str(obj)
             else:
                 obj = str(obj)
-                return "\"" + obj.replace("\"","\\\"") + "\""
-
+                return "\"" + obj.replace("\"", "\\\"") + "\""
 
         def escape_kv(kvlist):
             return ",".join(
-                map(lambda kv: escape_key(kv[0]) + "=" + escape_value(kv[1]),
-                kvlist))
+                map(lambda kv: escape_key(kv[0]) + "=" + escape_value(kv[1]),
+                    kvlist))
 
         result = escape_key(self.key)
 
@@ -272,4 +251,3 @@ class Write(object):
 if __name__ == "__main__":
     import doctest
     doctest.testmod()
-
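For orientation, here is a minimal usage sketch of the parser as it stands after this patch. It is not part of the diff itself; it assumes pyinfluxtools and funcparserlib are installed and importable, and the example line-protocol strings are illustrative.

    from funcparserlib.parser import NoParseError
    from pyinfluxtools import Write, WriteRequest

    # Parse a single line-protocol entry into its components.
    w = Write.parse('cpu,host=serverA,region=us-west field1=1,field2=2 1234')
    print(w.key, w.tags, w.fields, w.timestamp)

    # str() re-serializes the entry with the escaping rules shown in the doctests.
    print(str(w))

    # WriteRequest.parse splits on newlines and returns one Write per line.
    batch = WriteRequest.parse("cpu field=123\ncpu field=124")
    print(len(batch))

    # Trailing garbage after the timestamp raises funcparserlib's NoParseError,
    # which this patch now imports and exercises in a doctest.
    try:
        Write.parse('cpu field12=12 1231abcdef123')
    except NoParseError:
        pass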