1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
#lang scheme/base
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; PARSEQ.PLT
;; A Parser Combinator library.
;;
;; Bonzai Lab, LLC. All rights reserved.
;;
;; Licensed under LGPL.
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; regex.ss - a simple regular expression parser
;; yc 1/1/2009 - first version
(require "../main.ss"
mzlib/trace
)
;; regex-sof
;; sof = ^ ?   (optional start-of-input anchor in the pattern text)
;; Accepts a single #\^ when one comes next.  The second argument to
;; zero-one, #\$, is presumably the value produced when no #\^ is present
;; (TODO confirm against zero-one in ../main.ss); regex-exp only tests the
;; result with (char=? sof #\^), so any other char means "not anchored".
(define regex-sof
  (zero-one (char= #\^)
            #\$))
;; regex-eof
;; eof = $ ?   (optional end-of-input anchor in the pattern text)
;; Accepts a single #\$ when one comes next.  The second argument to
;; zero-one, #\^, is presumably the value produced when no #\$ is present
;; (TODO confirm against zero-one in ../main.ss); regex-exp only tests the
;; result with (char=? eof #\$), so any other char means "not anchored".
(define regex-eof
  (zero-one (char= #\$)
            #\^))
;; regex-meta-chars
;; The characters that carry special meaning in a pattern, as a list of
;; chars in this order:  . + * ? ^ $ [ ] ( ) { } \
;; The list is spelled as a string for readability; the trailing "\\" in
;; the string literal is the single backslash character.
(define regex-meta-chars
  (string->list ".+*?^$[](){}\\"))
;; regex-digit
;; digit = \d
;; Recognizes the two-character shorthand \d in the pattern text (written
;; "\\d" as a Scheme string) and yields the imported `digit` parser as the
;; parse value.
(define regex-digit
  (seq "\\d"
       (return digit)))
;; regex-not-digit
;; not-digit = \D
;; Recognizes the two-character shorthand \D in the pattern text (written
;; "\\D" as a Scheme string) and yields the imported `not-digit` parser as
;; the parse value.
(define regex-not-digit
  (seq "\\D"
       (return not-digit)))
;; regex-word
;; word = \w
;; Recognizes the two-character shorthand \w in the pattern text (written
;; "\\w" as a Scheme string) and yields the imported `word` parser as the
;; parse value.
(define regex-word
  (seq "\\w"
       (return word)))
;; regex-not-word
;; not-word = \W
;; Recognizes the two-character shorthand \W in the pattern text (written
;; "\\W" as a Scheme string) and yields the imported `not-word` parser as
;; the parse value.
(define regex-not-word
  (seq "\\W"
       (return not-word)))
;; regex-whitespace
;; whitespace = \s
;; Recognizes the two-character shorthand \s in the pattern text (written
;; "\\s" as a Scheme string) and yields the imported `whitespace` parser
;; as the parse value.
(define regex-whitespace
  (seq "\\s"
       (return whitespace)))
;; regex-not-whitespace
;; not-whitespace = \S
;; Recognizes the two-character shorthand \S in the pattern text (written
;; "\\S" as a Scheme string) and yields the imported `not-whitespace`
;; parser as the parse value.
(define regex-not-whitespace
  (seq "\\S"
       (return not-whitespace)))
;; regex-any-char
;; any-char = .
;; Recognizes a single dot in the pattern text and yields the imported
;; `any-char` parser as the parse value.
(define regex-any-char
  (seq #\.
       (return any-char)))
;; regex-literal
;; literal = \d | \D | \w | \W | \s | \S | . | \n | \r | \t | \<char>
;;         | any character that is not in regex-meta-chars
;; The character-class shorthands and the dot are tried first; each of
;; those already yields a ready-made parser.  The last alternative reads
;; one concrete character (e-newline / e-return / e-tab are presumably the
;; \n, \r and \t escapes listed in the grammar above -- confirm in
;; ../main.ss -- followed by a backslash-escaped character, then any
;; non-meta character) and wraps it with char= so that the value is a
;; parser for exactly that character.
(define regex-literal
  (choice regex-digit
          regex-not-digit
          regex-word
          regex-not-word
          regex-whitespace
          regex-not-whitespace
          regex-any-char
          (seq ch <- (choice e-newline
                             e-return
                             e-tab
                             (escaped-char #\\ any-char)
                             (char-not-in regex-meta-chars))
               (return (char= ch)))))
;; regex-atom
;; atom = literal | group | choice
;; The smallest unit a quantifier (?, *, +, {m,n}) can be attached to.
;; NOTE(review): regex-group and regex-choice are defined further down in
;; this file, and regex-group in turn refers back to regex-atom.  This
;; only works if `choice` tolerates names that are not yet defined when
;; this form is evaluated -- confirm against ../main.ss before reordering
;; or rewriting this definition.
(define regex-atom
  (choice regex-literal
          regex-group
          regex-choice))
;; regex-char-range
;; char-range = <low>-<high>, e.g., a-z
;; Both endpoints are read with the same parser: any character that is
;; neither #\- nor one of regex-meta-chars.  The value is the list
;; (char-between low high) holding the char-between procedure itself and
;; the two endpoint values; make-regex-exp later applies the head of such
;; a list to the rest of it.
(define regex-char-range
  (let ((endpoint (char-not-in (cons #\- regex-meta-chars))))
    (seq low <- endpoint
         #\-
         high <- endpoint
         (return (list char-between low high)))))
;; regex-choice
;; choice = [ <char-range | literal>+ ]
;; A bracketed set: one or more members, each a character range or a
;; literal, with the range form tried first.  The value is a list whose
;; head is the one-of* procedure and whose tail is the member values, to
;; be applied later by make-regex-exp.
(define regex-choice
  (seq #\[
       members <- (one-many (choice regex-char-range
                                    regex-literal))
       #\]
       (return (cons one-of* members))))
;; regex-group
;; group = ( <atom>+ )
;; A parenthesized run of one or more atoms.  Quantifiers are not part of
;; regex-atom, so they are not accepted inside the parentheses by this
;; rule.  The value is a list whose head is the sequence* procedure and
;; whose tail is the atom values, to be applied later by make-regex-exp.
(define regex-group
  (seq #\(
       atoms <- (one-many regex-atom)
       #\)
       (return (cons sequence* atoms))))
;; regex combinators
;; regex-zero-one
;; zero-one = <atom>?
;; An atom followed by a question mark.  The value is the list
;; (zero-one atom) holding the zero-one procedure and the atom's value,
;; to be applied later by make-regex-exp.
(define regex-zero-one
  (seq atom <- regex-atom
       #\?
       (return (list zero-one atom))))
;; regex-zero-many
;; zero-many = <atom>*
;; An atom followed by a star.  The value is the list (zero-many atom)
;; holding the zero-many procedure and the atom's value, to be applied
;; later by make-regex-exp.
(define regex-zero-many
  (seq atom <- regex-atom
       #\*
       (return (list zero-many atom))))
;; regex-one-many
;; one-many = <atom>+
;; An atom followed by a plus sign.  The value is the list
;; (one-many atom) holding the one-many procedure and the atom's value,
;; to be applied later by make-regex-exp.
(define regex-one-many
  (seq atom <- regex-atom
       #\+
       (return (list one-many atom))))
;; regex-range
;; range = <atom>{lo,hi} | <atom>{times}
;; An atom followed by a braced repetition count.
;;   lo - the number before the comma; 0 is supplied to zero-one for the
;;        case where it is omitted.
;;   hi - when a comma is present, the number after it, with +inf.0
;;        supplied for an omitted number ({3,} is open-ended); when there
;;        is no comma at all, lo is supplied instead, so {3} means
;;        "exactly 3".
;; The value is the list (repeat atom lo hi), to be applied later by
;; make-regex-exp.  No check is made here that lo <= hi.
(define regex-range
  (seq atom <- regex-atom
       #\{
       lo <- (zero-one natural-number 0)
       hi <- (zero-one (seq #\,
                            upper <- (zero-one natural-number +inf.0)
                            (return upper))
                       lo)
       #\}
       (return (list repeat atom lo hi))))
;; regex-exp
;; exp = sof ? <zero-one | zero-many | one-many | range | atom>* eof ?
;; Parser for a complete pattern, bracketed by SOF and EOF on the pattern
;; text itself.  The quantified forms are listed before the bare atom, so
;; an atom followed by ?, *, + or {..} is read as a single unit.
;; The value is a list headed by the regex-parser* procedure, followed by
;;   - SOF, only when the pattern started with ^
;;   - the values of the parsed atoms, in order
;;   - EOF, only when the pattern ended with $
;; make-regex-exp later applies the head of that list to the rest.
(define regex-exp
  (seq SOF
       start <- regex-sof
       atoms <- (zero-many (choice regex-zero-one
                                   regex-zero-many
                                   regex-one-many
                                   regex-range
                                   regex-atom))
       end <- regex-eof
       EOF
       (return (append (list regex-parser*)
                       (if (char=? start #\^) (list SOF) '())
                       atoms
                       (if (char=? end #\$) (list EOF) '())))))
;; regex-parser
;; Convert a list of parsers (the pieces of one regular expression) into a
;; usable parser, including deciding whether the match must begin exactly
;; where parsing starts or may begin further along in the input.
;;   parsers : list of parsers, in the order they must match.
;; When the first element is SOF (the pattern began with ^) the result is
;; simply the sequence of the parsers.  Otherwise the result is a choice
;; between that sequence and "consume one character with any-char, then
;; retry", which is what lets an unanchored pattern be found later in the
;; input.
;; An empty list is accepted: the (car parsers) test is guarded, so an
;; empty pattern takes the unanchored branch instead of raising on car.
;; NOTE(review): this assumes (sequence '()) is a valid parser -- confirm
;; against ../main.ss.
(define (regex-parser parsers)
  (let ((regexp (sequence parsers)))
    (if (and (pair? parsers)
             (eq? (car parsers) SOF))
        regexp
        ;; The recursive call stays inside seq exactly as before; it is
        ;; the step that moves the search forward one character.
        (seq v <- (choice regexp
                          (seq any-char (regex-parser parsers)))
             (return v)))))

;; regex-parser*
;; The variable-argument form of regex-parser.  It takes zero or more
;; parsers, so the tree regex-exp builds for an empty pattern
;; (regex-parser* with no arguments) can be applied without an arity
;; error.
(define (regex-parser* . parsers)
  (regex-parser parsers))
;; make-regex-exp
;; Compile a regular expression into a matching procedure.
;;   in : the pattern; it is passed to make-input, so it may be whatever
;;        make-input accepts (presumably a string or a port -- confirm in
;;        ../main.ss).
;; Returns a one-argument procedure that wraps its argument with
;; make-input and runs the compiled parser on it, returning whatever that
;; parser returns.
;; Raises an error (via `error`, tagged make-regex-exp) when regex-exp
;; cannot parse the pattern.
;; The parser is built once, here, rather than on every call of the
;; returned procedure.  As a consequence, any error raised while
;; assembling the parser surfaces when make-regex-exp is called instead of
;; on the first match attempt.
(define (make-regex-exp in)
  ;; helper: turn the tree produced by regex-exp into a parser.  A list is
  ;; a deferred call (procedure arg ...): the arguments are built first,
  ;; then the head is applied to them.  Anything else is already final.
  (define (helper exp)
    (cond ((list? exp) (apply (car exp) (map helper (cdr exp))))
          (else exp)))
  ;; (trace helper)
  (let-values (((exp rest)
                (regex-exp (make-input in))))
    (if (failed? exp)
        (error 'make-regex-exp "the regular expression is invalid")
        (let ((matcher (helper exp)))
          (lambda (source)
            (matcher (make-input source)))))))
(provide regex-parser
make-regex-exp
)
|