-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcte.dogma
217 lines (200 loc) · 11.5 KB
/
cte.dogma
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
dogma_v1 utf-8
- identifier = cte_v1
- description = Concise Text Encoding, version 1
- reference = https://concise-encoding.org
- dogma = https://github.com/kstenerud/dogma/blob/master/v1/dogma_v1.0.md
# Top-level rule: the case-insensitive letter "c", the version, exactly one
# whitespace or line end, zero or more record type definitions, and finally a
# single top-level object. The top-level object may be any data_object except
# a local reference.
document = C & version & WSL
& items(record_type, WSLC)
& (data_object ! local_reference)
;
# The only version literal this grammar accepts.
version = '1';
# Object categories. Only the data-type alternatives go through markable();
# the reference alternatives are not wrapped in it.
data_object = markable(data_type) | reference_type;
# Objects accepted as the key of a key_value and as entries of a record_type.
keyable_object = markable(keyable_type) | local_reference;
# Any object except null; used for the first and last components of an edge.
non_null_object = markable(data_type ! null) | reference_type;
# Macro: `type`, optionally preceded by a marker.
markable(type) = marker? & type;
# Every value type that can appear as an object.
# `float` is listed explicitly: it is not part of keyable_type, and without
# this alternative a standalone floating point value could not be derived from
# `document` (the float rules would only be reachable through float arrays).
# NOTE(review): confirm against the Concise Encoding specification whether
# non-NaN floats should additionally be accepted as keys via keyable_type.
data_type = keyable_type | float | array_type | media | custom_type | list | map | record | edge | node | null;
# Value types that may appear wherever a keyable_object is required.
keyable_type = boolean | integer | uid | date | time | timestamp | string | resource_id;
# References: local ('$' followed by an identifier) or remote ('$' followed by
# a quoted resource string).
reference_type = local_reference | remote_reference;
# All typed array forms. For the integer and float arrays, the x / o / b
# suffixed variants use element rules with an implied hexadecimal / octal /
# binary base (see the corresponding array_* rules).
array_type = array_bit
| array_int8 | array_int8x | array_int8o | array_int8b
| array_int16 | array_int16x | array_int16o | array_int16b
| array_int32 | array_int32x | array_int32o | array_int32b
| array_int64 | array_int64x | array_int64o | array_int64b
| array_uint8 | array_uint8x | array_uint8o | array_uint8b
| array_uint16 | array_uint16x | array_uint16o | array_uint16b
| array_uint32 | array_uint32x | array_uint32o | array_uint32b
| array_uint64 | array_uint64x | array_uint64o | array_uint64b
| array_float16 | array_float16x
| array_float32 | array_float32x
| array_float64 | array_float64x
| array_uid
;
# Keywords are spelled with the case-insensitive single-letter rules, so any
# mix of upper and lower case matches.
null = N&U&L&L;
boolean = true | false;
true = T&R&U&E;
false = F&A&L&S&E;
# Integer: an optional minus sign followed by an unsigned integer in any base.
integer = neg? & uinteger;
# Signed forms restricted to a single base.
integer_bin = neg? & uinteger_bin;
integer_oct = neg? & uinteger_oct;
integer_dec = neg? & uinteger_dec;
integer_hex = neg? & uinteger_hex;
# Unsigned integers. Binary, octal and hexadecimal require their base prefix;
# decimal has no prefix.
uinteger = uinteger_bin | uinteger_oct | uinteger_dec | uinteger_hex;
uinteger_bin = prefix_bin & digits_bin;
uinteger_oct = prefix_oct & digits_oct;
uinteger_dec = digits_dec;
uinteger_hex = prefix_hex & digits_hex;
# "Implied" forms omit the base prefix. They are the element rules of the
# b / o / x array variants, where the array type already names the base.
integer_bin_implied = neg? & uinteger_bin_implied;
integer_oct_implied = neg? & uinteger_oct_implied;
integer_hex_implied = neg? & uinteger_hex_implied;
uinteger_bin_implied = digits_bin;
uinteger_oct_implied = digits_oct;
uinteger_hex_implied = digits_hex;
# Floating point: decimal or hexadecimal notation.
float = float_dec | float_hex;
# After the leading digits a float needs a fraction (with optional exponent)
# or a bare exponent; this keeps it distinct from an integer. Each notation
# also accepts the special values.
float_dec = (neg? & digits_dec & (('.' & digits_dec & exponent_dec?) | exponent_dec)) | float_special;
float_hex = (neg? & prefix_hex & digits_hex & (('.' & digits_hex & exponent_hex?) | exponent_hex)) | float_special;
# Hexadecimal float without the prefix_hex; element rule of the float x arrays.
float_hex_implied = (neg? & digits_hex & (('.' & digits_hex & exponent_hex?) | exponent_hex)) | float_special;
# Special values. Only inf may take a minus sign; nan and snan may not.
float_special = (neg? & inf) | nan | snan;
inf = I&N&F;
nan = N&A&N;
snan = S&N&A&N;
# Exponents: marker letter (e for decimal, p for hex), optional minus sign,
# decimal digits. There is no rule for an explicit '+' sign.
exponent_dec = E & neg? & digits_dec;
exponent_hex = P & neg? & digits_dec; # hex float exponent is in decimal, not hex.
# UID: five groups of 8-4-4-4-12 hexadecimal digits separated by '-'.
uid = digit_hex{8} & '-' & digit_hex{4} & '-' & digit_hex{4} & '-' & digit_hex{4} & '-' & digit_hex{12};
# Date: three '-'-separated numeric fields. The first may be negative and has
# one or more digits; the second and third have one or two digits each.
date = neg? & digit_dec+ & '-' & digit_dec{1~2} & '-' & digit_dec{1~2};
# Time: 1-2 digits ':' 2 digits ':' 2 digits, an optional fraction of 1 to 9
# digits, and an optional time zone.
time = digit_dec{1~2} & ':' & digit_dec{2} & ':' & digit_dec{2} & ('.' & digit_dec{1~9})? & time_zone?;
# Timestamp: a date and a time joined by '/'.
timestamp = date & '/' & time;
# Time zone: either '/' followed by an area/location name or a coordinate
# pair, or a UTC offset (which is not preceded by '/').
time_zone = ('/' & (tz_area_location | tz_coordinates)) | utc_offset;
tz_area_location = tz_a_l_component & (('/' & tz_a_l_component)* | tz_a_l_legacy);
tz_a_l_component = ('a'~'z' | 'A'~'Z' | '.' | '-' | '_' )+;
tz_a_l_legacy = ('A'~'Z') & ('a'~'z' | 'A'~'Z' | '0'~'9' | '-' | '+' | '_' | '/')+;
# Two coordinates separated by '/'; each is an optionally negative decimal
# number with an optional fractional part.
tz_coordinates = tz_coordinate & '/' & tz_coordinate;
tz_coordinate = neg? & digit_dec+ & ('.' & digit_dec+)?;
# UTC offset: a mandatory sign followed by exactly four digits.
utc_offset = ('+' | '-') & digit_dec{4};
# String-like types share one quoted form and differ in the allowed character
# set and in their prefix ('@' for a resource ID, '$' for a remote reference).
string = stringlike(char_cte);
resource_id = '@' & stringlike(char_rid);
remote_reference = '$' & stringlike(char_rid);
# Macro: double-quoted content. Each unit is either an allowed character that
# is not '"', '\' or one of delimiter_lookalikes, or an escape sequence.
stringlike(allowed) = '"' & ((allowed ! ('"' | '\\' | delimiter_lookalikes)) | escape_sequence)* & '"';
# Escape sequence: a backslash followed by a single escape character
# (t, n, r in either case, or one of the listed symbols), a line continuation,
# a bracketed codepoint, or a verbatim section.
escape_sequence = '\\' & ( T | N | R | '"' | '*' | '/' | '\\' | '_' | '-'
| escape_continuation
| escape_codepoint
| escape_verbatim
)
;
# Continuation: the line end right after the backslash, plus any whitespace
# that follows it.
escape_continuation = LINE_END & WS*;
# Codepoint: one or more hexadecimal digits between square brackets.
escape_codepoint = '[' & digit_hex+ & ']';
# Verbatim: '.', then a sentinel of one or more char_sentinel characters that
# is captured as `terminator`, then a line end or a space, then the content,
# closed by a repeat of the captured terminator.
escape_verbatim = '.' & var(terminator, char_sentinel+) & (LINE_END | SP) & char_cte* & terminator;
# Whitespace between elements is optional for bit array
array_bit = '@' & B & '[' & items(digit_bin, WSL?) & ']';
# Signed integer arrays: type name is a case-insensitive "i" plus the bit
# width; elements are integers in any base, written with the usual prefixes.
array_int8 = array(I & '8', integer);
array_int16 = array(I & '16', integer);
array_int32 = array(I & '32', integer);
array_int64 = array(I & '64', integer);
# Signed integer arrays with an implied binary base (elements have no prefix).
array_int8b = array(I & '8' & B, integer_bin_implied);
array_int16b = array(I & '16' & B, integer_bin_implied);
array_int32b = array(I & '32' & B, integer_bin_implied);
array_int64b = array(I & '64' & B, integer_bin_implied);
# Signed integer arrays with an implied octal base.
array_int8o = array(I & '8' & O, integer_oct_implied);
array_int16o = array(I & '16' & O, integer_oct_implied);
array_int32o = array(I & '32' & O, integer_oct_implied);
array_int64o = array(I & '64' & O, integer_oct_implied);
# Signed integer arrays with an implied hexadecimal base.
array_int8x = array(I & '8' & X, integer_hex_implied);
array_int16x = array(I & '16' & X, integer_hex_implied);
array_int32x = array(I & '32' & X, integer_hex_implied);
array_int64x = array(I & '64' & X, integer_hex_implied);
# Unsigned integer arrays: same layout with a "u" type name; elements cannot
# carry a minus sign.
array_uint8 = array(U & '8', uinteger);
array_uint16 = array(U & '16', uinteger);
array_uint32 = array(U & '32', uinteger);
array_uint64 = array(U & '64', uinteger);
# Unsigned integer arrays with an implied binary base.
array_uint8b = array(U & '8' & B, uinteger_bin_implied);
array_uint16b = array(U & '16' & B, uinteger_bin_implied);
array_uint32b = array(U & '32' & B, uinteger_bin_implied);
array_uint64b = array(U & '64' & B, uinteger_bin_implied);
# Unsigned integer arrays with an implied octal base.
array_uint8o = array(U & '8' & O, uinteger_oct_implied);
array_uint16o = array(U & '16' & O, uinteger_oct_implied);
array_uint32o = array(U & '32' & O, uinteger_oct_implied);
array_uint64o = array(U & '64' & O, uinteger_oct_implied);
# Unsigned integer arrays with an implied hexadecimal base.
array_uint8x = array(U & '8' & X, uinteger_hex_implied);
array_uint16x = array(U & '16' & X, uinteger_hex_implied);
array_uint32x = array(U & '32' & X, uinteger_hex_implied);
array_uint64x = array(U & '64' & X, uinteger_hex_implied);
# Float arrays: elements are decimal or prefixed hexadecimal floats.
array_float16 = array(F & '16', float);
array_float32 = array(F & '32', float);
array_float64 = array(F & '64', float);
# Float arrays with an implied hexadecimal base (elements have no 0x prefix).
array_float16x = array(F & '16' & X, float_hex_implied);
array_float32x = array(F & '32' & X, float_hex_implied);
array_float64x = array(F & '64' & X, float_hex_implied);
# Array of UIDs; the type name is a case-insensitive "uid".
array_uid = array(U&I&D, uid);
# Media: a media type followed either by a quoted string body or by a
# bracketed list of two-digit hexadecimal bytes.
media = array_stringform(media_type) | array(media_type, hex_byte);
# Media type: major and minor parts separated by '/'. The major part must
# start with an ASCII letter; the minor part needs at least one character.
media_type = media_type_major & '/' & media_type_minor;
media_type_major = char_media_first & char_media_next*;
media_type_minor = char_media_next+;
# Custom type: the same two forms as media, identified by a decimal type code.
custom_type = array_stringform(custom_type_code) | array(custom_type_code, hex_byte);
custom_type_code = digit_dec+;
# Macro: '@', the type, then a quoted string directly after it.
array_stringform(type) = '@' & type & string;
# Macro: '@', the type, then '[' ... ']' holding zero or more elements
# separated by whitespace or line ends (WSL, so comments are not separators).
array(type, elem_type) = '@' & type & '[' & items(elem_type, WSL) & ']';
# List: zero or more objects between square brackets, separated by
# whitespace, line ends or comments.
list = '[' & items(data_object, WSLC) & ']';
# Map: zero or more key-value pairs between curly braces, same separators.
map = '{' & items(key_value, WSLC) & '}';
# Record type definition: '@', an identifier, then zero or more keyable
# objects between angle brackets, separated by whitespace, line ends or
# comments. The closing '>' is joined with an explicit '&' because Dogma has
# no implicit concatenation.
record_type = '@' & identifier & '<' & items(keyable_object, WSLC) & '>';
# Record: '@', an identifier, then zero or more objects between curly braces.
record = '@' & identifier & '{' & items(data_object, WSLC) & '}';
# Node: parentheses holding one mandatory object, optionally followed by a
# separator and further objects.
node = '(' & WSLC* & data_object & (WSLC & items(data_object, WSLC))? & WSLC* & ')';
# Edge: '@(' ... ')' holding exactly three objects separated by at least one
# WSLC each; the first and third must not be null.
edge = '@(' & WSLC* & non_null_object & WSLC+ & data_object & WSLC+ & non_null_object & WSLC* & ')';
# Key-value pair: key and value separated by '=', with optional whitespace,
# line ends or comments on either side of the '='.
key_value = keyable_object & WSLC* & '=' & WSLC* & data_object;
# Local reference: '$' directly followed by an identifier.
local_reference = '$' & identifier;
# Marker: '&', an identifier, then ':'.
marker = '&' & identifier & ':';
# Identifier: one "first" character followed by any number of "next"
# characters.
identifier = char_identifier_first & char_identifier_next*;
# Single-line comment: "//" up to and including the line end.
comment_single_line = "//" & (char_cte* ! LINE_END) & LINE_END;
# Multi-line comment: "/*" ... "*/". The rule refers to itself, so comments
# can nest.
# NOTE(review): the body is a single alternative (text OR one nested comment)
# and is not repeated, so as written it does not describe text on both sides
# of a nested comment - confirm whether a repetition was intended.
comment_multi_line = "/*" & ((char_cte* ! "/*") | comment_multi_line) & "*/";
# Macro: zero or more `type`, separated by one or more `separator`, with any
# number of leading and trailing separators. Matches the empty sequence too.
items(type, separator) = separator* & (type & (separator+ & type)* & separator*)?;
# One byte written as exactly two hexadecimal digits.
hex_byte = digit_hex{2};
# Digit sequences. A single '_' may appear between two digits; it can never
# lead, trail, or be doubled.
digits_bin = digit_bin & ('_'? & digit_bin)*;
digits_oct = digit_oct & ('_'? & digit_oct)*;
digits_dec = digit_dec & ('_'? & digit_dec)*;
digits_hex = digit_hex & ('_'? & digit_hex)*;
# Single digits per base. Hexadecimal letters match in either case.
digit_bin = '0'~'1';
digit_oct = '0'~'7';
digit_dec = '0'~'9';
digit_hex = '0'~'9' | 'a'~'f' | 'A'~'F';
# Minus sign used by the numeric and temporal rules.
neg = '-';
# Base prefixes: a literal '0' followed by the case-insensitive base letter
# (b = binary, o = octal, x = hexadecimal). The two parts are joined with an
# explicit '&' because Dogma has no implicit concatenation.
prefix_bin = '0' & B;
prefix_oct = '0' & O;
prefix_hex = '0' & X;
# Case-insensitive single letters. Each rule matches the lower- or upper-case
# form and is used to spell keywords, type names, prefixes and escapes.
A = 'a' | 'A';
B = 'b' | 'B';
C = 'c' | 'C';
D = 'd' | 'D';
E = 'e' | 'E';
F = 'f' | 'F';
I = 'i' | 'I';
L = 'l' | 'L';
N = 'n' | 'N';
O = 'o' | 'O';
P = 'p' | 'P';
R = 'r' | 'R';
S = 's' | 'S';
T = 't' | 'T';
U = 'u' | 'U';
X = 'x' | 'X';
# Whitespace
# WSLC: whitespace, a line end, or a comment. This is the separator used
# between structural elements.
WSLC = WSL | comment;
# Either comment form. WSLC refers to `comment`, so it must be defined;
# only the two specific comment rules exist elsewhere in this grammar.
comment = comment_single_line | comment_multi_line;
# WSL: whitespace or a line end (no comments).
WSL = WS | LINE_END;
# WS: in-line whitespace only.
WS = HT | SP;
# Line end: LF, optionally preceded by CR.
LINE_END = CR? & LF;
HT = '\[9]';  # horizontal tab
LF = '\[a]';  # line feed
CR = '\[d]';  # carriage return
SP = '\[20]'; # space
# Media type characters.
# https://www.rfc-editor.org/rfc/rfc2045#section-5.1
char_media_first = 'a'~'z' | 'A'~'Z';
# Any character from '!' to '~' that is not one of the tspecials below.
char_media_next = ('!' ~ '~') ! rfc2045_tspecials;
rfc2045_tspecials = '(' | ')' | '<' | '>' | '@' | ',' | ';' | ':' | '\\' | '"' | '/' | '[' | ']' | '?' | '=';
# Prose rule: the allowed resource ID characters are not spelled out here but
# deferred to the referenced RFC.
char_rid = """https://www.rfc-editor.org/rfc/rfc3987""";
# Character classes expressed as Unicode general categories.
char_sentinel = unicode(L,M,N,P,S);
char_identifier_first = unicode(L,N) | '_';
char_identifier_next = unicode(Cf,L,M,N) | '_' | '.' | '-';
# General text characters; whitespace and line ends are included explicitly.
char_cte = unicode(Cf,L,M,N,P,S,Zs) | WSL;
# Codepoints that stringlike() excludes from unescaped content, in addition
# to '"' and '\'.
# NOTE(review): going by the rule name these are presumably characters that
# look like the quote and backslash delimiters - confirm against the spec.
delimiter_lookalikes = '\[02ba]' | '\[02dd]' | '\[02ee]' | '\[02f6]' | '\[05f2]' | '\[05f4]'
| '\[1cd3]' | '\[201c]' | '\[201d]' | '\[201f]' | '\[2033]' | '\[2034]'
| '\[2036]' | '\[2037]' | '\[2057]' | '\[2f02]' | '\[2216]' | '\[27cd]'
| '\[29f5]' | '\[29f9]' | '\[3003]' | '\[3035]' | '\[31d4]' | '\[4e36]'
| '\[fe68]' | '\[ff02]' | '\[ff3c]' | '\[1d20f]' | '\[1d23b]'
;