-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speed up parsejson 3.25x (with a gcc-10.2 PGO build) on number heavy input #16055
Changes from 12 commits
0e24c5e
6cb3375
46d5f21
ad4707c
b802eba
0e66b44
05a7350
136fe92
7fd9287
5b98112
61f39cd
822d91c
88d9845
7a076c2
24192b5
e8c234f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -63,13 +63,15 @@ type | |
stateExpectObjectComma, stateExpectColon, stateExpectValue | ||
|
||
JsonParser* = object of BaseLexer ## the parser object. | ||
a*: string | ||
tok*: TokKind | ||
a*: string ## last valid string | ||
i*: int64 ## last valid integer | ||
f*: float ## last valid float | ||
tok*: TokKind ## current token kind | ||
kind: JsonEventKind | ||
err: JsonError | ||
state: seq[ParserState] | ||
filename: string | ||
rawStringLiterals: bool | ||
rawStringLiterals, strIntegers, strFloats: bool | ||
|
||
JsonKindError* = object of ValueError ## raised by the ``to`` macro if the | ||
## JSON kind is incorrect. | ||
|
@@ -102,17 +104,25 @@ const | |
] | ||
|
||
proc open*(my: var JsonParser, input: Stream, filename: string; | ||
rawStringLiterals = false) = | ||
rawStringLiterals = false, strIntegers = true, strFloats = true) = | ||
## initializes the parser with an input stream. `Filename` is only used | ||
## for nice error messages. If `rawStringLiterals` is true, string literals | ||
## are kept with their surrounding quotes and escape sequences in them are | ||
## left untouched too. | ||
## left untouched too. If `strIntegers` is true, the `a` field is set to the | ||
## substring used to build the integer `i`. If `strFloats` is true, the `a` | ||
## field is set to the substring used to build the float `f`. These are | ||
## distinct from `rawFloats` & `rawIntegers` in `json` module, but must be | ||
## true for those raw* forms to work correctly. Parsing is about 1.5x faster | ||
## with all 3 flags false vs. all true, but false `str` defaults are needed | ||
## for backward compatibility. | ||
lexbase.open(my, input) | ||
my.filename = filename | ||
my.state = @[stateStart] | ||
my.kind = jsonError | ||
my.a = "" | ||
my.rawStringLiterals = rawStringLiterals | ||
my.strIntegers = strIntegers | ||
my.strFloats = strFloats | ||
|
||
proc close*(my: var JsonParser) {.inline.} = | ||
## closes the parser `my` and its associated input stream. | ||
|
@@ -126,13 +136,13 @@ proc str*(my: JsonParser): string {.inline.} = | |
|
||
proc getInt*(my: JsonParser): BiggestInt {.inline.} = | ||
## returns the number for the event: ``jsonInt`` | ||
assert(my.kind == jsonInt) | ||
return parseBiggestInt(my.a) | ||
assert(my.tok == tkInt) | ||
return cast[BiggestInt](my.i) # A no-op unless BiggestInt changes | ||
|
||
proc getFloat*(my: JsonParser): float {.inline.} = | ||
## returns the number for the event: ``jsonFloat`` | ||
assert(my.kind == jsonFloat) | ||
return parseFloat(my.a) | ||
assert(my.tok == tkFloat) | ||
return my.f | ||
|
||
proc kind*(my: JsonParser): JsonEventKind {.inline.} = | ||
## returns the current event type for the JSON parser | ||
|
@@ -317,35 +327,98 @@ proc skip(my: var JsonParser) = | |
break | ||
my.bufpos = pos | ||
|
||
proc parseNumber(my: var JsonParser) = | ||
var pos = my.bufpos | ||
if my.buf[pos] == '-': | ||
add(my.a, '-') | ||
inc(pos) | ||
if my.buf[pos] == '.': | ||
add(my.a, "0.") | ||
inc(pos) | ||
template jsOrVmBlock(caseJsOrVm, caseElse: untyped): untyped = | ||
when nimvm: | ||
block: | ||
caseJsOrVm | ||
else: | ||
while my.buf[pos] in Digits: | ||
add(my.a, my.buf[pos]) | ||
inc(pos) | ||
if my.buf[pos] == '.': | ||
add(my.a, '.') | ||
inc(pos) | ||
# digits after the dot: | ||
while my.buf[pos] in Digits: | ||
add(my.a, my.buf[pos]) | ||
inc(pos) | ||
if my.buf[pos] in {'E', 'e'}: | ||
add(my.a, my.buf[pos]) | ||
inc(pos) | ||
if my.buf[pos] in {'+', '-'}: | ||
add(my.a, my.buf[pos]) | ||
inc(pos) | ||
while my.buf[pos] in Digits: | ||
add(my.a, my.buf[pos]) | ||
inc(pos) | ||
my.bufpos = pos | ||
block: | ||
when defined(js) or defined(nimscript): | ||
# nimscript has to be here to avoid semantic checking of caseElse | ||
caseJsOrVm | ||
else: | ||
caseElse | ||
|
||
template doCopy(a, b, start, endp1: untyped): untyped = | ||
jsOrVmBlock: | ||
a = b[start ..< endp1] | ||
do: | ||
let n = endp1 - start | ||
if n > 0: | ||
a.setLen n | ||
copyMem a[0].addr, b[start].addr, n | ||
|
||
proc i64(c: char): int64 {.inline.} = int64(ord(c) - ord('0')) | ||
|
||
proc pow10(e: int64): float {.inline.} = | ||
const p10 = [1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, | ||
1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07, 1e-06, 1e-05, | ||
1e-4, 1e-3, 1e-2, 1e-1, 1.0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, | ||
1e8, 1e9] # 4*64B cache lines = 32 slots | ||
if -22 <= e and e <= 9: | ||
return p10[e + 22] # common case=small table lookup | ||
result = 1.0 | ||
var base = 10.0 | ||
var e = e | ||
if e < 0: | ||
e = -e | ||
base = 0.1 | ||
while e != 0: | ||
if (e and 1) != 0: | ||
result *= base | ||
e = e shr 1 | ||
base *= base | ||
|
||
proc parseNumber(my: var JsonParser): TokKind {.inline.} = | ||
# Parse/validate/classify all at once, both setting & returning token kind | ||
# and, if not `tkError`, leaving the binary numeric answer in `my.[if]`. | ||
const Sign = {'+', '-'} # NOTE: `parseFloat` can generalize this to INF/NAN. | ||
var i = my.bufpos # NUL ('\0') terminated | ||
var noDot = false | ||
var exp = 0'i64 | ||
var p10 = 0 | ||
var pnt = -1 # find '.' (point); do digits | ||
var nD = 0 | ||
my.i = 0'i64 # build my.i up from zero.. | ||
if my.buf[i] in Sign: | ||
i.inc # skip optional sign | ||
while my.buf[i] != '\0': # ..and track scale/pow10. | ||
if my.buf[i] notin Digits: | ||
if my.buf[i] != '.' or pnt >= 0: | ||
break # a second '.' is forbidden | ||
pnt = nD # save location of '.' (point) | ||
nD.dec # undo loop's nD.inc | ||
elif nD < 18: # 2**63==9.2e18 => 18 digits ok | ||
my.i = 10 * my.i + my.buf[i].i64 # core ASCII->binary transform | ||
else: # 20+ digits before decimal | ||
p10.inc # any digit moves implicit '.' | ||
i.inc | ||
nD.inc | ||
if my.buf[my.bufpos] == '-': | ||
my.i = -my.i # adjust sign | ||
if pnt < 0: # never saw '.' | ||
pnt = nD; noDot = true # so set to number of digits | ||
elif nD == 1: | ||
return tkError # ONLY "[+-]*\.*" | ||
if my.buf[i] in {'E', 'e'}: # optional exponent | ||
i.inc | ||
let i0 = i | ||
if my.buf[i] in Sign: | ||
i.inc # skip optional sign | ||
while my.buf[i] in Digits: # build exponent | ||
exp = 10 * exp + my.buf[i].i64 | ||
i.inc | ||
if my.buf[i0] == '-': | ||
exp = -exp # adjust sign | ||
elif noDot: # and my.i < (1'i64 shl 53'i64) ? # No '.' & No [Ee]xponent | ||
my.bufpos = i | ||
if my.strIntegers: doCopy(my.a, my.buf, my.bufpos, i) | ||
return tkInt # mark as integer | ||
exp += pnt - nD + p10 # combine explicit&implicit exp | ||
my.f = my.i.float * pow10(exp) # has round-off vs. 80-bit | ||
if my.strFloats: doCopy(my.a, my.buf, my.bufpos, i) | ||
my.bufpos = i | ||
return tkFloat # mark as float | ||
|
||
proc parseName(my: var JsonParser) = | ||
var pos = my.bufpos | ||
|
@@ -355,17 +428,13 @@ proc parseName(my: var JsonParser) = | |
inc(pos) | ||
my.bufpos = pos | ||
|
||
proc getTok*(my: var JsonParser): TokKind = | ||
setLen(my.a, 0) | ||
proc getTok*(my: var JsonParser): TokKind {.inline.} = | ||
skip(my) # skip whitespace, comments | ||
case my.buf[my.bufpos] | ||
of '-', '.', '0'..'9': | ||
parseNumber(my) | ||
if {'.', 'e', 'E'} in my.a: | ||
result = tkFloat | ||
else: | ||
result = tkInt | ||
result = parseNumber(my) | ||
of '"': | ||
setLen(my.a, 0) | ||
result = parseString(my) | ||
of '[': | ||
inc(my.bufpos) | ||
|
@@ -388,6 +457,7 @@ proc getTok*(my: var JsonParser): TokKind = | |
of '\0': | ||
result = tkEof | ||
of 'a'..'z', 'A'..'Z', '_': | ||
setLen(my.a, 0) | ||
parseName(my) | ||
case my.a | ||
of "null": result = tkNull | ||
|
@@ -524,3 +594,31 @@ proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} = | |
proc eat*(p: var JsonParser, tok: TokKind) = | ||
if p.tok == tok: discard getTok(p) | ||
else: raiseParseErr(p, tokToStr[tok]) | ||
|
||
iterator jsonTokens*(input: Stream, path: string, rawStringLiterals = false, | ||
strIntegers = true, strFloats = true): ptr JsonParser = | ||
## Yield each successive `ptr JsonParser` while parsing `input` with error | ||
## label `path`. | ||
var p: JsonParser | ||
p.open(input, path, rawStringLiterals = rawStringLiterals, | ||
strIntegers = strIntegers, strFloats = strFloats) | ||
try: | ||
var tok = p.getTok | ||
while tok notin {tkEof,tkError}: | ||
yield p.addr # `JsonParser` is a pretty big struct | ||
tok = p.getTok | ||
finally: | ||
p.close() | ||
|
||
iterator jsonTokens*(path: string, rawStringLiterals = false, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These API additions are not good for two reasons:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They're unused because the idea was to provide something like I was more concerned about your feedback about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The return type could be For now please remove these iterators, it's easy enough to write a open/close/loop snippet.
I would use a single |
||
strIntegers = true, strFloats = true): ptr JsonParser = | ||
## Yield each successive `ptr JsonParser` while parsing file data at `path`. | ||
## Example usage: | ||
## | ||
## .. code-block:: nim | ||
## var sum = 0.0 # total all json floats named "myKey" in JSON file `path`. | ||
## for tok in jsonTokens(path, false, false, false): | ||
## if t.tok == tkFloat and t.a == "myKey": sum += t.f | ||
for tok in jsonTokens(newFileStream(path), path, | ||
rawStringLiterals, strIntegers, strFloats): | ||
yield tok |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is not the proc kinda too big to be
{.inline.}
?. 🤔There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. In the by far most common case it is just one line (two almost always perfectly predicted compare-jumps and one array reference).
It is only used just once, for human mental abstraction. So, there is no duplicated generated code.
Also, people are too sparing of
{.inline.}
in my view. Function call overhead is like 12..15 cycles * 3..6way superscalar =~ 36..90 dynamic instruction slots. So, if duplication is not a concern, that is the the rough amount of work to compare to.If you were really going to browbeat me over inline then you should complaining about
parseNumber
being inline, notpow10
. ;-) {EDIT: oh, wait, that's what you were doing! Sorry! } But many numbers to parse might be just single-digits which take only a few cycles. In fact, the more numbers you have the more likely many are single digits. So, the function call overhead would then be quite large compared to the work done.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Another thing inline buys you here is putting
parseNumber
C code in the same translation unit as the parsing itself. So, the C compilers metrics/heuristics for inlining can at least be applied. Otherwise you rely on LTO which a lot of people do not enable. Given that this whole bit of work was to get really fast speed, it seems appropriate for that reason as well as the "many single digit" cases. If you do PGO builds the compiler will actually try to measure if inlining is good or not. According to the GCC guys that is the main benefit of PGO. I am always getting 1.5x to 2.0x speed ups from PGO with Nim which suggests to me that our{.inline.}
choices are not generally so great (or at least a few critical choices are not...).There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, and also
parseNumber
is also only called in one place ingetTok
. So, still no extra L1 I-cache style resource hit, and I also added{.inline.}
ongetTok
to get the meat of all this logic into the translation unit of whatever is using the json parser.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also, Nim's
{.inline.}
just marks it in the generated C asinline
. So, it's still up to C/C++ compiler code metrics what ultimately happens. Those surely do take into account the statically observable number of expansions/substitutions and their sizes. So, the many calls togetTok
fromjson
are not a real problem. But if anyone really wants me to remove them I can. I always use (and recommend using) PGO when performance matters, myself.