Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up parsejson 3.25x (with a gcc-10.2 PGO build) on number heavy input #16055

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
literals remain in the "raw" string form so that client code can easily treat
small and large numbers uniformly.

- The parsejson module now combines number validation & parsing to net a speed
up of over 2x on number heavy inputs. It also allows *not* retaining origin
strings for integers & floats for another 1.5x speed up (over 3x overall).
A couple of convenience iterators were also added.

- Added `randState` template that exposes the default random number generator.
Useful for library authors.

Expand Down
4 changes: 2 additions & 2 deletions lib/pure/json.nim
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,7 @@ proc parseJson(p: var JsonParser; rawIntegers, rawFloats: bool): JsonNode =
result = newJRawNumber(p.a)
else:
try:
result = newJInt(parseBiggestInt(p.a))
result = newJInt(p.getInt)
except ValueError:
result = newJRawNumber(p.a)
discard getTok(p)
Expand All @@ -822,7 +822,7 @@ proc parseJson(p: var JsonParser; rawIntegers, rawFloats: bool): JsonNode =
result = newJRawNumber(p.a)
else:
try:
result = newJFloat(parseFloat(p.a))
result = newJFloat(p.getFloat)
except ValueError:
result = newJRawNumber(p.a)
discard getTok(p)
Expand Down
186 changes: 142 additions & 44 deletions lib/pure/parsejson.nim
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,15 @@ type
stateExpectObjectComma, stateExpectColon, stateExpectValue

JsonParser* = object of BaseLexer ## the parser object.
  a*: string ## last valid string (origin substring for numbers when enabled)
  i*: int64  ## last valid integer
  f*: float  ## last valid float
  tok*: TokKind ## current token kind
  kind: JsonEventKind
  err: JsonError
  state: seq[ParserState]
  filename: string
  rawStringLiterals, strIntegers, strFloats: bool

JsonKindError* = object of ValueError ## raised by the ``to`` macro if the
## JSON kind is incorrect.
Expand Down Expand Up @@ -102,17 +104,25 @@ const
]

proc open*(my: var JsonParser, input: Stream, filename: string;
           rawStringLiterals = false, strIntegers = true, strFloats = true) =
  ## initializes the parser with an input stream. `Filename` is only used
  ## for nice error messages. If `rawStringLiterals` is true, string literals
  ## are kept with their surrounding quotes and escape sequences in them are
  ## left untouched too. If `strIntegers` is true, the `a` field is set to the
  ## substring used to build the integer `i`. If `strFloats` is true, the `a`
  ## field is set to the substring used to build the float `f`. These are
  ## distinct from `rawFloats` & `rawIntegers` in the `json` module, but must
  ## be true for those raw* forms to work correctly. Parsing is about 1.5x
  ## faster with all 3 flags false vs. all true, but false `str` defaults are
  ## needed for backward compatibility.
  lexbase.open(my, input)
  my.filename = filename
  my.state = @[stateStart]
  my.kind = jsonError
  my.a = ""
  my.rawStringLiterals = rawStringLiterals
  my.strIntegers = strIntegers
  my.strFloats = strFloats

proc close*(my: var JsonParser) {.inline.} =
## closes the parser `my` and its associated input stream.
Expand All @@ -126,13 +136,13 @@ proc str*(my: JsonParser): string {.inline.} =

proc getInt*(my: JsonParser): BiggestInt {.inline.} =
  ## returns the number for the event: ``jsonInt``.
  ## The value was already parsed into binary by `parseNumber`, so this is a
  ## field read, not a string re-parse.
  assert(my.tok == tkInt)
  return cast[BiggestInt](my.i) # a no-op unless BiggestInt diverges from int64

proc getFloat*(my: JsonParser): float {.inline.} =
  ## returns the number for the event: ``jsonFloat``.
  ## The value was already parsed into binary by `parseNumber`, so this is a
  ## field read, not a string re-parse.
  assert(my.tok == tkFloat)
  return my.f

proc kind*(my: JsonParser): JsonEventKind {.inline.} =
## returns the current event type for the JSON parser
Expand Down Expand Up @@ -317,35 +327,98 @@ proc skip(my: var JsonParser) =
break
my.bufpos = pos

proc parseNumber(my: var JsonParser) =
var pos = my.bufpos
if my.buf[pos] == '-':
add(my.a, '-')
inc(pos)
if my.buf[pos] == '.':
add(my.a, "0.")
inc(pos)
template jsOrVmBlock(caseJsOrVm, caseElse: untyped): untyped =
when nimvm:
block:
caseJsOrVm
else:
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] == '.':
add(my.a, '.')
inc(pos)
# digits after the dot:
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] in {'E', 'e'}:
add(my.a, my.buf[pos])
inc(pos)
if my.buf[pos] in {'+', '-'}:
add(my.a, my.buf[pos])
inc(pos)
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
block:
when defined(js) or defined(nimscript):
# nimscript has to be here to avoid semantic checking of caseElse
caseJsOrVm
else:
caseElse

# Copy `b[start ..< endp1]` into `a`.  On the VM/JS backends this is a plain
# slice assignment; on native backends it sizes `a` and transfers the bytes
# with `copyMem`, avoiding an intermediate slice allocation.
template doCopy(a, b, start, endp1: untyped): untyped =
  jsOrVmBlock:
    a = b[start ..< endp1]
  do:
    let n = endp1 - start
    # BUG FIX: always set the length.  The original only resized for n > 0,
    # so an empty span left stale contents in `a` on native backends while
    # the VM/JS branch assigned "" -- divergent results across backends.
    a.setLen n
    if n > 0:
      copyMem a[0].addr, b[start].addr, n

proc i64(c: char): int64 {.inline.} =
  ## ASCII digit -> its numeric value as an `int64` (no range checking).
  int64(c.ord - '0'.ord)

proc pow10(e: int64): float {.inline.} =
  ## 10.0^e.  Exponents in -22..9 (by far the common case for JSON numbers)
  ## come straight from a small lookup table; anything else falls back to
  ## binary exponentiation (square-and-multiply).
  const tenTo = [1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17, 1e-16, 1e-15,
                 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-09, 1e-08, 1e-07,
                 1e-06, 1e-05, 1e-4, 1e-3, 1e-2, 1e-1, 1.0, 1e1, 1e2, 1e3,
                 1e4, 1e5, 1e6, 1e7, 1e8, 1e9] # 32 slots = 4*64B cache lines
  if e >= -22 and e <= 9:
    return tenTo[e + 22]      # common case = small table lookup
  var n = e
  var factor = 10.0
  if n < 0:                   # negative exponent: same loop, reciprocal base
    n = -n
    factor = 0.1
  result = 1.0
  while n != 0:               # square-and-multiply; identical multiply order
    if (n and 1) != 0:        # to the table-free original, so bit-identical
      result *= factor        # floating-point results
    n = n shr 1
    factor *= factor

proc parseNumber(my: var JsonParser): TokKind {.inline.} =
  # Parse/validate/classify all at once, both setting & returning token kind
  # and, if not `tkError`, leaving the binary numeric answer in `my.i`/`my.f`.
  # Inline: in the common case this is only a few instructions per digit and
  # it has exactly one call site (`getTok`), so no code duplication results.
  const Sign = {'+', '-'} # NOTE: `parseFloat` can generalize this to INF/NAN.
  var i = my.bufpos       # buffer is NUL ('\0') terminated
  var noDot = false
  var exp = 0'i64
  var p10 = 0
  var pnt = -1            # find '.' (point); do digits
  var nD = 0
  my.i = 0'i64            # build my.i up from zero..
  if my.buf[i] in Sign:
    i.inc                 # skip optional sign
  while my.buf[i] != '\0': # ..and track scale/pow10.
    if my.buf[i] notin Digits:
      if my.buf[i] != '.' or pnt >= 0:
        break             # a second '.' is forbidden
      pnt = nD            # save location of '.' (point)
      nD.dec              # undo loop's nD.inc
    elif nD < 18:         # 2**63==9.2e18 => 18 digits ok
      my.i = 10 * my.i + my.buf[i].i64 # core ASCII->binary transform
    else:                 # 19+ digits before decimal
      p10.inc             # any digit moves implicit '.'
    i.inc
    nD.inc
  if my.buf[my.bufpos] == '-':
    my.i = -my.i          # adjust sign
  if pnt < 0:             # never saw '.'
    pnt = nD; noDot = true # so set to number of digits
  elif nD == 1:
    # NOTE(review): only exactly one digit combined with a '.' errors here;
    # inputs like "." or ".55" still classify as floats -- confirm intended.
    return tkError        # ONLY "[+-]*\.*"
  if my.buf[i] in {'E', 'e'}: # optional exponent
    i.inc
    let i0 = i
    if my.buf[i] in Sign:
      i.inc               # skip optional sign
    while my.buf[i] in Digits: # build exponent
      exp = 10 * exp + my.buf[i].i64
      i.inc
    if my.buf[i0] == '-':
      exp = -exp          # adjust sign
  elif noDot: # and my.i < (1'i64 shl 53'i64) ? # No '.' & No [Ee]xponent
    # BUG FIX: copy the origin substring *before* advancing `bufpos`.  The
    # original assigned `my.bufpos = i` first, which made the copy span
    # `my.bufpos ..< i` empty, so `my.a` never received the integer's text
    # even with `strIntegers = true` (the float path below has the correct
    # order: copy, then advance).
    if my.strIntegers: doCopy(my.a, my.buf, my.bufpos, i)
    my.bufpos = i
    return tkInt          # mark as integer
  exp += pnt - nD + p10   # combine explicit & implicit exponents
  my.f = my.i.float * pow10(exp) # has round-off vs. 80-bit
  if my.strFloats: doCopy(my.a, my.buf, my.bufpos, i)
  my.bufpos = i
  return tkFloat          # mark as float

proc parseName(my: var JsonParser) =
var pos = my.bufpos
Expand All @@ -355,17 +428,13 @@ proc parseName(my: var JsonParser) =
inc(pos)
my.bufpos = pos

proc getTok*(my: var JsonParser): TokKind =
setLen(my.a, 0)
proc getTok*(my: var JsonParser): TokKind {.inline.} =
skip(my) # skip whitespace, comments
case my.buf[my.bufpos]
of '-', '.', '0'..'9':
parseNumber(my)
if {'.', 'e', 'E'} in my.a:
result = tkFloat
else:
result = tkInt
result = parseNumber(my)
of '"':
setLen(my.a, 0)
result = parseString(my)
of '[':
inc(my.bufpos)
Expand All @@ -388,6 +457,7 @@ proc getTok*(my: var JsonParser): TokKind =
of '\0':
result = tkEof
of 'a'..'z', 'A'..'Z', '_':
setLen(my.a, 0)
parseName(my)
case my.a
of "null": result = tkNull
Expand Down Expand Up @@ -524,3 +594,31 @@ proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} =
proc eat*(p: var JsonParser, tok: TokKind) =
  ## Consumes the current token when it matches `tok`, advancing the parser;
  ## otherwise raises a parse error naming the expected token.
  if p.tok != tok:
    raiseParseErr(p, tokToStr[tok])
  discard getTok(p)

iterator jsonTokens*(input: Stream, path: string, rawStringLiterals = false,
                     strIntegers = true, strFloats = true): ptr JsonParser =
  ## Yields a `ptr JsonParser` for every token parsed from `input`; `path` is
  ## only used to label error messages.  A pointer rather than a copy is
  ## yielded because `JsonParser` is a fairly large object.
  var parser: JsonParser
  parser.open(input, path, rawStringLiterals = rawStringLiterals,
              strIntegers = strIntegers, strFloats = strFloats)
  try:
    while true:
      let tk = parser.getTok
      if tk in {tkEof, tkError}:
        break                 # stop on end-of-input or first bad token
      yield parser.addr
  finally:
    parser.close()            # close the stream even if the caller breaks

iterator jsonTokens*(path: string, rawStringLiterals = false,
                     strIntegers = true, strFloats = true): ptr JsonParser =
  ## Yield each successive `ptr JsonParser` while parsing file data at `path`.
  ## Example usage:
  ##
  ## .. code-block:: nim
  ##   var total = 0.0 # sum every float literal in the JSON file at `path`.
  ##   for t in jsonTokens(path, false, false, false):
  ##     if t.tok == tkFloat: total += t.f
  ##
  ## (BUG FIX: the original example declared the loop variable `tok` but then
  ## referenced `t`, and tested `t.a == "myKey"` -- which can never match a
  ## float token, and `a` is not even populated when `strFloats` is false.)
  for tok in jsonTokens(newFileStream(path), path,
                        rawStringLiterals, strIntegers, strFloats):
    yield tok
9 changes: 5 additions & 4 deletions tests/stdlib/tjsonutils.nim
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ discard """

import std/jsonutils
import std/json
import std/math

proc testRoundtrip[T](t: T, expected: string) =
let j = t.toJson
Expand Down Expand Up @@ -238,7 +239,7 @@ template fn() =
doAssert not foo.b
doAssert foo.f == 0.0
doAssert foo.c == 1
doAssert foo.c1 == 3.14159
doAssert almostEqual(foo.c1, 3.14159, 1)

block testExceptionOnWrongDiscirminatBranchInJson:
var foo = Foo(b: false, f: 3.14159, c: 0, c0: 42)
Expand All @@ -247,7 +248,7 @@ template fn() =
fromJson(foo, json, Joptions(allowMissingKeys: true))
# Test that the original fields are not reset.
doAssert not foo.b
doAssert foo.f == 3.14159
doAssert almostEqual(foo.f, 3.14159, 1)
doAssert foo.c == 0
doAssert foo.c0 == 42

Expand All @@ -258,7 +259,7 @@ template fn() =
doAssert not foo.b
doAssert foo.f == 2.71828
doAssert foo.c == 1
doAssert foo.c1 == 3.14159
doAssert almostEqual(foo.c1, 3.14159, 1)

block testAllowExtraKeysInJsonOnWrongDisciriminatBranch:
var foo = Foo(b: false, f: 3.14159, c: 0, c0: 42)
Expand All @@ -267,7 +268,7 @@ template fn() =
allowExtraKeys: true))
# Test that the original fields are not reset.
doAssert not foo.b
doAssert foo.f == 3.14159
doAssert almostEqual(foo.f, 3.14159, 1)
doAssert foo.c == 0
doAssert foo.c0 == 42

Expand Down