-
Notifications
You must be signed in to change notification settings - Fork 16
Restore performance of parsing #206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,6 +1,7 @@ | ||||||
| # must be outermost layer | ||||||
| function Result(parser) | ||||||
| function(conf::AbstractConf{T}, source, pos, len, ::Type{RT}=T) where {T, RT} | ||||||
| Base.@_inline_meta | ||||||
| startpos = pos | ||||||
| code = SUCCESS | ||||||
| b = eof(source, pos, len) ? 0x00 : peekbyte(source, pos) | ||||||
|
|
@@ -35,6 +36,7 @@ emptysentinel(opts::Options) = emptysentinel(opts.flags.checksentinel && isempty | |||||
| function emptysentinel(checksent::Bool) | ||||||
| function(parser) | ||||||
| function checkemptysentinel(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T} | ||||||
| Base.@_inline_meta | ||||||
| pos, code, pl, x = parser(conf, source, pos, len, b, code, pl) | ||||||
| if checksent && pl.len == 0 && (!isgreedy(T) || !quoted(code)) | ||||||
| code &= ~(OK | INVALID) | ||||||
|
|
@@ -54,6 +56,7 @@ whitespace(opts::Options) = whitespace(opts.flags.spacedelim, opts.flags.tabdeli | |||||
| function whitespace(spacedelim, tabdelim, stripquoted, stripwh) | ||||||
| function(parser) | ||||||
| function stripwhitespace(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T} | ||||||
| Base.@_inline_meta | ||||||
| # strip leading whitespace | ||||||
| if !eof(source, pos, len) && ( | ||||||
| # pre-quotes, if delim is not whitespace | ||||||
|
|
@@ -108,7 +111,7 @@ function whitespace(spacedelim, tabdelim, stripquoted, stripwh) | |||||
| end | ||||||
| end | ||||||
|
|
||||||
| function findendquoted(::Type{T}, source, pos, len, b, code, pl, isquoted, cq, e, stripquoted) where {T} | ||||||
| @inline function findendquoted(::Type{T}, source, pos, len, b, code, pl, isquoted, cq, e, stripquoted) where {T} | ||||||
| # for quoted fields, find the closing quote character | ||||||
| # we should be positioned at the correct place to find the closing quote character if everything is as it should be | ||||||
| # if we don't find the quote character immediately, something's wrong, so mark INVALID | ||||||
|
|
@@ -199,6 +202,7 @@ quoted(opts::Options) = quoted(opts.flags.checkquoted, opts.oq, opts.cq, opts.e, | |||||
| function quoted(checkquoted, oq, cq, e, stripquoted) | ||||||
| function(parser) | ||||||
| function findquoted(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T} | ||||||
| Base.@_inline_meta | ||||||
| isquoted = false | ||||||
| if checkquoted && !eof(source, pos, len) | ||||||
| isquoted, pos = checktoken(source, pos, len, b, oq) | ||||||
|
|
@@ -235,6 +239,7 @@ sentinel(opts::Options) = sentinel(opts.flags.checksentinel, opts.sentinel) | |||||
| function sentinel(chcksentinel, sentinel) | ||||||
| function(parser) | ||||||
| function checkforsentinel(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T} | ||||||
| Base.@_inline_meta | ||||||
| match, sentinelpos = (!chcksentinel || isempty(sentinel) || eof(source, pos, len)) ? (false, 0) : checktokens(source, pos, len, b, sentinel) | ||||||
| pos, code, pl, x = parser(conf, source, pos, len, b, code, pl) | ||||||
| # @show match, sentinelpos, pos, pl | ||||||
|
|
@@ -259,7 +264,7 @@ function sentinel(chcksentinel, sentinel) | |||||
| end | ||||||
| end | ||||||
|
|
||||||
| function finddelimiter(::Type{T}, source, pos, len, b, code, pl, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace) where {T} | ||||||
| @inline function finddelimiter(::Type{T}, source, pos, len, b, code, pl, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace) where {T} | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as
Suggested change
[posted by claude] |
||||||
| # now we check for a delimiter; if we don't find it, keep parsing until we do | ||||||
| # for greedy strings, we need to keep track of the last non-whitespace character | ||||||
| # if we're stripping whitespace, but note we've already skipped leading whitespace | ||||||
|
|
@@ -359,6 +364,7 @@ delimiter(opts::Options) = delimiter(opts.flags.checkdelim, opts.delim, opts.fla | |||||
| function delimiter(checkdelim, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace) | ||||||
| function(parser) | ||||||
| function _finddelimiter(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T} | ||||||
| Base.@_inline_meta | ||||||
| pos, code, pl, x = parser(conf, source, pos, len, b, code, pl) | ||||||
| if eof(source, pos, len) || !checkdelim || delimited(code) || newline(code) # greedy case | ||||||
| return pos, code, pl, x | ||||||
|
|
@@ -372,6 +378,7 @@ end | |||||
|
|
||||||
| function typeparser(opts::Options) | ||||||
| function(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T} | ||||||
| Base.@_inline_meta | ||||||
| return typeparser(conf, source, pos, len, b, code, pl, opts) | ||||||
| end | ||||||
| end | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -2,7 +2,7 @@ isgreedy(::Type{T}) where {T <: AbstractString} = true | |||||
| isgreedy(::Type{Symbol}) = true | ||||||
| isgreedy(T) = false | ||||||
|
|
||||||
| function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: AbstractString} | ||||||
| @inline function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: AbstractString} | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Inlining the String typeparser lets the String path flatten transitively into each pipeline specialization (typeparser → findendquoted → ...), which compounds the cache cost of the scanning-loop inlines;
Suggested change
[posted by claude] |
||||||
| if quoted(code) | ||||||
| code |= OK | ||||||
| return findendquoted(T, source, pos, len, b, code, pl, true, opts.cq, opts.e, opts.flags.stripquoted) | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
findendquoted/finddelimiterare the two big scanning loops, and thequoted/delimiterlayers wrap every type's pipeline — marking them@inlinestamps a copy of the loop body into every (type × source × return-type) pipeline specialization the workload compiles. Keeping them as shared compiled units (together with the String typeparser suggestion) measured -1.4MB cache (-20%) and ~-0.3s precompile on this PR, with runtime parity: they do O(field-length) work per call, so the call into a shared type-stable instance amortizes — unlike the per-character float digit machine, where inlining is the right move.[posted by claude]