Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/bools.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
const DEFAULT_TRUE = "true"
const DEFAULT_FALSE = "false"

function typeparser(::AbstractConf{Bool}, source, pos, len, b, code, pl, options::Options)
@inline function typeparser(::AbstractConf{Bool}, source, pos, len, b, code, pl, options::Options)
x = false
trues = options.trues
falses = options.falses
Expand Down
11 changes: 9 additions & 2 deletions src/components.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# must be outermost layer
function Result(parser)
function(conf::AbstractConf{T}, source, pos, len, ::Type{RT}=T) where {T, RT}
Base.@_inline_meta
startpos = pos
code = SUCCESS
b = eof(source, pos, len) ? 0x00 : peekbyte(source, pos)
Expand Down Expand Up @@ -35,6 +36,7 @@ emptysentinel(opts::Options) = emptysentinel(opts.flags.checksentinel && isempty
function emptysentinel(checksent::Bool)
function(parser)
function checkemptysentinel(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T}
Base.@_inline_meta
pos, code, pl, x = parser(conf, source, pos, len, b, code, pl)
if checksent && pl.len == 0 && (!isgreedy(T) || !quoted(code))
code &= ~(OK | INVALID)
Expand All @@ -54,6 +56,7 @@ whitespace(opts::Options) = whitespace(opts.flags.spacedelim, opts.flags.tabdeli
function whitespace(spacedelim, tabdelim, stripquoted, stripwh)
function(parser)
function stripwhitespace(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T}
Base.@_inline_meta
# strip leading whitespace
if !eof(source, pos, len) && (
# pre-quotes, if delim is not whitespace
Expand Down Expand Up @@ -108,7 +111,7 @@ function whitespace(spacedelim, tabdelim, stripquoted, stripwh)
end
end

function findendquoted(::Type{T}, source, pos, len, b, code, pl, isquoted, cq, e, stripquoted) where {T}
@inline function findendquoted(::Type{T}, source, pos, len, b, code, pl, isquoted, cq, e, stripquoted) where {T}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

findendquoted and finddelimiter are the two big scanning loops, and the quoted/delimiter layers wrap every type's pipeline, so marking them @inline stamps a copy of the loop body into every (type × source × return-type) pipeline specialization the workload compiles. Keeping them as shared compiled units (together with the String typeparser suggestion) measured a 1.4 MB (20%) smaller cache and roughly 0.3 s less precompile time on this PR, with runtime parity. They do O(field-length) work per call, so the cost of calling into a shared type-stable instance is amortized — unlike the per-character float digit machine, where inlining is the right move.

Suggested change
@inline function findendquoted(::Type{T}, source, pos, len, b, code, pl, isquoted, cq, e, stripquoted) where {T}
function findendquoted(::Type{T}, source, pos, len, b, code, pl, isquoted, cq, e, stripquoted) where {T}

[posted by claude]

# for quoted fields, find the closing quote character
# we should be positioned at the correct place to find the closing quote character if everything is as it should be
# if we don't find the quote character immediately, something's wrong, so mark INVALID
Expand Down Expand Up @@ -199,6 +202,7 @@ quoted(opts::Options) = quoted(opts.flags.checkquoted, opts.oq, opts.cq, opts.e,
function quoted(checkquoted, oq, cq, e, stripquoted)
function(parser)
function findquoted(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T}
Base.@_inline_meta
isquoted = false
if checkquoted && !eof(source, pos, len)
isquoted, pos = checktoken(source, pos, len, b, oq)
Expand Down Expand Up @@ -235,6 +239,7 @@ sentinel(opts::Options) = sentinel(opts.flags.checksentinel, opts.sentinel)
function sentinel(chcksentinel, sentinel)
function(parser)
function checkforsentinel(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T}
Base.@_inline_meta
match, sentinelpos = (!chcksentinel || isempty(sentinel) || eof(source, pos, len)) ? (false, 0) : checktokens(source, pos, len, b, sentinel)
pos, code, pl, x = parser(conf, source, pos, len, b, code, pl)
# @show match, sentinelpos, pos, pl
Expand All @@ -259,7 +264,7 @@ function sentinel(chcksentinel, sentinel)
end
end

function finddelimiter(::Type{T}, source, pos, len, b, code, pl, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace) where {T}
@inline function finddelimiter(::Type{T}, source, pos, len, b, code, pl, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace) where {T}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as findendquoted above.

Suggested change
@inline function finddelimiter(::Type{T}, source, pos, len, b, code, pl, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace) where {T}
function finddelimiter(::Type{T}, source, pos, len, b, code, pl, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace) where {T}

[posted by claude]

# now we check for a delimiter; if we don't find it, keep parsing until we do
# for greedy strings, we need to keep track of the last non-whitespace character
# if we're stripping whitespace, but note we've already skipped leading whitespace
Expand Down Expand Up @@ -359,6 +364,7 @@ delimiter(opts::Options) = delimiter(opts.flags.checkdelim, opts.delim, opts.fla
function delimiter(checkdelim, delim, ignorerepeated, cmt, ignoreemptylines, stripwhitespace)
function(parser)
function _finddelimiter(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T}
Base.@_inline_meta
pos, code, pl, x = parser(conf, source, pos, len, b, code, pl)
if eof(source, pos, len) || !checkdelim || delimited(code) || newline(code) # greedy case
return pos, code, pl, x
Expand All @@ -372,6 +378,7 @@ end

# Build a parser closure bound to `opts`.
# The returned closure takes the same seven arguments as the other wrapper closures in this
# file (`conf, source, pos, len, b, code, pl`) and forwards them, plus the captured `opts`,
# to the eight-argument `typeparser` method selected by the type of `conf`.
function typeparser(opts::Options)
function(conf::AbstractConf{T}, source, pos, len, b, code, pl) where {T}
# inlining hint for this closure; must remain the first statement in the body
Base.@_inline_meta
return typeparser(conf, source, pos, len, b, code, pl, opts)
end
end
Expand Down
2 changes: 1 addition & 1 deletion src/dates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ function tryparsenext(tok, source, pos, len, b, code)::Tuple{Any, Int, UInt8, Re
return val, pos, b, code
end

function typeparser(::AbstractConf{T}, source::Union{AbstractVector{UInt8}, IO}, pos, len, b, code, pl, options) where {T <: Dates.TimeType}
@inline function typeparser(::AbstractConf{T}, source::Union{AbstractVector{UInt8}, IO}, pos, len, b, code, pl, options) where {T <: Dates.TimeType}
fmt = options.dateformat
df = fmt === nothing ? default_format(T) : fmt
tokens = df.tokens
Expand Down
21 changes: 12 additions & 9 deletions src/floats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ function typeparser(::AbstractConf{BigFloat}, source, pos, len, b, code, pl, opt
end
end

function typeparser(conf::AbstractConf{T}, source, pos, len, b, code, pl, options) where {T <: SupportedFloats}
@inline function typeparser(conf::AbstractConf{T}, source, pos, len, b, code, pl, options) where {T <: SupportedFloats}
# keep track of starting pos in case of invalid, we can rewind to start of parsing
startpos = pos
x = zero(T)
Expand Down Expand Up @@ -298,7 +298,7 @@ getx(x, f) = f === nothing ? x : nothing
# Non-inlined forwarding wrapper around `parsedigits`: every argument is passed through
# unchanged and the result is asserted to be `Tuple{rettype(T), ReturnCode, Int}`.
# `parsedigits` calls this with `Base.inferencebarrier(_widen(digits))` once `digits`
# exceeds `overflowval(IntType)`, so the widened-integer path is a separate call.
@noinline _parsedigits(conf::AbstractConf{T}, source, pos, len, b, code, options, digits::IntType, neg::Bool, startpos, overflow_invalid::Bool, ndigits::Int, f::F) where {T, IntType, F} =
parsedigits(conf, source, pos, len, b, code, options, digits, neg, startpos, overflow_invalid, ndigits, f)::Tuple{rettype(T), ReturnCode, Int}

function parsedigits(conf::AbstractConf{T}, source, pos, len, b, code, options, digits::IntType, neg::Bool, startpos, overflow_invalid::Bool=false, ndigits::Int=0, f::F=nothing) where {T, IntType, F}
@inline function parsedigits(conf::AbstractConf{T}, source, pos, len, b, code, options, digits::IntType, neg::Bool, startpos, overflow_invalid::Bool=false, ndigits::Int=0, f::F=nothing) where {T, IntType, F}
x = zero(T)
anydigits = false
has_groupmark = _has_groupmark(options, code)
Expand All @@ -312,7 +312,7 @@ function parsedigits(conf::AbstractConf{T}, source, pos, len, b, code, options,
while true
if b <= 0x09
if overflows(IntType) && digits > overflowval(IntType)
return _parsedigits(conf, source, pos, len, b + UInt8('0'), code, options, Base.inferencebarrier(_widen(digits)), neg, startpos, overflow_invalid, ndigits, f)
return _parsedigits(conf, source, pos, len, b + UInt8('0'), code, options, Base.inferencebarrier(_widen(digits)), neg, startpos, overflow_invalid, ndigits, f)::Tuple{rettype(T), ReturnCode, Int}
elseif ndigits > maxdigits(T)
# if input is way too big, just bail
fastseek!(source, startpos - 1)
Expand Down Expand Up @@ -392,8 +392,11 @@ function parsedigits(conf::AbstractConf{T}, source, pos, len, b, code, options,
# now we parse any digits following decimal point (if any); start `frac` at UInt64(0)
# `digits` still receives any fractional digits, `frac` just keeps track of how many digits
# were parsed to combine with any "e123" exponent numbers to determine final exponent value
(overflows(IntType) && digits > overflowval(IntType)) && (digits = Base.inferencebarrier(_widen(digits)))
x, code, pos = parsefrac(conf, source, pos, len, b, code, options, digits, neg, startpos, UInt64(0), overflow_invalid, ndigits, f)
if overflows(IntType) && digits > overflowval(IntType)
x, code, pos = _parsefrac(conf, source, pos, len, b, code, options, Base.inferencebarrier(_widen(digits)), neg, startpos, UInt64(0), overflow_invalid, ndigits, f)::Tuple{rettype(T), ReturnCode, Int}
else
x, code, pos = parsefrac(conf, source, pos, len, b, code, options, digits, neg, startpos, UInt64(0), overflow_invalid, ndigits, f)
end

@label done
return x, code, pos
Expand All @@ -405,7 +408,7 @@ end
# Non-inlined forwarding wrapper around `parsefrac`: every argument is passed through
# unchanged and the result is asserted to be `Tuple{rettype(T), ReturnCode, Int}`.
# Callers pass `Base.inferencebarrier(_widen(digits))` here once `digits` exceeds
# `overflowval(IntType)`, so the widened-integer path is a separate call.
@noinline _parsefrac(conf::AbstractConf{T}, source, pos, len, b, code, options, digits::IntType, neg::Bool, startpos, frac, overflow_invalid, ndigits, f::F) where {T, IntType, F} =
parsefrac(conf, source, pos, len, b, code, options, digits, neg, startpos, frac, overflow_invalid, ndigits, f)::Tuple{rettype(T), ReturnCode, Int}

function parsefrac(conf::AbstractConf{T}, source, pos, len, b, code, options, digits::IntType, neg::Bool, startpos, frac, overflow_invalid, ndigits, f::F) where {T, IntType, F}
@inline function parsefrac(conf::AbstractConf{T}, source, pos, len, b, code, options, digits::IntType, neg::Bool, startpos, frac, overflow_invalid, ndigits, f::F) where {T, IntType, F}
x = zero(T)
parsedanyfrac = false
FT = FLOAT64
Expand Down Expand Up @@ -434,7 +437,7 @@ function parsefrac(conf::AbstractConf{T}, source, pos, len, b, code, options, di
b = peekbyte(source, pos) - UInt8('0')
b > 0x09 && break
if overflows(IntType) && digits > overflowval(IntType)
return _parsefrac(conf, source, pos, len, b + UInt8('0'), code, options, Base.inferencebarrier(_widen(digits)), neg, startpos, frac, overflow_invalid, ndigits, f)
return _parsefrac(conf, source, pos, len, b + UInt8('0'), code, options, Base.inferencebarrier(_widen(digits)), neg, startpos, frac, overflow_invalid, ndigits, f)::Tuple{rettype(T), ReturnCode, Int}
end
end
b += UInt8('0')
Expand Down Expand Up @@ -502,7 +505,7 @@ end
# Non-inlined forwarding wrapper around `parseexp`: every argument is passed through
# unchanged and the result is asserted to be `Tuple{rettype(T), ReturnCode, Int}`.
# `parseexp` calls this with `Base.inferencebarrier(_widen(exp))` once `exp` exceeds
# `overflowval(ExpType)`, so the widened-exponent path is a separate call.
@noinline _parseexp(conf::AbstractConf{T}, source, pos, len, b, code, options, digits, neg::Bool, startpos, frac, exp::ExpType, negexp, FT, overflow_invalid, ndigits, f::F) where {T, ExpType, F} =
parseexp(conf, source, pos, len, b, code, options, digits, neg, startpos, frac, exp, negexp, FT, overflow_invalid, ndigits, f)::Tuple{rettype(T), ReturnCode, Int}

function parseexp(conf::AbstractConf{T}, source, pos, len, b, code, options, digits, neg::Bool, startpos, frac, exp::ExpType, negexp, FT, overflow_invalid, ndigits, f::F) where {T, ExpType, F}
@inline function parseexp(conf::AbstractConf{T}, source, pos, len, b, code, options, digits, neg::Bool, startpos, frac, exp::ExpType, negexp, FT, overflow_invalid, ndigits, f::F) where {T, ExpType, F}
x = zero(T)
# note that `b` has already had `b - UInt8('0')` applied to it for parseexp
while true
Expand Down Expand Up @@ -535,7 +538,7 @@ function parseexp(conf::AbstractConf{T}, source, pos, len, b, code, options, dig
@goto done
end
if overflows(ExpType) && exp > overflowval(ExpType)
return _parseexp(conf, source, pos, len, b, code, options, digits, neg, startpos, frac, Base.inferencebarrier(_widen(exp)), negexp, FT, overflow_invalid, ndigits, f)
return _parseexp(conf, source, pos, len, b, code, options, digits, neg, startpos, frac, Base.inferencebarrier(_widen(exp)), negexp, FT, overflow_invalid, ndigits, f)::Tuple{rettype(T), ReturnCode, Int}
end
end
@label done
Expand Down
2 changes: 1 addition & 1 deletion src/ints.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ overflowval(::Type{T}) where {T <: Integer} = div(typemax(T) - T(9), T(10))
# if we eventually support non-base 10
# overflowval(::Type{T}, base) where {T <: Integer} = div(typemax(T) - base + 1, base)

function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: Integer}
@inline function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: Integer}
x = zero(T)
neg = false
has_groupmark = _has_groupmark(opts, code)
Expand Down
2 changes: 1 addition & 1 deletion src/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ isgreedy(::Type{T}) where {T <: AbstractString} = true
isgreedy(::Type{Symbol}) = true
isgreedy(T) = false

function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: AbstractString}
@inline function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: AbstractString}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inlining the String typeparser lets the String path flatten transitively into each pipeline specialization (typeparser → findendquoted → ...), which compounds the cache cost of the scanning-loop inlines. The xparse(String, ...) benchmarks measured runtime parity without the @inline.

Suggested change
@inline function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: AbstractString}
function typeparser(::AbstractConf{T}, source, pos, len, b, code, pl, opts) where {T <: AbstractString}

[posted by claude]

if quoted(code)
code |= OK
return findendquoted(T, source, pos, len, b, code, pl, true, opts.cq, opts.e, opts.flags.stripquoted)
Expand Down
Loading