Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ require (
github.com/zeebo/xxh3 v1.1.0
go.opentelemetry.io/collector/component v1.56.0
go.opentelemetry.io/collector/component/componenttest v0.150.0
go.opentelemetry.io/collector/confmap v1.56.0
go.opentelemetry.io/collector/confmap/xconfmap v0.150.0
go.opentelemetry.io/collector/consumer/consumertest v0.150.0
go.opentelemetry.io/collector/consumer/xconsumer v0.150.0
Expand All @@ -41,6 +42,7 @@ require (
go.opentelemetry.io/otel/metric v1.43.0
go.opentelemetry.io/proto/otlp v1.10.0
go.opentelemetry.io/proto/otlp/profiles/v1development v0.3.0
go.uber.org/goleak v1.3.0
go.uber.org/zap/exp v0.3.0
golang.org/x/arch v0.26.0
golang.org/x/exp v0.0.0-20260410095643-746e56fc9e2f
Expand Down Expand Up @@ -123,7 +125,6 @@ require (
github.com/tklauser/numcpus v0.11.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/collector/confmap v1.56.0 // indirect
go.opentelemetry.io/collector/consumer v1.56.0 // indirect
go.opentelemetry.io/collector/consumer/consumererror v0.150.0 // indirect
go.opentelemetry.io/collector/featuregate v1.56.0 // indirect
Expand Down
12 changes: 12 additions & 0 deletions interpreter/luajit/luajit.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,18 @@ func (l *luajitInstance) processVMs(ebpf interpreter.EbpfHandler, pid libpf.PID)
badVMs = append(badVMs, g)
continue
}
// Pre-cache GCproto objects from trace startpt pointers.
// This reads the proto data while the VM state is stable (during sync),
// avoiding the race where protos get GC'd between eBPF sample capture
// and Go-side symbolization.
for _, t := range traces {
if t.startpt != 0 {
if _, err := l.getGCproto(t.startpt); err != nil {
logf("lj: pre-cache proto %x for trace %d: %v", t.startpt, t.traceno, err)
}
}
}

// Don't do anything if nothing changed.
if hash == l.traceHashes[g] {
continue
Expand Down
28 changes: 19 additions & 9 deletions interpreter/luajit/proto.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,24 +89,34 @@ func newProto(rm remotememory.RemoteMemory, pt libpf.Address) (*proto, error) {
return nil, err
}

// reading memory from a remote process is always dicey, validate
// we're looking at a GCproto object by checking that the debugging
// Try to read the chunkname first — this is the most useful piece of
// information and survives even if the debug-info pointers are stale.
if p.chunkname != 0 {
p.name = rm.String(p.chunkname + sizeofGCstr)
if !utf8.ValidString(p.name) {
p.name = ""
}
}

// Validate the GCproto object by checking that the debugging
// info pointers are valid internal pointers or NULL.
end := pt + libpf.Address(p.sizept)
bad := func(addr libpf.Address) bool {
return addr != 0 && (addr < pt || addr >= end)
}
if bad(p.lineinfo) || bad(p.uvinfo) || bad(p.varinfo) {
// If chunkname is still readable, return a degraded proto that
// provides file-name attribution even without line info.
if p.name != "" {
p.lineinfo = 0
p.uvinfo = 0
p.varinfo = 0
p.sizebc = 0
return p, nil
}
return nil, errors.New("invalid GCproto object")
}

// string data is stored after the GCstr object
p.name = rm.String(p.chunkname + sizeofGCstr)
if !utf8.ValidString(p.name) {
return nil, errors.New("invalid chunkname string")
}

// This should never be an empty string.
if p.name == "" {
return nil, errors.New("invalid chunkname string")
}
Expand Down
38 changes: 36 additions & 2 deletions interpreter/luajit/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ import (
// $7 = (uint16_t *) 0x50
const tracePartOffset = 0x50

// traceStartPtOffset is the byte offset of GCtrace.startpt (a GCRef) from the
// start of a GCtrace object. startpt is the prototype this trace was compiled from.
// (gdb) p &((GCtrace*)0)->startpt = 0x40
const traceStartPtOffset = 0x40

// Definition:
// https://github.com/openresty/luajit2/blob/7952882d/src/lj_jit.h#L423
type jitStatePart struct {
Expand All @@ -50,6 +54,9 @@ type trace struct {
traceno uint16 /* Trace number. */
_ uint16 /* Linked trace (or self for loops). */
root uint16 /* Root trace of side trace (or 0 for root traces). */

// Not part of the struct read at tracePartOffset — read separately.
startpt libpf.Address // GCRef startpt at offset 0x40
}

// key == traceId
Expand Down Expand Up @@ -92,13 +99,40 @@ func loadTraces(tracesAddr libpf.Address, rm remotememory.RemoteMemory) (uint64,
traces := traceMap{}
for _, addr := range traceAddrs {
t := trace{}
if err := rm.Read(addr+tracePartOffset, pfunsafe.FromPointer(&t)); err != nil {
// Read the packed fields starting at tracePartOffset (0x50).
// We read into a temporary to avoid including the startpt field.
type tracePacked struct {
_ uint32
Szmcode uint32
Mcode uint64
_ uint32
_ uint16
Spadjust uint16
Traceno uint16
_ uint16
Root uint16
}
var tp tracePacked
if err := rm.Read(addr+tracePartOffset, pfunsafe.FromPointer(&tp)); err != nil {
return 0, nil, err
}
t.szmcode = tp.Szmcode
t.mcode = tp.Mcode
t.spadjust = tp.Spadjust
t.traceno = tp.Traceno
t.root = tp.Root
// Read startpt (GCRef at offset 0x40) separately — it's before tracePartOffset.
var startptRef uint64
if err := rm.Read(addr+traceStartPtOffset, pfunsafe.FromPointer(&startptRef)); err != nil {
// Non-fatal: we can still use the trace for mapping, just without proto pre-caching.
logf("lj: failed to read startpt for trace at %x: %v", addr, err)
} else {
t.startpt = libpf.Address(startptRef)
}
if t.traceno > uint16(sztrace) {
return 0, nil, errors.New("invalid traceno")
}
logf("lj: added trace(%d) from %x", t.traceno, tracesAddr)
logf("lj: added trace(%d) from %x startpt=%x", t.traceno, tracesAddr, t.startpt)
traces[t.traceno] = t
}
return h, traces, nil
Expand Down
2 changes: 1 addition & 1 deletion processmanager/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func TestFrameCacheCrossProcessPollution(t *testing.T) {
realPID := libpf.PID(os.Getpid())
pid := process.New(realPID, realPID)
elfRef := pfelf.NewReference(exec, pid)
loaderInfo := interpreter.NewLoaderInfo(goHostFileID, elfRef)
loaderInfo := interpreter.NewLoaderInfo(goHostFileID, elfRef, nil)
rm := remotememory.NewProcessVirtualMemory(realPID)

goData, err := golang.Loader(nil, loaderInfo)
Expand Down
5 changes: 4 additions & 1 deletion support/ebpf/interpreter_dispatcher.ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ struct pid_page_to_mapping_info_t {
__uint(type, BPF_MAP_TYPE_LPM_TRIE);
__type(key, PIDPage);
__type(value, PIDPageMappingInfo);
__uint(max_entries, 524288); // 2^19
// Raise from 2^19 to 2^22 for LuaJIT: each nginx worker's JIT trace
// prefixes consume ~65K entries; 16+ workers need ~1M entries plus
// headroom for native process mappings.
__uint(max_entries, 4194304); // 2^22
__uint(map_flags, BPF_F_NO_PREALLOC);
} pid_page_to_mapping_info SEC(".maps");

Expand Down
5 changes: 4 additions & 1 deletion support/ebpf/luajit_tracer.ebpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,10 @@ struct luajit_procs_t {
} luajit_procs SEC(".maps");

// The number of LuaJIT frames to unwind per frame-unwinding eBPF program.
#define FRAMES_PER_WALK_LUAJIT_STACK 15
// Keep this low enough that the verifier on later kernels does not exceed its
// processed-instruction budget in walk_luajit_stack; deeper Lua stacks are
// handled by tail-calling back into PROG_UNWIND_LUAJIT for another chunk.
#define FRAMES_PER_WALK_LUAJIT_STACK 8

#if defined(__x86_64__)
#define DISPATCH r14
Expand Down
Binary file modified support/ebpf/tracer.ebpf.amd64
Binary file not shown.
Binary file modified support/ebpf/tracer.ebpf.arm64
Binary file not shown.
2 changes: 1 addition & 1 deletion tracer/tracer.go
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ func loadAllMaps(coll *cebpf.CollectionSpec, cfg *Config,
const (
// The following sizes X are used as 2^X, and determined empirically.
// 4 million executable pages / 16GB of executable address space
pidPageMappingInfoSize = 20
pidPageMappingInfoSize = 22
stackDeltaPageToInfoSize = 16
exeIDToStackDeltasSize = 16
)
Expand Down
Loading