From 2dc773d544c640fe8b3d64fc97dde170e493c768 Mon Sep 17 00:00:00 2001 From: ErenAri Date: Fri, 29 May 2026 22:27:00 +0300 Subject: [PATCH 1/5] fix(logging,policy): const char* log field + correct policy-gen guard; align claims MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Logging: add a `field(const std::string&, const char*)` overload. Without it, C-string/string-literal field values bound to the bool overload (const char*->bool is a standard conversion that outranks the user-defined const char*->std::string), silently rendering every C-string field as "true" in text logs and SIEM output. Adds tests/test_logging.cpp as a regression guard. Policy: the first bump_policy_generation() was not dead code — it was the only atomic guard for the non-shadow direct-apply path. Move it into the direct-apply branch so each path bumps exactly once immediately before mutating live maps, and the shadow path no longer forces an unnecessary audit-downgrade window during shadow population. Docs: align enforcement claims with mechanisms — module-load blocking requires active kernel lockdown; OverlayFS copy-up is detection + asynchronous userspace re-propagation (relabeled ENFORCED->AUDITED, race window documented in GUARANTEES.md). Add docs/MEMORY_SAFETY.md documenting the C++ userspace posture. 
Co-Authored-By: Claude Opus 4.8 --- CMakeLists.txt | 1 + README.md | 27 +++++++++------- docs/GUARANTEES.md | 9 ++++++ docs/MEMORY_SAFETY.md | 59 ++++++++++++++++++++++++++++++++++ src/logging.hpp | 10 ++++++ src/policy_runtime.cpp | 34 ++++++++++++-------- tests/test_logging.cpp | 72 ++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 188 insertions(+), 24 deletions(-) create mode 100644 docs/MEMORY_SAFETY.md create mode 100644 tests/test_logging.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c72d93be..cd9e3e76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -479,6 +479,7 @@ if(BUILD_TESTING) # Test sources set(TEST_SOURCES tests/test_main.cpp + tests/test_logging.cpp tests/test_commands.cpp tests/test_crash_policy.cpp tests/test_crypto_safe.cpp diff --git a/README.md b/README.md index 7fa2d981..b2b86315 100644 --- a/README.md +++ b/README.md @@ -137,12 +137,12 @@ Legend: ✅ full · ◐ partial · ❌ absent | Tracepoint audit when LSM absent | ✅ | ✅ | ✅ | ✅ | ◐ | | File enforcement | ✅ Kernel deny | ❌ Detect only | ✅ | ◐ | ✅ | | Network enforcement (full socket lifecycle) | ✅ connect/bind/listen/accept/sendmsg/recvmsg | ❌ | ✅ | ◐ | ◐ | -| **OverlayFS `inode_copy_up` propagation** | ✅ | ❌ | ❌ | ❌ | ❌ | +| **OverlayFS `inode_copy_up` detection + async re-propagation** | ◐ async | ❌ | ❌ | ❌ | ❌ | | **IMA-backed trusted exec** (kernel 6.1+) | ✅ `bpf_ima_file_hash` | ❌ | ◐ | ❌ | ❌ | | Process ancestry + argv | ✅ 4 MB priority ringbuf | ◐ | ✅ | ✅ | ◐ | | Cgroup-scoped policy | ✅ inode / IPv4 / port | ◐ | ✅ | ◐ | ✅ | | LPM CIDR v4/v6 network deny | ✅ | ◐ | ✅ | ◐ | ◐ | -| Ptrace / module-load / BPF syscall blocking | ✅ all three | ❌ | ◐ | ❌ | ◐ | +| Ptrace / module-load / BPF syscall blocking | ✅ ptrace + bpf · ◐ module (needs kernel lockdown) | ❌ | ◐ | ❌ | ◐ | | Policy evaluation | O(1) BPF map lookup | O(rules) userspace | In-kernel TracingPolicy | Hybrid signatures | In-kernel + userspace | | Policy language | INI + K8s CRD | YAML DSL | K8s CRD 
TracingPolicy | Rego / Go signatures | K8s CRD KubeArmorPolicy | | Break-glass / deadman-TTL | ✅ Emergency + revert | ❌ | ❌ | ❌ | ❌ | @@ -167,8 +167,11 @@ Legend: ✅ full · ◐ partial · ❌ absent ### Where AegisBPF is uniquely differentiated today - **OverlayFS copy-up propagation.** No other open-source runtime - security agent enforces on `lsm/inode_copy_up`. This closes a - real container-escape bypass class. + security agent hooks `lsm/inode_copy_up` at all. AegisBPF detects the + copy-up synchronously, then re-propagates the deny rule to the new + upper-layer inode asynchronously from userspace (best-effort: a brief + window exists between copy-up and re-propagation). This addresses a + real container-escape bypass class that other agents miss entirely. - **IMA-backed trusted exec identity.** Kernel 6.1+ `bpf_ima_file_hash()` integration ties allow-listed execs to cryptographic file hashes inside `bprm_check_security`. @@ -239,14 +242,15 @@ Current flagship contract: > cgroup-scoped workloads, with safe rollback and signed policy provenance. 
Current scope labels: -- `ENFORCED`: file deny via LSM (`file_open` / `inode_permission`), OverlayFS - copy-up propagation via `inode_copy_up`, outbound network deny for configured - `connect()` / `sendmsg()` rules, inbound `recvmsg()` deny, port-oriented - `bind()` / `listen()` deny, accepted-peer `accept()` deny, cgroup-scoped - inode/IPv4/port deny rules, and IMA-backed exec hash trust on kernel 6.1+ - when those LSM hooks/helpers are available +- `ENFORCED`: file deny via LSM (`file_open` / `inode_permission`), outbound + network deny for configured `connect()` / `sendmsg()` rules, inbound + `recvmsg()` deny, port-oriented `bind()` / `listen()` deny, accepted-peer + `accept()` deny, cgroup-scoped inode/IPv4/port deny rules, and IMA-backed + exec hash trust on kernel 6.1+ when those LSM hooks/helpers are available - `AUDITED`: tracepoint fallback path (no syscall deny), detailed metrics mode, - forensic block events with UID/username and exec identity + forensic block events with UID/username and exec identity, and OverlayFS + copy-up detection via `inode_copy_up` (the deny rule is re-propagated to the + new upper-layer inode asynchronously from userspace, so a brief window exists) - `PLANNED`: broader runtime surfaces beyond current documented hooks ## Validation Results @@ -292,6 +296,7 @@ Public proof lives in the docs and CI artifacts: - Kernel/CI execution model: `docs/CI_EXECUTION_STRATEGY.md` - Kernel/distro compatibility: `docs/COMPATIBILITY.md` - Threat model + non-goals: `docs/THREAT_MODEL.md` +- Memory-safety posture: `docs/MEMORY_SAFETY.md` - Enforcement guarantees + TOCTOU analysis: `docs/GUARANTEES.md` - Enforce posture guarantees contract: `docs/ENFORCEMENT_GUARANTEES.md` - Emergency control contract: `docs/EMERGENCY_CONTROL_CONTRACT.md` diff --git a/docs/GUARANTEES.md b/docs/GUARANTEES.md index f8a42575..7d9a19c8 100644 --- a/docs/GUARANTEES.md +++ b/docs/GUARANTEES.md @@ -121,6 +121,15 @@ see `docs/THREAT_MODEL.md`. 
- Network and distributed filesystems (NFS, FUSE variants) are not guaranteed surfaces. +### OverlayFS copy-up propagation is asynchronous + +- `lsm/inode_copy_up` fires synchronously when a denied lower-layer inode is + copied up, but the hook only emits an event and allows the copy-up to proceed. +- Userspace re-resolves the new upper-layer inode and adds it to the deny map. + Between the copy-up completing and that re-propagation there is a brief window + in which the new upper-layer inode is not yet denied. This is detection plus + best-effort propagation, not synchronous enforcement. + ## Known bypass classes | Bypass | Affected surface | Mitigation | diff --git a/docs/MEMORY_SAFETY.md b/docs/MEMORY_SAFETY.md new file mode 100644 index 00000000..06c61b26 --- /dev/null +++ b/docs/MEMORY_SAFETY.md @@ -0,0 +1,59 @@ +# Memory Safety Posture + +AegisBPF's privileged userspace agent is written in C++20. A root-privileged +agent that parses attacker-influenceable input is a memory-safety-sensitive +surface, so this document states what we do about it honestly, rather than +claiming a property the language does not give us for free. + +## Current posture + +The userspace agent is **hardened C++**, not memory-safe-by-construction. The +mitigations in place today: + +- **Compiler hardening:** `_FORTIFY_SOURCE`, stack-protector-strong, PIE, + full RELRO, and the standard warning/`-Werror` surface (see `CMakeLists.txt` + and the binary-hardening contract test). +- **Sanitizer CI:** AddressSanitizer, ThreadSanitizer, and + UndefinedBehaviorSanitizer builds run in CI (`build-asan*`, `build-tsan*`, + `build-ubsan*`). +- **Fuzzing:** parser/decoder fuzz harnesses under `tests/fuzz/` run via the + nightly fuzz workflow. +- **Runtime sandboxing:** the daemon can restrict itself with a **seccomp** + allowlist and a **Landlock** self-sandbox, shrinking the syscall/filesystem + surface available post-compromise. 
+- **Vendored crypto** (`tweetnacl`) is pinned and gated by a periodic + human-review staleness check (`vendored_dependency_contract`). + +## The risk we are managing + +The highest-risk code is the **untrusted-input boundary** — anything that +parses bytes an attacker can influence: + +- the policy file parser (`src/policy_parse.cpp`), +- the event / JSON decoders (`src/json_scan.cpp`, the `explain ` + path). + +A memory-safety defect is far more dangerous here than in code that only +touches trusted, agent-generated data. + +## Direction + +1. **Harden further (cheap, in progress):** `_FORTIFY_SOURCE=3`, + `-D_GLIBCXX_ASSERTIONS`, `-fstack-clash-protection`, and CFI + (`-fsanitize=cfi` with LTO); evaluate a hardened allocator. +2. **Oxidize the untrusted-input boundary (planned):** migrate the policy + parser and event decoders to Rust behind a C ABI shim, and wire those Rust + targets into continuous fuzzing (OSS-Fuzz). This puts memory-safe code + exactly where attacker-influenced bytes are parsed, without rewriting the + working, test-covered remainder. +3. **Full Rust/Aya rewrite is explicitly deferred.** It is the right greenfield + answer, but it would discard a verified, test-covered asset for a property + that hardening + privilege-separation + targeted Rust + fuzzing already + largely deliver. Revisit only for a v2 or at the request of a major adopter. + +## What this is not + +This is not a claim that the agent is memory-safe. It is a claim that the +memory-safety risk is **identified, bounded, mitigated, and on a path to +reduction** — and that the most dangerous 10% (untrusted parsers) is the first +thing being moved to a memory-safe language. diff --git a/src/logging.hpp b/src/logging.hpp index b1827c41..94c392df 100644 --- a/src/logging.hpp +++ b/src/logging.hpp @@ -75,6 +75,16 @@ class LogEntry { return *this; } + // A const char* overload is required. 
Without it, string-literal and C-string + // arguments bind to the bool overload below (const char*->bool is a standard + // boolean conversion that outranks the user-defined const char*->std::string), + // which silently renders every C-string field as "true". + LogEntry& field(const std::string& key, const char* value) + { + fields_.emplace_back(key, value ? std::string(value) : std::string()); + return *this; + } + LogEntry& field(const std::string& key, bool value) { fields_.emplace_back(key, value ? "true" : "false"); diff --git a/src/policy_runtime.cpp b/src/policy_runtime.cpp index a9cd0603..ba020b46 100644 --- a/src/policy_runtime.cpp +++ b/src/policy_runtime.cpp @@ -247,19 +247,12 @@ Result apply_policy_internal_impl_fn(const std::string& path, const std::s size_t expected_allow_exec_inode_entries = 0; - // Bump policy generation before sync — forces BPF hooks into audit mode - // during the transition window until we commit the matching generation. - { - auto gen_result = bump_policy_generation(state); - if (gen_result) { - pending_generation = *gen_result; - logger().log(SLOG_INFO("Policy generation bumped; hooks in audit-mode during sync") - .field("generation", static_cast(pending_generation))); - } else { - logger().log(SLOG_WARN("Failed to bump policy generation; sync proceeds without atomic guard") - .field("error", gen_result.error().to_string())); - } - } + // Policy generation guard: each apply path bumps the generation exactly once, + // immediately before it begins mutating live maps, which forces BPF hooks into + // audit mode until the matching generation is committed. The shadow path bumps + // right before the shadow->live sync; the direct-apply path bumps at the top of + // its branch. Bumping any earlier (e.g. during shadow population, while the live + // maps are still untouched) would open an unnecessary enforcement-downgrade window. 
bool use_shadow = false; ShadowMapSet shadows; @@ -516,6 +509,21 @@ Result apply_policy_internal_impl_fn(const std::string& path, const std::s logger().log(SLOG_INFO("Shadow maps synced to live maps")); } } else { + // Direct-apply path mutates live maps in place. Bump the generation now, + // before any live-map write, so hooks fall back to audit mode until the + // matching generation is committed after the writes complete. + { + auto gen_result = bump_policy_generation(state); + if (gen_result) { + pending_generation = *gen_result; + logger().log(SLOG_INFO("Policy generation bumped; hooks in audit-mode during direct apply") + .field("generation", static_cast(pending_generation))); + } else { + logger().log(SLOG_WARN("Failed to bump policy generation; direct apply proceeds without atomic guard") + .field("error", gen_result.error().to_string())); + } + } + if (reset) { ScopedSpan span("policy.reset_maps", root_span.trace_id(), root_span.span_id()); auto reset_result = reset_policy_maps(state); diff --git a/tests/test_logging.cpp b/tests/test_logging.cpp new file mode 100644 index 00000000..0eed86af --- /dev/null +++ b/tests/test_logging.cpp @@ -0,0 +1,72 @@ +// Tests for the structured logger field() overloads. +// +// Regression guard for the const char* -> bool overload trap: before a dedicated +// const char* overload existed, string-literal / C-string field values bound to +// field(key, bool) and rendered as "true" instead of the actual string. +#include "logging.hpp" + +#include + +using namespace aegis; + +TEST(LoggingTest, ConstCharPointerRendersValueNotBool) +{ + const char* prog = "handle_bprm_check_security"; + LogEntry e(LogLevel::Info, "msg"); + e.field("program", prog); + + ASSERT_EQ(e.fields().size(), 1u); + EXPECT_EQ(e.fields()[0].first, "program"); + // Must be the string value, NOT "true". 
+ EXPECT_EQ(e.fields()[0].second, "handle_bprm_check_security"); +} + +TEST(LoggingTest, StringLiteralRendersValueNotBool) +{ + LogEntry e(LogLevel::Info, "msg"); + e.field("hook", "lsm/file_open"); + + ASSERT_EQ(e.fields().size(), 1u); + EXPECT_EQ(e.fields()[0].second, "lsm/file_open"); +} + +TEST(LoggingTest, StdStringStillRendersValue) +{ + LogEntry e(LogLevel::Info, "msg"); + std::string path = "/etc/aegisbpf/policy.conf"; + e.field("path", path); + + ASSERT_EQ(e.fields().size(), 1u); + EXPECT_EQ(e.fields()[0].second, "/etc/aegisbpf/policy.conf"); +} + +TEST(LoggingTest, BoolStillRendersBool) +{ + LogEntry e(LogLevel::Info, "msg"); + bool enabled = true; + e.field("enabled", enabled).field("disabled", false); + + ASSERT_EQ(e.fields().size(), 2u); + EXPECT_EQ(e.fields()[0].second, "true"); + EXPECT_EQ(e.fields()[1].second, "false"); +} + +TEST(LoggingTest, NullConstCharPointerIsEmptyNotCrash) +{ + LogEntry e(LogLevel::Info, "msg"); + const char* nothing = nullptr; + e.field("maybe", nothing); + + ASSERT_EQ(e.fields().size(), 1u); + EXPECT_EQ(e.fields()[0].second, ""); +} + +TEST(LoggingTest, IntegerOverloadsUnaffected) +{ + LogEntry e(LogLevel::Info, "msg"); + e.field("i64", static_cast(-7)).field("u64", static_cast(42)); + + ASSERT_EQ(e.fields().size(), 2u); + EXPECT_EQ(e.fields()[0].second, "-7"); + EXPECT_EQ(e.fields()[1].second, "42"); +} From ed1bbb803a3875c72fa891ba61892bc6e24ad56a Mon Sep 17 00:00:00 2001 From: ErenAri Date: Fri, 29 May 2026 22:30:59 +0300 Subject: [PATCH 2/5] build: strengthen userspace hardening (_FORTIFY_SOURCE=3, stack-clash, _GLIBCXX_ASSERTIONS) Probe each flag with check__compiler_flag so the cross-compiler/cross-arch matrix degrades gracefully: - _FORTIFY_SOURCE: prefer level 3 (GCC 12+/Clang 9+ w/ glibc >= 2.34), fall back to 2 - -fstack-clash-protection where supported - -D_GLIBCXX_ASSERTIONS (C++): bounds/precondition checks in std:: containers Verified on GCC 13 / glibc 2.39: all three probes pass, full suite stays green 
(291/292; the 1 is the intentional 90-day vendored-crypto review gate), and the linked binary carries PIE, full RELRO+BIND_NOW, stack canary, and FORTIFY _chk symbols. The existing _FORTIFY_SOURCE / fstack-protector tokens remain in CMakeLists so verify_trustworthiness.sh continues to match. Co-Authored-By: Claude Opus 4.8 --- CMakeLists.txt | 32 ++++++++++++++++++++++++++++++-- README.md | 2 +- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd9e3e76..ac2e3d87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,10 +103,38 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebI endif() # Security hardening flags -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE") +# +# Levels are probed for compiler support so the cross-compiler/cross-arch matrix +# (GCC/Clang, x86_64/arm64) degrades gracefully instead of failing the build. +include(CheckCCompilerFlag) +include(CheckCXXCompilerFlag) + +# _FORTIFY_SOURCE: prefer level 3 (GCC 12+/Clang 9+ with glibc >= 2.34), else 2. +check_cxx_compiler_flag("-D_FORTIFY_SOURCE=3" AEGIS_HAS_FORTIFY3) +if(AEGIS_HAS_FORTIFY3) + set(AEGIS_FORTIFY_LEVEL 3) +else() + set(AEGIS_FORTIFY_LEVEL 2) +endif() + +set(AEGIS_HARDEN_FLAGS "-D_FORTIFY_SOURCE=${AEGIS_FORTIFY_LEVEL} -fstack-protector-strong -fPIE") + +# Stack clash protection (large-stack-probe guard; GCC 8+/Clang 11+, x86_64/arm64). 
+check_cxx_compiler_flag("-fstack-clash-protection" AEGIS_HAS_STACK_CLASH) +if(AEGIS_HAS_STACK_CLASH) + string(APPEND AEGIS_HARDEN_FLAGS " -fstack-clash-protection") +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AEGIS_HARDEN_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AEGIS_HARDEN_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie -Wl,-z,relro,-z,now") +# libstdc++ hardened mode (C++ only): bounds/precondition assertions in std:: containers. +check_cxx_compiler_flag("-D_GLIBCXX_ASSERTIONS" AEGIS_HAS_GLIBCXX_ASSERTIONS) +if(AEGIS_HAS_GLIBCXX_ASSERTIONS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_ASSERTIONS") +endif() + # Sanitizer configuration if(ENABLE_ASAN) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") diff --git a/README.md b/README.md index b2b86315..9977b633 100644 --- a/README.md +++ b/README.md @@ -269,7 +269,7 @@ Current scope labels: | **Binary Hardening** | VERIFIED | FORTIFY_SOURCE, stack-protector, PIE, full RELRO | **Security Hardening Applied:** -- Compiler security flags (FORTIFY_SOURCE=2, stack-protector-strong, PIE, RELRO) +- Compiler security flags (FORTIFY_SOURCE=3, stack-protector-strong, stack-clash-protection, _GLIBCXX_ASSERTIONS, PIE, full RELRO) — probed per-compiler, fall back gracefully - Timeout protection on BPF operations (prevents indefinite hangs) - Secure temporary file creation via `mkstemp()` (symlink-attack resistant) - Atomic file writes (write-rename pattern) for all persistent state From 249a8828a5e2390870ccd179c821a15e93af3e57 Mon Sep 17 00:00:00 2001 From: ErenAri Date: Fri, 29 May 2026 22:42:32 +0300 Subject: [PATCH 3/5] ci(fuzz): ClusterFuzzLite integration + OSS-Fuzz-compatible fuzz build Adds free, in-repo continuous fuzzing of the userspace input-parsing surface (policy parser, signed-bundle parser, network rules, path validation, event decoder) using the existing libFuzzer harnesses: - .clusterfuzzlite/{Dockerfile,build.sh,project.yaml}: 
standard OSS-Fuzz build contract; SKIP_BPF_BUILD=ON since fuzzers target userspace; ships checked-in seed corpora as _seed_corpus.zip. - .github/workflows/cflite-pr.yml: diff-based (code-change) PR fuzzing for address + undefined sanitizers; injection-safe (no run: steps, no untrusted event input). - CMakeLists.txt: the ENABLE_FUZZING block now honors $LIB_FUZZING_ENGINE so OSS-Fuzz/CFLite supply the engine+sanitizer, falling back to local -fsanitize=fuzzer,address; deduped the five harness definitions into a loop. Hardening flags are skipped under ENABLE_FUZZING so they don't interfere with sanitizer instrumentation. Verified locally: all 5 fuzzers build with clang; fuzz_policy runs clean on its seed corpus. Normal build still hardened; full suite 291/292 (the 1 is the intentional 90-day vendored-crypto review gate). The CFLite container path is exercised by the new workflow when this PR opens. Co-Authored-By: Claude Opus 4.8 --- .clusterfuzzlite/Dockerfile | 17 +++++++++ .clusterfuzzlite/build.sh | 27 +++++++++++++ .clusterfuzzlite/project.yaml | 7 ++++ .github/workflows/cflite-pr.yml | 43 +++++++++++++++++++++ CMakeLists.txt | 67 ++++++++++++++++----------------- 5 files changed, 127 insertions(+), 34 deletions(-) create mode 100644 .clusterfuzzlite/Dockerfile create mode 100755 .clusterfuzzlite/build.sh create mode 100644 .clusterfuzzlite/project.yaml create mode 100644 .github/workflows/cflite-pr.yml diff --git a/.clusterfuzzlite/Dockerfile b/.clusterfuzzlite/Dockerfile new file mode 100644 index 00000000..09fc564d --- /dev/null +++ b/.clusterfuzzlite/Dockerfile @@ -0,0 +1,17 @@ +# ClusterFuzzLite / OSS-Fuzz build image for the AegisBPF userspace fuzzers. +# The userspace library (policy parser, event/JSON decoders, network rules, +# signed-bundle parser) is the attacker-influenced input boundary; BPF object +# compilation is skipped here (SKIP_BPF_BUILD=ON) since fuzzers target userspace. 
+FROM gcr.io/oss-fuzz-base/base-builder:latest + +RUN apt-get update && apt-get install -y --no-install-recommends \ + cmake \ + ninja-build \ + pkg-config \ + libbpf-dev \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +COPY . $SRC/aegisbpf +WORKDIR $SRC/aegisbpf +COPY .clusterfuzzlite/build.sh $SRC/build.sh diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh new file mode 100755 index 00000000..591ed4ad --- /dev/null +++ b/.clusterfuzzlite/build.sh @@ -0,0 +1,27 @@ +#!/bin/bash -eu +# +# OSS-Fuzz / ClusterFuzzLite build script for AegisBPF userspace fuzzers. +# +# The fuzzing engine and sanitizer come from the OSS-Fuzz/CFLite environment +# ($CC/$CXX/$CFLAGS/$CXXFLAGS/$LIB_FUZZING_ENGINE). CMake honors $LIB_FUZZING_ENGINE +# (see the ENABLE_FUZZING block in CMakeLists.txt) and links it instead of the +# local-only -fsanitize=fuzzer,address. We do not set CMAKE_BUILD_TYPE so the +# injected $CXXFLAGS fully control optimization/instrumentation. + +cd "$SRC/aegisbpf" + +cmake -S . -B build-fuzz -G Ninja \ + -DENABLE_FUZZING=ON \ + -DBUILD_TESTING=OFF \ + -DSKIP_BPF_BUILD=ON + +FUZZERS="fuzz_policy fuzz_bundle fuzz_network fuzz_path fuzz_event" +cmake --build build-fuzz --target ${FUZZERS} + +for f in ${FUZZERS}; do + cp "build-fuzz/${f}" "${OUT}/" + # Ship checked-in seed corpora as <fuzzer>_seed_corpus.zip when present. + if [ -d "tests/fuzz/corpus/${f}" ]; then + (cd "tests/fuzz/corpus/${f}" && zip -q -r "${OUT}/${f}_seed_corpus.zip" .) 
+ fi +done diff --git a/.clusterfuzzlite/project.yaml b/.clusterfuzzlite/project.yaml new file mode 100644 index 00000000..3137bd9c --- /dev/null +++ b/.clusterfuzzlite/project.yaml @@ -0,0 +1,7 @@ +language: c++ +main_repo: "https://github.com/ErenAri/Aegis-BPF" +sanitizers: + - address + - undefined +fuzzing_engines: + - libfuzzer diff --git a/.github/workflows/cflite-pr.yml b/.github/workflows/cflite-pr.yml new file mode 100644 index 00000000..eb1725a3 --- /dev/null +++ b/.github/workflows/cflite-pr.yml @@ -0,0 +1,43 @@ +name: ClusterFuzzLite PR + +# Free, in-repo continuous fuzzing of the userspace input-parsing surface. +# Builds the libFuzzer harnesses in the OSS-Fuzz base image and fuzzes only the +# code changed by the PR (mode: code-change), failing the check on a new crash. +# No run: steps and no untrusted event input are used (injection-safe). +on: + pull_request: + paths: + - 'src/**' + - 'tests/fuzz/**' + - '.clusterfuzzlite/**' + - 'CMakeLists.txt' + +permissions: read-all + +concurrency: + group: cflite-pr-${{ github.ref }} + cancel-in-progress: true + +jobs: + PR: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + sanitizer: [address, undefined] + steps: + - name: Build fuzzers (${{ matrix.sanitizer }}) + id: build + uses: google/clusterfuzzlite/actions/build_fuzzers@v1 + with: + language: c++ + sanitizer: ${{ matrix.sanitizer }} + - name: Run fuzzers (${{ matrix.sanitizer }}) + id: run + uses: google/clusterfuzzlite/actions/run_fuzzers@v1 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + fuzz-seconds: 300 + mode: 'code-change' + sanitizer: ${{ matrix.sanitizer }} + output-sarif: true diff --git a/CMakeLists.txt b/CMakeLists.txt index ac2e3d87..3096a68b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,6 +106,9 @@ endif() # # Levels are probed for compiler support so the cross-compiler/cross-arch matrix # (GCC/Clang, x86_64/arm64) degrades gracefully instead of failing the build. 
+# Skipped for fuzzing builds, where the sanitizer/fuzzing-engine toolchain owns +# the compile/link flags (FORTIFY can interfere with sanitizer instrumentation). +if(NOT ENABLE_FUZZING) include(CheckCCompilerFlag) include(CheckCXXCompilerFlag) @@ -134,6 +137,7 @@ check_cxx_compiler_flag("-D_GLIBCXX_ASSERTIONS" AEGIS_HAS_GLIBCXX_ASSERTIONS) if(AEGIS_HAS_GLIBCXX_ASSERTIONS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_ASSERTIONS") endif() +endif() # NOT ENABLE_FUZZING # Sanitizer configuration if(ENABLE_ASAN) @@ -785,40 +789,35 @@ endif() if(ENABLE_FUZZING) message(STATUS "Building fuzzing targets") - # Policy parser fuzzer - add_executable(fuzz_policy tests/fuzz/fuzz_policy_parser.cpp) - target_link_libraries(fuzz_policy PRIVATE aegisbpf_lib) - target_include_directories(fuzz_policy PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) - target_compile_options(fuzz_policy PRIVATE -fsanitize=fuzzer,address -fno-omit-frame-pointer) - target_link_options(fuzz_policy PRIVATE -fsanitize=fuzzer,address) - - # Signed bundle parser fuzzer - add_executable(fuzz_bundle tests/fuzz/fuzz_signed_bundle.cpp) - target_link_libraries(fuzz_bundle PRIVATE aegisbpf_lib) - target_include_directories(fuzz_bundle PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) - target_compile_options(fuzz_bundle PRIVATE -fsanitize=fuzzer,address -fno-omit-frame-pointer) - target_link_options(fuzz_bundle PRIVATE -fsanitize=fuzzer,address) - - # Network rules fuzzer - add_executable(fuzz_network tests/fuzz/fuzz_network_rules.cpp) - target_link_libraries(fuzz_network PRIVATE aegisbpf_lib) - target_include_directories(fuzz_network PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) - target_compile_options(fuzz_network PRIVATE -fsanitize=fuzzer,address -fno-omit-frame-pointer) - target_link_options(fuzz_network PRIVATE -fsanitize=fuzzer,address) - - # Path validation fuzzer - add_executable(fuzz_path tests/fuzz/fuzz_path_validation.cpp) - target_link_libraries(fuzz_path PRIVATE aegisbpf_lib) - target_include_directories(fuzz_path 
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) - target_compile_options(fuzz_path PRIVATE -fsanitize=fuzzer,address -fno-omit-frame-pointer) - target_link_options(fuzz_path PRIVATE -fsanitize=fuzzer,address) - - # Event handling fuzzer - add_executable(fuzz_event tests/fuzz/fuzz_event_handling.cpp) - target_link_libraries(fuzz_event PRIVATE aegisbpf_lib) - target_include_directories(fuzz_event PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) - target_compile_options(fuzz_event PRIVATE -fsanitize=fuzzer,address -fno-omit-frame-pointer) - target_link_options(fuzz_event PRIVATE -fsanitize=fuzzer,address) + # libFuzzer harness name -> source mapping (flat pairs for list(POP_FRONT)). + set(AEGIS_FUZZ_TARGETS + fuzz_policy tests/fuzz/fuzz_policy_parser.cpp + fuzz_bundle tests/fuzz/fuzz_signed_bundle.cpp + fuzz_network tests/fuzz/fuzz_network_rules.cpp + fuzz_path tests/fuzz/fuzz_path_validation.cpp + fuzz_event tests/fuzz/fuzz_event_handling.cpp + ) + + # OSS-Fuzz / ClusterFuzzLite inject the fuzzing engine and sanitizers via + # $LIB_FUZZING_ENGINE + $CFLAGS/$CXXFLAGS. When that is present we must NOT + # hardcode -fsanitize=fuzzer,address (it conflicts with their toolchain); + # for local/standalone builds we wire libFuzzer + ASan ourselves. + set(AEGIS_FUZZ_ENGINE "$ENV{LIB_FUZZING_ENGINE}") + + while(AEGIS_FUZZ_TARGETS) + list(POP_FRONT AEGIS_FUZZ_TARGETS _fuzz_name _fuzz_src) + add_executable(${_fuzz_name} ${_fuzz_src}) + target_link_libraries(${_fuzz_name} PRIVATE aegisbpf_lib) + target_include_directories(${_fuzz_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src) + target_compile_options(${_fuzz_name} PRIVATE -fno-omit-frame-pointer) + if(AEGIS_FUZZ_ENGINE) + # Engine + sanitizer supplied by the OSS-Fuzz/CFLite environment. 
+ target_link_options(${_fuzz_name} PRIVATE ${AEGIS_FUZZ_ENGINE}) + else() + target_compile_options(${_fuzz_name} PRIVATE -fsanitize=fuzzer,address) + target_link_options(${_fuzz_name} PRIVATE -fsanitize=fuzzer,address) + endif() + endwhile() endif() # Installation From 978c212714b3377881f8f48aba4cd0f8563381f1 Mon Sep 17 00:00:00 2001 From: ErenAri Date: Fri, 29 May 2026 23:02:16 +0300 Subject: [PATCH 4/5] fix(bpf): detect optional LSM hooks by their bpf_lsm_-prefixed BTF symbol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit detect_missing_optional_lsm_hooks() looked up the BARE hook name (e.g. "socket_connect", "bprm_check_security") as a BTF_KIND_FUNC. BPF-LSM attach points are exposed in vmlinux BTF as `bpf_lsm_` (e.g. `bpf_lsm_socket_connect`) — the bare name never exists — so the lookup always failed and every optional LSM hook (socket_connect/bind/listen/accept/sendmsg, bprm_check_security, file_mmap) was marked "missing" and disabled via set_autoload(false). The result: network deny, exec-identity, and mmap enforcement were silently downgraded, and a fail-closed network policy made the daemon exit at the enforce gate. (File deny survived because file_open / inode_permission are required hooks, not in the optional set.) This matches the symbol the `probe` command and the kernel already use. Verified on Linux 6.17 (bpf-lsm enabled): before, `run --enforce` with a network deny policy logged "Disabling optional LSM program ... socket_connect" and exited fail-closed; after, socket_connect attaches, the daemon stays in ENFORCE, a connect() to a denied IP returns -EPERM (net_connect_block, action=BLOCK), and a connect to an allowed IP is unaffected. 
Co-Authored-By: Claude Opus 4.8 --- src/bpf_ops.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/bpf_ops.cpp b/src/bpf_ops.cpp index 2b4ff483..aab1566d 100644 --- a/src/bpf_ops.cpp +++ b/src/bpf_ops.cpp @@ -57,7 +57,14 @@ std::set detect_missing_optional_lsm_hooks() } for (const char* hook : kOptionalHooks) { - if (btf__find_by_name_kind(vmlinux, hook, BTF_KIND_FUNC) < 0) { + // BPF-LSM attach points are exposed in vmlinux BTF as `bpf_lsm_<hook>` + // FUNCs, not the bare hook name. Querying the bare name (e.g. + // "socket_connect") never matches, which previously marked every + // optional LSM hook as missing and silently disabled network/exec/mmap + // enforcement. Match the same bpf_lsm_-prefixed symbol the kernel and + // the `probe` command use. + const std::string sym = std::string("bpf_lsm_") + hook; + if (btf__find_by_name_kind(vmlinux, sym.c_str(), BTF_KIND_FUNC) < 0) { missing.insert(hook); } } From e9909c51142ded1d2a0e0b449fd394533e8055c4 Mon Sep 17 00:00:00 2001 From: ErenAri Date: Fri, 29 May 2026 23:03:03 +0300 Subject: [PATCH 5/5] feat(net): opt-in signal-fallback connect enforcement for non-BPF-LSM kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a Tier-3 enforcement path for hosts where BPF-LSM is unavailable and connect() cannot be denied with -EPERM. Opt-in via `--enforce-fallback=signal` (default off): a sys_enter_connect tracepoint matches the existing network deny maps and, in enforce mode, terminates the offending process with bpf_send_signal() (SIGKILL by default; honors SIGKILL escalation). - bpf: new handle_tp_connect tracepoint; reuses the deny_ipv4/ipv6/cidr/port/ ip_port + cgroup match helpers. Protocol-agnostic (socket not resolvable at syscall entry). Inert unless agent_cfg.signal_fallback_enforce is set. 
- config: signal_fallback_enforce reuses one reserved byte of agent_config — size/offsets unchanged (static_assert(sizeof==48) still holds), mirrored in userspace types.hpp. - userspace: --enforce-fallback=signal|off threaded via a defaulted daemon_run param (no churn to other callers); tracepoint attached as optional with its link tracked in state.links (no BpfState field added). Verified on Linux 6.17: handle_tp_connect passes the verifier and attaches; a process that calls connect() to a denied IP with --enforce-signal=none is killed by SIGKILL (net_connect_block action=KILL, protocol=0 from this tracepoint, distinct from the LSM hook's action=BLOCK), while a connect to an allowed IP is unaffected. Full suite: 292/293 pass (the one failure is the 90-day vendored-crypto review gate). Caveat (documented in GUARANTEES.md): on genuinely LSM-absent hosts the enforce-gate still treats the missing LSM connect hook as a degradation; wiring the gate to accept signal-fallback as primary enforcement there is a follow-up. Today this is verified as opt-in defense-in-depth alongside LSM enforcement. 
Co-Authored-By: Claude Opus 4.8 --- CMakeLists.txt | 6 ++ bpf/aegis_common.h | 4 +- bpf/aegis_net.bpf.h | 176 ++++++++++++++++++++++++++++++++++++++++++++ docs/GUARANTEES.md | 16 ++++ src/bpf_attach.cpp | 15 ++++ src/cli_run.cpp | 25 ++++++- src/daemon.cpp | 4 +- src/daemon.hpp | 3 +- src/types.hpp | 3 +- 9 files changed, 247 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3096a68b..03609c1a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -766,6 +766,12 @@ if(BUILD_TESTING) ) set_tests_properties(cli_run_rejects_invalid_enforce_signal PROPERTIES WILL_FAIL TRUE) + add_test( + NAME cli_run_rejects_invalid_enforce_fallback + COMMAND $ run --enforce --enforce-fallback=bogus + ) + set_tests_properties(cli_run_rejects_invalid_enforce_fallback PROPERTIES WILL_FAIL TRUE) + add_test( NAME cli_run_rejects_sigkill_without_allow_gate COMMAND $ run --enforce --enforce-signal=kill diff --git a/bpf/aegis_common.h b/bpf/aegis_common.h index 2f1f6d32..b32c03a0 100644 --- a/bpf/aegis_common.h +++ b/bpf/aegis_common.h @@ -338,7 +338,8 @@ struct agent_config { __u8 deny_ptrace; /* block ptrace attachment (MITRE T1055.008) */ __u8 deny_module_load; /* block kernel module loading (MITRE T1547.006) */ __u8 deny_bpf; /* block unauthorized BPF program load (MITRE T1562) */ - __u8 _reserved[4]; /* alignment padding */ + __u8 signal_fallback_enforce; /* enforce via bpf_send_signal on tracepoints when BPF-LSM is absent */ + __u8 _reserved[3]; /* alignment padding */ }; /* Agent config is stored as a BPF global so programs can read it without a @@ -364,6 +365,7 @@ volatile struct agent_config agent_cfg = { .deny_ptrace = 0, .deny_module_load = 0, .deny_bpf = 0, + .signal_fallback_enforce = 0, ._reserved = {0}, }; diff --git a/bpf/aegis_net.bpf.h b/bpf/aegis_net.bpf.h index 79f1d1b7..b82369fc 100644 --- a/bpf/aegis_net.bpf.h +++ b/bpf/aegis_net.bpf.h @@ -1305,3 +1305,179 @@ int BPF_PROG(handle_socket_recvmsg, struct socket *sock, struct msghdr *msg, 
record_hook_latency(HOOK_SOCKET_RECVMSG, _start_ns); return -EPERM; } + +/* ============================================================================ + * Signal-fallback enforcement (Tier 3 — for kernels without BPF-LSM) + * + * On hosts where BPF-LSM is unavailable, lsm/socket_connect cannot attach and + * outbound connect() cannot be denied with -EPERM. This connect() tracepoint + * provides a weaker enforcement tier: when agent_cfg.signal_fallback_enforce is + * set AND the agent is in enforce mode, a connect() to a denied endpoint is met + * with bpf_send_signal(), terminating the offending process. + * + * This is detection + signal, NOT synchronous denial: the connect() syscall may + * partially proceed before the signal is delivered on syscall return. The + * program is inert unless explicitly enabled (default off), so on LSM-capable + * hosts running normally it early-returns after a single global read. + * + * Protocol is unknown at syscall entry (no socket lookup), so only + * protocol-agnostic deny rules (protocol=any) are evaluated here. 
+ * ============================================================================ */ +SEC("tracepoint/syscalls/sys_enter_connect") +int handle_tp_connect(struct trace_event_raw_sys_enter *ctx) +{ + if (!agent_cfg.signal_fallback_enforce) + return 0; + if (agent_cfg.net_policy_empty) + return 0; + + void *uaddr = (void *)ctx->args[1]; + int addrlen = (int)ctx->args[2]; + if (!uaddr) + return 0; + + __u16 family = 0; + if (bpf_probe_read_user(&family, sizeof(family), uaddr)) + return 0; + if (family != AF_INET && family != AF_INET6) + return 0; + + __u64 cgid = bpf_get_current_cgroup_id(); + if (is_cgroup_allowed(cgid)) + return 0; + + __be32 remote_ip_v4 = 0; + struct ipv6_key remote_ip_v6 = {}; + __u16 remote_port = 0; + + if (family == AF_INET) { + struct sockaddr_in sin = {}; + if (addrlen < (int)sizeof(sin)) + return 0; + if (bpf_probe_read_user(&sin, sizeof(sin), uaddr)) + return 0; + remote_ip_v4 = sin.sin_addr.s_addr; + remote_port = bpf_ntohs(sin.sin_port); + } else { + struct sockaddr_in6 sin6 = {}; + if (addrlen < (int)sizeof(sin6)) + return 0; + if (bpf_probe_read_user(&sin6, sizeof(sin6), uaddr)) + return 0; + remote_port = bpf_ntohs(sin6.sin6_port); + __builtin_memcpy(remote_ip_v6.addr, &sin6.sin6_addr, sizeof(remote_ip_v6.addr)); + } + + __u8 protocol = 0; /* protocol-agnostic: socket not resolvable at entry */ + int matched = 0; + char rule_type[16] = {}; + + if (family == AF_INET) { + if (!matched && ip_port_rule_matches_v4(remote_ip_v4, remote_port, protocol)) { + matched = 1; + __builtin_memcpy(rule_type, "ip_port", sizeof("ip_port")); + } + if (!matched && bpf_map_lookup_elem(&deny_ipv4, &remote_ip_v4)) { + matched = 1; + __builtin_memcpy(rule_type, "ip", 3); + } + if (!matched) { + struct ipv4_lpm_key lpm_key = { + .prefixlen = 32, + .addr = remote_ip_v4, + }; + if (bpf_map_lookup_elem(&deny_cidr_v4, &lpm_key)) { + matched = 1; + __builtin_memcpy(rule_type, "cidr", 5); + } + } + } else { + if (!matched && ip_port_rule_matches_v6(&remote_ip_v6, 
remote_port, protocol)) { + matched = 1; + __builtin_memcpy(rule_type, "ip_port", sizeof("ip_port")); + } + if (!matched && bpf_map_lookup_elem(&deny_ipv6, &remote_ip_v6)) { + matched = 1; + __builtin_memcpy(rule_type, "ip", 3); + } + if (!matched) { + struct ipv6_lpm_key lpm_key = { + .prefixlen = 128, + .addr = {0}, + }; + __builtin_memcpy(lpm_key.addr, remote_ip_v6.addr, sizeof(lpm_key.addr)); + if (bpf_map_lookup_elem(&deny_cidr_v6, &lpm_key)) { + matched = 1; + __builtin_memcpy(rule_type, "cidr", 5); + } + } + } + + if (!matched && port_rule_matches(remote_port, protocol, 0)) { + matched = 1; + __builtin_memcpy(rule_type, "port", 5); + } + if (!matched && family == AF_INET && cgroup_ipv4_denied(cgid, remote_ip_v4)) { + matched = 1; + __builtin_memcpy(rule_type, "cg_ip", 6); + } + if (!matched && cgroup_port_denied(cgid, remote_port, protocol, 0)) { + matched = 1; + __builtin_memcpy(rule_type, "cg_port", 8); + } + + if (!matched) + return 0; + + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct task_struct *task = bpf_get_current_task_btf(); + __u8 audit = get_effective_audit_mode(); + + increment_net_connect_stats(); + increment_cgroup_stat(cgid); + + __u8 enforce_signal = 0; + if (!audit) { + __u64 start_time = task ? BPF_CORE_READ(task, start_time) : 0; + __u8 configured_signal = get_effective_enforce_signal(); + if (configured_signal == SIGKILL) { + enforce_signal = runtime_enforce_signal(configured_signal, pid, start_time, + get_sigkill_escalation_threshold(), + get_sigkill_escalation_window_ns()); + } else { + /* A tracepoint cannot return -EPERM, so a too-weak signal would let + * the connect proceed. When no SIGKILL escalation is configured, + * default the fallback to SIGKILL to make the tier meaningful. */ + enforce_signal = configured_signal ? 
configured_signal : SIGKILL; + } + maybe_send_enforce_signal(enforce_signal); + } + + __u32 sample_rate = get_event_sample_rate(); + if (should_emit_event(sample_rate)) { + struct event *e = bpf_ringbuf_reserve(&events, sizeof(*e), 0); + if (e) { + e->type = EVENT_NET_CONNECT_BLOCK; + fill_net_block_event_process_info(&e->net_block, pid, task); + e->net_block.cgid = cgid; + bpf_get_current_comm(e->net_block.comm, sizeof(e->net_block.comm)); + e->net_block.family = family; + e->net_block.protocol = protocol; + e->net_block.local_port = 0; + e->net_block.remote_port = remote_port; + e->net_block.direction = 0; /* egress (connect) */ + e->net_block.remote_ipv4 = (family == AF_INET) ? remote_ip_v4 : 0; + if (family == AF_INET6) + __builtin_memcpy(e->net_block.remote_ipv6, remote_ip_v6.addr, sizeof(e->net_block.remote_ipv6)); + else + __builtin_memset(e->net_block.remote_ipv6, 0, sizeof(e->net_block.remote_ipv6)); + set_action_string(e->net_block.action, audit, enforce_signal); + __builtin_memcpy(e->net_block.rule_type, rule_type, sizeof(rule_type)); + bpf_ringbuf_submit(e, 0); + } else { + increment_net_ringbuf_drops(); + } + } + + return 0; +} diff --git a/docs/GUARANTEES.md b/docs/GUARANTEES.md index 7d9a19c8..5417a88a 100644 --- a/docs/GUARANTEES.md +++ b/docs/GUARANTEES.md @@ -130,6 +130,22 @@ see `docs/THREAT_MODEL.md`. in which the new upper-layer inode is not yet denied. This is detection plus best-effort propagation, not synchronous enforcement. +### Signal-fallback enforcement is opt-in and signal-based + +- On kernels without BPF-LSM, `connect()` cannot be denied with `-EPERM`. The + opt-in `--enforce-fallback=signal` flag attaches a `sys_enter_connect` + tracepoint that, in enforce mode, terminates a process connecting to a denied + endpoint via `bpf_send_signal()` (default `SIGKILL`). +- This is signal-based termination, not synchronous denial: the `connect()` may + partially proceed before the signal is delivered on syscall return. 
Protocol is + not resolvable at syscall entry, so only protocol-agnostic rules are evaluated. +- Verified to fire (`SIGKILL`, `net_connect_block` action=`KILL`) on a connect to + a denied IP. NOTE: when BPF-LSM is genuinely absent the enforce-gate currently + treats the missing LSM hook as a degradation (fail-closed exit or audit + fallback); teaching the gate to accept signal-fallback as primary connect + enforcement on no-LSM hosts is a follow-up. Today the tier is verified as + defense-in-depth alongside LSM enforcement. + ## Known bypass classes | Bypass | Affected surface | Mitigation | diff --git a/src/bpf_attach.cpp b/src/bpf_attach.cpp index 393a4b22..60120846 100644 --- a/src/bpf_attach.cpp +++ b/src/bpf_attach.cpp @@ -219,6 +219,21 @@ Result attach_all(BpfState& state, bool lsm_enabled, bool use_inode_permis "Optional socket_recvmsg hook attach failed"); } + // Signal-fallback enforcement (Tier 3): connect() tracepoint that kills + // denied connections via bpf_send_signal when BPF-LSM is unavailable. + // Attached regardless of lsm_enabled (its purpose is the LSM-absent case); + // inert at runtime unless agent_cfg.signal_fallback_enforce is set. The link + // is tracked in state.links by attach_prog, so a local flag is sufficient. + if (attach_network_hooks) { + ScopedSpan span("bpf.attach.signal_fallback", trace_id, root_span.span_id()); + (void)span; + bool fallback_attached = false; + bpf_program* prog = bpf_object__find_program_by_name(state.obj, "handle_tp_connect"); + attach_optional_program(state, prog, fallback_attached, + "Optional signal-fallback connect tracepoint attach failed"); + (void)fallback_attached; + } + // Optional LSM-only hooks: kernel security (ptrace/module/bpf), overlay // copy-up propagation, and IMA hash verification (kernel 6.1+). 
if (lsm_enabled) { diff --git a/src/cli_run.cpp b/src/cli_run.cpp index c9f0a922..f881a04f 100644 --- a/src/cli_run.cpp +++ b/src/cli_run.cpp @@ -84,6 +84,7 @@ int dispatch_run_command(int argc, char** argv, const char* prog) uint32_t max_deny_paths = 0; uint32_t max_network_entries = 0; EnforceGateMode enforce_gate_mode = EnforceGateMode::FailClosed; + bool enforce_fallback_signal = false; const char* env_gate = std::getenv("AEGIS_ENFORCE_GATE_MODE"); if (env_gate != nullptr && std::strlen(env_gate) > 0) { @@ -127,6 +128,28 @@ int dispatch_run_command(int argc, char** argv, const char* prog) logger().log(SLOG_ERROR("Invalid enforce gate mode").field("value", value)); return 1; } + } else if (arg.rfind("--enforce-fallback=", 0) == 0) { + std::string value = arg.substr(std::strlen("--enforce-fallback=")); + if (value == "signal") { + enforce_fallback_signal = true; + } else if (value == "off" || value == "none") { + enforce_fallback_signal = false; + } else { + logger().log(SLOG_ERROR("Invalid enforce fallback mode (expected signal|off)").field("value", value)); + return 1; + } + } else if (arg == "--enforce-fallback") { + if (i + 1 >= argc) + return usage(prog); + std::string value = argv[++i]; + if (value == "signal") { + enforce_fallback_signal = true; + } else if (value == "off" || value == "none") { + enforce_fallback_signal = false; + } else { + logger().log(SLOG_ERROR("Invalid enforce fallback mode (expected signal|off)").field("value", value)); + return 1; + } } else if (arg.rfind("--deadman-ttl=", 0) == 0) { std::string value = arg.substr(std::strlen("--deadman-ttl=")); if (!parse_u32_option(value, deadman_ttl, "Invalid deadman TTL value", false)) @@ -289,7 +312,7 @@ int dispatch_run_command(int argc, char** argv, const char* prog) return daemon_run(audit_only, enable_seccomp, enable_landlock, deadman_ttl, enforce_signal, allow_sigkill, lsm_hook, ringbuf_bytes, event_sample_rate, sigkill_escalation_threshold, sigkill_escalation_window_seconds, 
deny_rate_threshold, deny_rate_breach_limit, allow_unsigned_bpf, allow_unknown_binary_identity, - strict_degrade, enforce_gate_mode); + strict_degrade, enforce_gate_mode, enforce_fallback_signal); } } // namespace aegis diff --git a/src/daemon.cpp b/src/daemon.cpp index f3c0c66f..b6941fb6 100644 --- a/src/daemon.cpp +++ b/src/daemon.cpp @@ -418,7 +418,8 @@ int daemon_run(bool audit_only, bool enable_seccomp, bool enable_landlock, uint3 bool allow_sigkill, LsmHookMode lsm_hook, uint32_t ringbuf_bytes, uint32_t event_sample_rate, uint32_t sigkill_escalation_threshold, uint32_t sigkill_escalation_window_seconds, uint32_t deny_rate_threshold, uint32_t deny_rate_breach_limit, bool allow_unsigned_bpf, - bool allow_unknown_binary_identity, bool strict_degrade, EnforceGateMode enforce_gate_mode) + bool allow_unknown_binary_identity, bool strict_degrade, EnforceGateMode enforce_gate_mode, + bool enforce_fallback_signal) { const std::string trace_id = make_span_id("trace-daemon"); ScopedSpan root_span("daemon.run", trace_id); @@ -623,6 +624,7 @@ int daemon_run(bool audit_only, bool enable_seccomp, bool enable_landlock, uint3 config.break_glass_active = break_glass_active ? 1 : 0; config.deadman_enabled = (deadman_ttl > 0) ? 1 : 0; config.enforce_signal = enforce_signal; + config.signal_fallback_enforce = enforce_fallback_signal ? 1 : 0; config.deadman_ttl_seconds = deadman_ttl; config.event_sample_rate = event_sample_rate ? 
event_sample_rate : 1; config.sigkill_escalation_threshold = sigkill_escalation_threshold; diff --git a/src/daemon.hpp b/src/daemon.hpp index b25e6e41..69122e89 100644 --- a/src/daemon.hpp +++ b/src/daemon.hpp @@ -29,6 +29,7 @@ int daemon_run(bool audit_only, bool enable_seccomp, bool enable_landlock, uint3 uint32_t sigkill_escalation_threshold, uint32_t sigkill_escalation_window_seconds, uint32_t deny_rate_threshold = 0, uint32_t deny_rate_breach_limit = 3, bool allow_unsigned_bpf = false, bool allow_unknown_binary_identity = false, bool strict_degrade = false, - EnforceGateMode enforce_gate_mode = EnforceGateMode::FailClosed); + EnforceGateMode enforce_gate_mode = EnforceGateMode::FailClosed, + bool enforce_fallback_signal = false); } // namespace aegis diff --git a/src/types.hpp b/src/types.hpp index ab64698f..d2b80653 100644 --- a/src/types.hpp +++ b/src/types.hpp @@ -458,7 +458,8 @@ struct AgentConfig { uint8_t deny_ptrace; /* block ptrace attachment (MITRE T1055.008) */ uint8_t deny_module_load; /* block kernel module loading (MITRE T1547.006) */ uint8_t deny_bpf; /* block unauthorized BPF program load (MITRE T1562) */ - uint8_t _reserved[4]; /* alignment padding */ + uint8_t signal_fallback_enforce; /* enforce via signal on tracepoints when BPF-LSM is absent */ + uint8_t _reserved[3]; /* alignment padding */ }; struct AgentMeta {