diff --git a/.claude-plugin/README.md b/.claude-plugin/README.md
index e9e6468e9..309b70b6f 100644
--- a/.claude-plugin/README.md
+++ b/.claude-plugin/README.md
@@ -1,6 +1,6 @@
 # MemPalace Claude Code Plugin
 
-A Claude Code plugin that gives your AI a persistent memory system. Mine projects and conversations into a searchable palace backed by ChromaDB, with 33 MCP tools, auto-save hooks, and 5 guided skills.
+A Claude Code plugin that gives your AI a persistent memory system. Mine projects and conversations into a searchable palace backed by ChromaDB, with 35 MCP tools, auto-save hooks, and 5 guided skills.
 
 ## Prerequisites
 
@@ -41,16 +41,17 @@ After installing the plugin, run the init command to complete setup (installs th
 
 ## Hooks
 
-MemPalace registers two hooks that run automatically:
+MemPalace registers three hooks that run automatically:
 
 - **Stop** -- Saves conversation context every 15 messages.
+- **SessionEnd** -- Runs one final save in the background on a clean exit, so short sessions that never hit the Stop interval or a compaction are still captured.
 - **PreCompact** -- Preserves important memories before context compaction.
 
 Set the `MEMPAL_DIR` environment variable to a directory path to automatically run `mempalace mine` on that directory during each save trigger.
 
 ## MCP Server
 
-The plugin automatically configures a local MCP server with 33 tools for storing, searching, and managing memories. No manual MCP setup is required -- `/mempalace:init` handles everything.
+The plugin automatically configures a local MCP server with 34 tools for storing, searching, and managing memories. No manual MCP setup is required -- `/mempalace:init` handles everything.
 
 ## Full Documentation
 
diff --git a/.claude-plugin/hooks/hooks.json b/.claude-plugin/hooks/hooks.json
index 9960beda9..e04d4a5a7 100644
--- a/.claude-plugin/hooks/hooks.json
+++ b/.claude-plugin/hooks/hooks.json
@@ -12,6 +12,17 @@
         ]
       }
     ],
+    "SessionEnd": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/mempal-session-end-hook.sh\"",
+            "timeout": 10
+          }
+        ]
+      }
+    ],
     "PreCompact": [
       {
         "hooks": [
diff --git a/.claude-plugin/hooks/mempal-session-end-hook.sh b/.claude-plugin/hooks/mempal-session-end-hook.sh
new file mode 100644
index 000000000..51e05c52b
--- /dev/null
+++ b/.claude-plugin/hooks/mempal-session-end-hook.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# MemPalace SessionEnd Hook — thin wrapper calling the Python CLI.
+#
+# Claude Code documents a default SessionEnd hook timeout of 1.5s, and
+# "timeouts set on plugin-provided hooks do not raise the budget"
+# (https://code.claude.com/docs/en/hooks). A cold `mempalace` start alone
+# exceeds 1.5s, so the final mine must NOT run in the foreground — it would be
+# killed before it saved anything. Unlike the foreground Stop/PreCompact plugin
+# wrappers, this one backgrounds the hook and returns immediately; the detached
+# child finishes the save after the session has exited. All logic lives in
+# mempalace.hooks_cli for cross-harness extensibility.
+run_mempalace_hook() {
+  if command -v mempalace >/dev/null 2>&1; then
+    exec mempalace hook run "$@"
+  fi
+
+  MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
+  if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
+    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
+  fi
+  if "$MEMPAL_PYTHON_BIN" -c "import mempalace" >/dev/null 2>&1; then
+    exec "$MEMPAL_PYTHON_BIN" -m mempalace hook run "$@"
+  fi
+
+  if command -v python >/dev/null 2>&1 && python -c "import mempalace" >/dev/null 2>&1; then
+    exec python -m mempalace hook run "$@"
+  fi
+
+  echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
+  exit 1
+}
+
+# Capture stdin (the SessionEnd JSON) before backgrounding — the parent's
+# stdin is gone once we return. Forward it to the detached worker, which runs
+# the final mine on its own time and outlives this process.
+payload="$(cat)"
+(
+  printf '%s' "$payload" | run_mempalace_hook --hook session-end --harness "${MEMPALACE_HOOK_HARNESS:-claude-code}"
+) >/dev/null 2>&1 </dev/null &
+disown 2>/dev/null || true
+
+# Return immediately so the harness never blocks on session exit.
+printf '{}'
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
index 52226cb36..157d2df80 100644
--- a/.claude-plugin/marketplace.json
+++ b/.claude-plugin/marketplace.json
@@ -8,8 +8,8 @@
     {
       "name": "mempalace",
       "source": "./.claude-plugin",
-      "description": "AI memory system — mine projects and conversations into a searchable palace. 33 MCP tools, auto-save hooks, guided setup.",
-      "version": "3.4.1",
+      "description": "AI memory system — mine projects and conversations into a searchable palace. 35 MCP tools, auto-save hooks, guided setup.",
+      "version": "3.5.0",
       "author": {
         "name": "milla-jovovich"
       }
diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
index aa0cba686..1f7cda909 100644
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "mempalace",
-  "version": "3.4.1",
-  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 33 MCP tools, auto-save hooks, and guided setup.",
+  "version": "3.5.0",
+  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 35 MCP tools, auto-save hooks, and guided setup.",
   "author": {
     "name": "milla-jovovich"
   },
diff --git a/.claude-plugin/skills/mempalace-recall/SKILL.md b/.claude-plugin/skills/mempalace-recall/SKILL.md
index 749994f89..d7a9eb8f7 100644
--- a/.claude-plugin/skills/mempalace-recall/SKILL.md
+++ b/.claude-plugin/skills/mempalace-recall/SKILL.md
@@ -53,6 +53,13 @@ a variable, fixing a typo). Recall is question-driven, not reflexive.
   an answer. Offer to widen the search or file the new information.
 - **MCP error / server down** — surface the error, suggest `mempalace
   status` or re-running `/init`; never fall back to guessing.
+- **Palace index corrupt / compactor error** — if the server reports an
+  HNSW segment-writer error, a ChromaDB compaction failure, or stays
+  "Not connected" after a write, the index is out of sync with
+  `chroma.sqlite3` but the rows are intact. Tell the user to stop the
+  server and rebuild from SQLite (`mempalace repair --mode from-sqlite
+  --archive-existing --yes`), not re-mine, which drops MCP-added drawers
+  and diary entries (#1843). Do not repair in-process.
 - **Conflicting facts** — trust the knowledge graph's time-valid answer;
   invalidate-then-add rather than overwriting silently.
 
diff --git a/.codex-plugin/README.md b/.codex-plugin/README.md
index 2d2478bb3..42574615e 100644
--- a/.codex-plugin/README.md
+++ b/.codex-plugin/README.md
@@ -1,6 +1,6 @@
 # MemPalace - Codex CLI Plugin
 
-Give your AI a persistent memory -- mine projects and conversations into a searchable palace backed by ChromaDB, with 33 MCP tools, auto-save hooks, and guided skills.
+Give your AI a persistent memory -- mine projects and conversations into a searchable palace backed by ChromaDB, with 35 MCP tools, auto-save hooks, and guided skills.
 
 ## Prerequisites
 
diff --git a/.codex-plugin/plugin.json b/.codex-plugin/plugin.json
index 462f401b3..e2eaf3c25 100644
--- a/.codex-plugin/plugin.json
+++ b/.codex-plugin/plugin.json
@@ -1,7 +1,7 @@
 {
   "name": "mempalace",
-  "version": "3.4.1",
-  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 33 MCP tools, auto-save hooks, and guided setup.",
+  "version": "3.5.0",
+  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 35 MCP tools, auto-save hooks, and guided setup.",
   "author": {
     "name": "milla-jovovich"
   },
@@ -27,7 +27,7 @@
   "interface": {
     "displayName": "MemPalace",
     "shortDescription": "AI memory system for Codex",
-    "longDescription": "Give your AI a persistent memory — mine projects and conversations into a searchable palace backed by ChromaDB, with 33 MCP tools, auto-save hooks, and guided skills.",
+    "longDescription": "Give your AI a persistent memory — mine projects and conversations into a searchable palace backed by ChromaDB, with 35 MCP tools, auto-save hooks, and guided skills.",
     "developerName": "milla-jovovich",
     "category": "Coding",
     "capabilities": [
diff --git a/.cursor-plugin/README.md b/.cursor-plugin/README.md
index 6ba9ba48e..49369588f 100644
--- a/.cursor-plugin/README.md
+++ b/.cursor-plugin/README.md
@@ -1,6 +1,6 @@
 # MemPalace Cursor Plugin
 
-A Cursor IDE plugin that gives your agent a persistent memory system. Auto-registers the `mempalace-mcp` server (33 MCP tools), ships 5 slash commands, two model-invocable skills (setup/mining/search and a recall protocol), and an optional recall rule.
+A Cursor IDE plugin that gives your agent a persistent memory system. Auto-registers the `mempalace-mcp` server (35 MCP tools), ships 5 slash commands, two model-invocable skills (setup/mining/search and a recall protocol), and an optional recall rule.
 
 > Hooks (auto-save + session-start memory recall) are shipped separately under `hooks/cursor/` so the plugin is safe to install in any Cursor workspace without touching the agent loop. See [Hooks](#hooks-optional) below.
 
@@ -87,7 +87,7 @@ This plugin ships `mcp.json` at the plugin root, so Cursor auto-loads the `mempa
 }
 ```
 
-All 33 MemPalace MCP tools (`mempalace_search`, `mempalace_add_drawer`, `mempalace_diary_write`, `mempalace_check_duplicate`, `mempalace_diary_read`, …) become available to the agent immediately. No manual `~/.cursor/mcp.json` edit required.
+All 34 MemPalace MCP tools (`mempalace_search`, `mempalace_add_drawer`, `mempalace_diary_write`, `mempalace_check_duplicate`, `mempalace_diary_read`, …) become available to the agent immediately. No manual `~/.cursor/mcp.json` edit required.
 
 If the server doesn't appear, confirm `mempalace-mcp` is on the user `$PATH`:
 
diff --git a/.cursor-plugin/marketplace.json b/.cursor-plugin/marketplace.json
index bd3ed05e2..61d7ba1b9 100644
--- a/.cursor-plugin/marketplace.json
+++ b/.cursor-plugin/marketplace.json
@@ -8,7 +8,7 @@
     {
       "name": "mempalace",
       "source": ".",
-      "description": "AI memory system — mine projects and conversations into a searchable palace. 33 MCP tools, slash commands, and a guided skill for Cursor.",
+      "description": "AI memory system — mine projects and conversations into a searchable palace. 35 MCP tools, slash commands, and a guided skill for Cursor.",
       "author": {
         "name": "milla-jovovich"
       }
diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json
index b3be76b3d..d7433be25 100644
--- a/.cursor-plugin/plugin.json
+++ b/.cursor-plugin/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "mempalace",
-  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 33 MCP tools, slash commands, and a guided skill for Cursor.",
+  "description": "Give your AI a memory — mine projects and conversations into a searchable palace. 35 MCP tools, slash commands, and a guided skill for Cursor.",
   "author": {
     "name": "milla-jovovich"
   },
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 671e1da54..80e3e5def 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,7 +30,13 @@ jobs:
           python-version: "3.13"
           cache: 'pip'
       - run: pip install -e ".[dev]"
-      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10
+      # ChromaDB's rust HNSW core intermittently fails compaction on Windows
+      # ("Failed to apply logs to the hnsw segment writer") regardless of our
+      # code — a long-standing, non-reproducible-on-Linux/macOS flake. Retry
+      # ONLY that specific transient error (via --only-rerun) so real,
+      # deterministic failures still fail on the first run. Linux/macOS jobs
+      # deliberately run with no reruns so genuine regressions surface there.
+      - run: python -m pytest tests/ -v --ignore=tests/benchmarks --cov=mempalace --cov-report=term-missing --cov-fail-under=80 --durations=10 --reruns 2 --reruns-delay 5 --only-rerun "Failed to apply logs to the hnsw segment writer"
 
   test-macos:
     runs-on: macos-latest
diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml
index 6e6cceca3..4ca318b20 100644
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@@ -36,7 +36,7 @@ jobs:
       # do not push.
       - name: Log in to GHCR
         if: github.event_name != 'pull_request'
-        uses: docker/login-action@v3
+        uses: docker/login-action@v4
         with:
           registry: ${{ env.REGISTRY }}
           username: ${{ github.actor }}
@@ -44,7 +44,7 @@ jobs:
 
       - name: Extract metadata
         id: meta
-        uses: docker/metadata-action@v5
+        uses: docker/metadata-action@v6
         with:
           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
           # latest -> main (the latest release); semver tags -> released versions.
@@ -56,7 +56,7 @@ jobs:
             type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
 
       - name: Build and push
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v7
         with:
           context: .
           file: ./Dockerfile
@@ -85,7 +85,7 @@ jobs:
         uses: docker/setup-buildx-action@v3
 
       - name: Build GPU image (validation only — not published)
-        uses: docker/build-push-action@v6
+        uses: docker/build-push-action@v7
         with:
           context: .
           file: ./Dockerfile.gpu
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f10ef293..37f15b891 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,58 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 
 ---
 
+## [3.5.0] — 2026-06-22
+
+### Features
+
+- **Opt-in local daemon for queued writes.** A new `mempalace daemon` queues MemPalace writes through a single local process so background mines, diary saves, and hook-driven ingests serialize against one palace handle instead of racing for it. Opt-in and local-only — nothing binds to a public interface. (#1826)
+
+- **Opt-in HTTP transport for the MCP server.** `mempalace-mcp --transport http` serves JSON-RPC at `POST /mcp` (with a `GET /healthz` liveness probe) for operators running MemPalace behind a long-lived HTTP MCP client/proxy, avoiding the long-lived-stdio framing failures of #1801. stdio remains the default and is unchanged. The transport reuses the exact stdio request dispatcher (no separate write/search path), binds `127.0.0.1` by default, and is hardened against the two ways a local HTTP server leaks to the network: it pins the `Host` header to loopback on a loopback bind and rejects any non-loopback browser `Origin` (DNS-rebinding/SSRF guard), and supports an optional bearer token via `MEMPALACE_MCP_HTTP_TOKEN` (required on `/mcp`, never on `/healthz`). A 16 MiB request cap and a loud warning when bound to a non-loopback host round it out. (#1801, #1806)
+
+- **`mempalace_checkpoint` batch-save MCP tool.** Collapses multiple `add_drawer` calls plus an optional diary entry into a single MCP round-trip for agents that want to file a whole session at once. Stores content verbatim and reuses the existing idempotent add/dedup path. (#1851)
+
+- **`mempalace_delete_by_source` bulk-cleanup MCP tool.** Exact-match, dry-run-by-default deletion of every drawer (and its matching closet/AAAK index entries) for a given `source_file` — the recourse for benchmark/eval files mined into the same wing as real data and drowning out search. The dry run reports the drawer and closet blast radius before anything is removed, and the commit writes a WAL audit entry. (#1722, #1729)
+
+- **Optional `source_file` filter for `mempalace_search`.** Scope a search to an exact stored source path. The filter is threaded through every search path (vector, BM25/SQLite fallback, lexical union, and index-mismatch fallback) so it never silently drops a matching drawer, and results now expose the full `source_path` as a round-trippable key. (#1815, #1817)
+
+- **New transcript parsers / importers.** Continue.dev session parser (#731), Gemini CLI / AI Studio JSON session import (#204), and a Pi agent JSONL session normalizer (#169).
+
+- **Wider miner language coverage.** C# / .NET, PHP (#1819), Swift / Kotlin (#1368), and Java project detection including rootless subprojects (#1720).
+
+- **Final mine on Claude plugin `SessionEnd`.** The Claude Code plugin now runs a closing mine when a session ends so the last exchanges are captured without waiting for the next save nudge. (#1814, #1820)
+
+### Performance
+
+- **Overview/status MCP tools answer from the SQLite aggregate.** Large palaces no longer time out building wing/room/status overviews — the counts come from a single SQLite aggregate instead of a client-side fetch-and-tally. (#1748, #1379)
+
+- **`graph_stats` SQLite fast path.** Knowledge-graph stats are computed in SQLite rather than walking the collection, fixing large-palace timeouts. (#1379)
+
+- **Embedder caps ONNX-runtime intra-op threads** so a background mine no longer pins every core. (#1068)
+
+- **Backend pagination pushed into the query.** `sqlite_exact` (#1841, #1842) and `pgvector` (#1830, #1840) now apply `get(limit, offset)` in SQL, and Qdrant fetches bulk metadata in a single scroll with a larger page size (#1796, #1832).
+
+### Bug Fixes
+
+- **pgvector tolerates hostile transcript bytes.** A lone Unicode surrogate (#1833) or a NUL byte (#1829) in a transcript no longer aborts the whole mine — both are sanitized before the row is written.
+
+- **SQLite read-only URIs are percent-encoded** so palace paths with spaces or special characters open correctly, and `_sqlite_graph_stats` is routed through the same `sqlite_read_uri` helper.
+
+- **Stale ChromaDB HNSW divergence routes to the SQLite fallback** instead of failing the read outright. (#1816, #1822)
+
+- **Diverged-index recovery now points at `repair --mode from-sqlite`, not a re-mine.** A failed ChromaDB HNSW compaction leaves the index out of sync while the rows stay intact in `chroma.sqlite3`; the old "re-mine from source" advice silently dropped MCP-added drawers and diary entries (which have no source file). Both the legacy `repair`/`rebuild_index` error messages and the `repair-status` recommendation, plus the recall skill docs, now guide users to rebuild from SQLite. (#1843, #1847, #1849)
+
+- **The MCP server refuses a second writer for the same palace** rather than letting two processes race the same HNSW handle. (#1818, #1823)
+
+- **Windows hook miner spawns with `CREATE_NO_WINDOW`** so background mines no longer flash a console window. (#1783, #1848)
+
+- **`fact_checker` `__main__` no longer emits a runpy warning** under the test runner. (#1798)
+
+### Internal
+
+- Live-substrate conformance test module for pgvector (#1769); dependabot bumps for `docker/login-action` (3→4), `docker/build-push-action` (6→7), and `docker/metadata-action` (5→6) (#1788, #1787, #1786); ruff dev dependency bumped to 0.15.18.
+
+---
+
 ## [3.4.1] — 2026-06-14
 
 ### Features
diff --git a/README.md b/README.md
index 6f74c5b7f..eae20dc14 100644
--- a/README.md
+++ b/README.md
@@ -225,7 +225,7 @@ Usage and tool reference:
 
 ## MCP server
 
-33 MCP tools cover palace reads/writes, knowledge-graph operations,
+35 MCP tools cover palace reads/writes, knowledge-graph operations,
 cross-wing navigation, drawer management, and agent diaries. Installation
 and the full tool list:
 [mempalaceofficial.com/reference/mcp-tools](https://mempalaceofficial.com/reference/mcp-tools.html).
@@ -285,7 +285,7 @@ PRs welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
 MIT — see [LICENSE](LICENSE).
 
 <!-- Link Definitions -->
-[version-shield]: https://img.shields.io/badge/version-3.4.1-4dc9f6?style=flat-square&labelColor=0a0e14
+[version-shield]: https://img.shields.io/badge/version-3.5.0-4dc9f6?style=flat-square&labelColor=0a0e14
 [release-link]: https://github.com/MemPalace/mempalace/releases
 [python-shield]: https://img.shields.io/badge/python-3.9+-7dd8f8?style=flat-square&labelColor=0a0e14&logo=python&logoColor=7dd8f8
 [python-link]: https://www.python.org/
diff --git a/hooks/README.md b/hooks/README.md
index 05a895e3e..664f2babe 100644
--- a/hooks/README.md
+++ b/hooks/README.md
@@ -18,6 +18,7 @@ It covers hook wiring, JSONL backup, and one-time backfill.
 | Hook | When It Fires | What Happens |
 |------|--------------|-------------|
 | **Save Hook** | Every 15 human messages | Auto-mines transcript (tool output included), then blocks the AI to save topics/decisions/quotes |
+| **SessionEnd Hook** | Clean session exit | Backgrounds a final transcript mine (when a transcript exists) so short sessions aren't lost; returns immediately so teardown is never delayed. A lightweight diary checkpoint is written in the detached child. |
 | **PreCompact Hook** | Right before context compaction | Auto-mines transcript, then emergency save — forces the AI to save EVERYTHING before losing context |
 
 **Two-layer capture:** Hooks auto-mine the JSONL transcript directly into the palace (capturing raw tool output — Bash results, search findings, build errors). They also block the AI with a reason message telling it to save verbatim tool output and key context. Belt and suspenders — tool output gets stored even if the AI summarizes instead of quoting.
@@ -37,6 +38,13 @@ Add to `.claude/settings.local.json`:
         "timeout": 30
       }]
     }],
+    "SessionEnd": [{
+      "hooks": [{
+        "type": "command",
+        "command": "/absolute/path/to/hooks/mempal_session_end_hook.sh",
+        "timeout": 10
+      }]
+    }],
     "PreCompact": [{
       "hooks": [{
         "type": "command",
@@ -48,9 +56,15 @@ Add to `.claude/settings.local.json`:
 }
 ```
 
+`SessionEnd` runs once on a clean exit and backgrounds its work, so it
+returns instantly and stays within Claude Code's SessionEnd budget. Wired
+through `settings.local.json` (above) the `timeout` can raise that budget;
+the bundled plugin cannot, which is why the hook backgrounds rather than
+mining in the foreground.
+
 Make them executable:
 ```bash
-chmod +x hooks/mempal_save_hook.sh hooks/mempal_precompact_hook.sh
+chmod +x hooks/mempal_save_hook.sh hooks/mempal_session_end_hook.sh hooks/mempal_precompact_hook.sh
 ```
 
 ## Install — Antigravity (Google)
@@ -90,6 +104,13 @@ Add to `.codex/hooks.json`:
 }
 ```
 
+**Other harnesses:** the clean-exit save runs through the harness-agnostic
+`mempalace hook run --hook session-end` entry point. This release wires it
+for Claude Code. Antigravity exposes no dedicated session-end event (its
+lifecycle hooks are PreToolUse/PostToolUse/PreInvocation/PostInvocation/Stop,
+and MemPalace already saves there via `Stop`); Cursor and Codex can adopt the
+same entry point as a follow-up wherever their own session-end event is available.
+
 ## Configuration
 
 Edit `mempal_save_hook.sh` to change:
diff --git a/hooks/cursor/mempal_save_hook_cursor.sh b/hooks/cursor/mempal_save_hook_cursor.sh
index 8a3290dd6..19ed26a0c 100755
--- a/hooks/cursor/mempal_save_hook_cursor.sh
+++ b/hooks/cursor/mempal_save_hook_cursor.sh
@@ -163,14 +163,12 @@ _mempal_build_followup() {
 import json, sys
 wing = sys.argv[1] if len(sys.argv) > 1 else "cursor_session"
 msg = (
-    "MemPalace save checkpoint. "
-    "(1) Call mempalace_check_duplicate on the key topics, decisions, "
-    "and verbatim quotes from this session. "
-    "(2) For each non-duplicate, call mempalace_add_drawer (wing="
-    + wing + ", room=<short topic>, content=verbatim quote). "
-    "(3) Call mempalace_diary_write (agent_name=cursor-ide, wing="
-    + wing + ", entry=AAAK-format summary). "
-    "Then stop."
+    "MemPalace save checkpoint. Call mempalace_checkpoint ONCE with: "
+    "items=[{wing: " + wing + ", room: <short topic>, content: <verbatim "
+    "quote>}, ...] for the key topics, decisions, and verbatim quotes from "
+    "this session; and diary={agent_name: cursor-ide, wing: " + wing + ", "
+    "entry: <AAAK-format summary>}. It dedups, files non-duplicates, and "
+    "writes the diary in one call. Then stop."
 )
 print(json.dumps({"followup_message": msg}))
 ' "$WING"
diff --git a/hooks/mempal_session_end_hook.sh b/hooks/mempal_session_end_hook.sh
new file mode 100755
index 000000000..86363ce5b
--- /dev/null
+++ b/hooks/mempal_session_end_hook.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# MemPalace SessionEnd Hook — final save on clean exit.
+#
+# Claude Code documents a default SessionEnd hook timeout of 1.5s; a per-hook
+# "timeout" in settings.local.json can raise it (up to 60s), but a
+# plugin-provided timeout cannot (https://code.claude.com/docs/en/hooks). A cold
+# `mempalace` start alone can exceed 1.5s, so we background the hook and return
+# immediately; the detached child finishes the save after the session has
+# exited. All logic lives in mempalace.hooks_cli for cross-harness extensibility.
+run_mempalace_hook() {
+  if command -v mempalace >/dev/null 2>&1; then
+    exec mempalace hook run "$@"
+  fi
+
+  MEMPAL_PYTHON_BIN="${MEMPAL_PYTHON:-}"
+  if [ -z "$MEMPAL_PYTHON_BIN" ] || [ ! -x "$MEMPAL_PYTHON_BIN" ]; then
+    MEMPAL_PYTHON_BIN="$(command -v python3 2>/dev/null || echo python3)"
+  fi
+  if "$MEMPAL_PYTHON_BIN" -c "import mempalace" >/dev/null 2>&1; then
+    exec "$MEMPAL_PYTHON_BIN" -m mempalace hook run "$@"
+  fi
+
+  if command -v python >/dev/null 2>&1 && python -c "import mempalace" >/dev/null 2>&1; then
+    exec python -m mempalace hook run "$@"
+  fi
+
+  echo "MemPalace hook error: could not find a runnable mempalace command or module" >&2
+  exit 1
+}
+
+# Capture stdin (the SessionEnd JSON) before backgrounding — the parent's
+# stdin is gone once we return. Forward it to the detached worker, which runs
+# the final mine on its own time and outlives this process.
+payload="$(cat)"
+(
+  printf '%s' "$payload" | run_mempalace_hook --hook session-end --harness "${MEMPALACE_HOOK_HARNESS:-claude-code}"
+) >/dev/null 2>&1 </dev/null &
+disown 2>/dev/null || true
+
+# Return immediately so the harness never blocks on session exit.
+printf '{}'
diff --git a/integrations/shared/recall-protocol.md b/integrations/shared/recall-protocol.md
index 86e89e98b..e4b32eecd 100644
--- a/integrations/shared/recall-protocol.md
+++ b/integrations/shared/recall-protocol.md
@@ -67,10 +67,40 @@ question — not a system prompt or pasted conversation) plus optional
 - **MCP unavailable / tool error.** Surface the error plainly and suggest
   the user verify the server (`mempalace status`, or re-run install).
   Do not silently fall back to guessing from model memory.
+- **Palace index corrupt / compactor error.** When the server returns an
+  error mentioning the HNSW segment writer, a ChromaDB compaction
+  failure, or a stuck "Not connected" state after a write, the on-disk
+  vector index is out of sync with `chroma.sqlite3` — but the drawer rows
+  are intact in SQLite. Recover by rebuilding the index from SQLite, not
+  by re-mining. See "Recovering a corrupt index" below. Do not attempt an
+  in-process repair from the agent; guide the user to run the CLI.
 - **Stale or conflicting facts.** Prefer the knowledge graph's
   time-valid answer; if a fact has changed, invalidate the old one and
   add the new one rather than overwriting context silently.
 
+## Recovering a corrupt index
+
+A ChromaDB compaction failure can leave the drawers HNSW index out of
+sync with `chroma.sqlite3` and wedge the MCP server (every call returns
+"Not connected"). The data is safe in SQLite; rebuild the index from it.
+Guide the user through these CLI steps — never run an in-process rebuild
+from the agent (it can break other live clients):
+
+1. Stop the MCP server (kill the `mempalace-mcp` process, or restart the
+   host editor).
+2. Optional backup of the palace directory (`--archive-existing` already
+   moves the old palace aside, so this is belt-and-suspenders):
+   - macOS / Linux: `cp -a ~/.mempalace/palace ~/.mempalace/palace.bak.$(date +%F)`
+   - Windows (PowerShell): `Copy-Item -Recurse "$env:USERPROFILE\.mempalace\palace" "$env:USERPROFILE\.mempalace\palace.bak"`
+3. Rebuild from SQLite:
+   `mempalace repair --mode from-sqlite --archive-existing --yes`
+4. Verify: `mempalace repair-status` (divergence should read 0).
+5. Restart the MCP server.
+
+Do **not** re-mine from source files to recover: re-mining drops drawers
+added through the MCP server and diary entries, which have no source file
+(see MemPalace issue #1843).
+
 ## Anti-patterns
 
 - Answering about past work, people, or decisions from model memory when
diff --git a/mempalace/README.md b/mempalace/README.md
index ddeef061b..f8f3320b8 100644
--- a/mempalace/README.md
+++ b/mempalace/README.md
@@ -16,7 +16,7 @@ The Python package that powers MemPalace. All modules, all logic.
 | `dialect.py` | AAAK compression — entity codes, emotion markers, 30x lossless ratio |
 | `knowledge_graph.py` | Temporal entity-relationship graph — SQLite, time-filtered queries, fact invalidation |
 | `palace_graph.py` | Room-based navigation graph — BFS traversal, tunnel detection across wings |
-| `mcp_server.py` | MCP server — 33 tools, AAAK auto-teach, Palace Protocol, agent diary |
+| `mcp_server.py` | MCP server — 34 tools, AAAK auto-teach, Palace Protocol, agent diary |
 | `onboarding.py` | Guided first-run setup — asks about people/projects, generates AAAK bootstrap + wing config |
 | `entity_registry.py` | Entity code registry — maps names to AAAK codes, handles ambiguous names |
 | `entity_detector.py` | Auto-detect people and projects from file content |
diff --git a/mempalace/backends/base.py b/mempalace/backends/base.py
index 0c643b9c5..f89e47f5d 100644
--- a/mempalace/backends/base.py
+++ b/mempalace/backends/base.py
@@ -468,6 +468,39 @@ def effective_embedder_identity(self) -> Optional[EmbedderIdentity]:
         """
         return None
 
+    def get_all_metadata(self, where: Optional[dict] = None) -> list[dict]:
+        """Return every matching record's metadata in one logical pass (#1796).
+
+        Default implementation pages through :meth:`get` using
+        ``limit``/``offset`` -- correct for backends with a real server-side
+        cursor (e.g. Chroma's SQL OFFSET), and the same shape callers already
+        relied on before this method existed.
+
+        Backends whose ``get(limit=, offset=)`` is implemented by fully
+        materializing a result set and then Python-slicing it (no true
+        server-side cursor) MUST override this method to walk their native
+        cursor exactly once instead. Calling the default implementation on
+        such a backend is O(n^2) in collection size: each page re-walks the
+        entire collection just to discard everything outside the requested
+        slice. See issue #1796.
+        """
+        all_meta: list[dict] = []
+        offset = 0
+        page_size = 1000
+        while True:
+            kwargs: dict = {"include": ["metadatas"], "limit": page_size, "offset": offset}
+            if where:
+                kwargs["where"] = where
+            batch = self.get(**kwargs)
+            batch_meta = batch.metadatas if hasattr(batch, "metadatas") else batch.get("metadatas")
+            if not batch_meta:
+                break
+            all_meta.extend(batch_meta)
+            if len(batch_meta) < page_size:
+                break
+            offset += len(batch_meta)
+        return all_meta
+
     def maintenance_state(self) -> dict:
         """Return a structured snapshot of this collection's maintenance state.
 
diff --git a/mempalace/backends/chroma.py b/mempalace/backends/chroma.py
index 15e074a4e..940b6f8ef 100644
--- a/mempalace/backends/chroma.py
+++ b/mempalace/backends/chroma.py
@@ -9,6 +9,7 @@
 import pickle
 import re
 import sqlite3
+import time
 from collections import defaultdict
 from numbers import Integral
 from pathlib import Path
@@ -17,6 +18,7 @@
 import chromadb
 from chromadb.errors import NotFoundError as _ChromaNotFoundError
 
+from ..config import sqlite_read_uri
 from ._sidecar import EMBEDDER_SIDECAR_FILENAME, read_embedder_sidecar, write_embedder_sidecar
 from .base import (
     BaseBackend,
@@ -457,7 +459,7 @@ def _vector_segment_id(palace_path: str, collection_name: str) -> Optional[str]:
     if not os.path.isfile(db_path):
         return None
     try:
-        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        conn = sqlite3.connect(sqlite_read_uri(db_path), uri=True)
         try:
             row = conn.execute(
                 """
@@ -606,6 +608,7 @@ def _hnsw_element_count(palace_path: str, segment_id: str) -> Optional[int]:
 # sync_threshold) from expected steady-state lag.
 _HNSW_DIVERGENCE_FALLBACK_FLOOR = 2000
 _HNSW_DIVERGENCE_FRACTION = 0.10
+_HNSW_PERSISTENT_DIVERGENCE_GRACE_SECONDS = 300.0
 
 
 def _read_sync_threshold(palace_path: str, collection_name: str) -> int:
@@ -626,7 +629,7 @@ def _read_sync_threshold(palace_path: str, collection_name: str) -> int:
     if not os.path.isfile(db_path):
         return 1000
     try:
-        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        conn = sqlite3.connect(sqlite_read_uri(db_path), uri=True)
         try:
             cur = conn.cursor()
             cur.execute(
@@ -649,6 +652,45 @@ def _read_sync_threshold(palace_path: str, collection_name: str) -> int:
         return 1000
 
 
+def _collection_has_sync_threshold_metadata(palace_path: str, collection_name: str) -> bool:
+    """Return True when the collection explicitly stores hnsw:sync_threshold."""
+
+    db_path = os.path.join(palace_path, "chroma.sqlite3")
+    if not os.path.isfile(db_path):
+        return False
+
+    try:
+        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        try:
+            row = conn.execute(
+                """
+                SELECT 1
+                  FROM collection_metadata cm
+                  JOIN collections c ON cm.collection_id = c.id
+                 WHERE c.name = ?
+                   AND cm.key = 'hnsw:sync_threshold'
+                 LIMIT 1
+                """,
+                (collection_name,),
+            ).fetchone()
+            return row is not None
+        finally:
+            conn.close()
+    except Exception:
+        logger.debug("_collection_has_sync_threshold_metadata failed", exc_info=True)
+        return False
+
+
+def _hnsw_metadata_age_seconds(palace_path: str, segment_id: str) -> Optional[float]:
+    """Return index_metadata.pickle age in seconds, or None when unreadable."""
+
+    pickle_path = os.path.join(palace_path, segment_id, "index_metadata.pickle")
+    try:
+        return max(0.0, time.time() - os.path.getmtime(pickle_path))
+    except OSError:
+        return None
+
+
 def hnsw_capacity_status(palace_path: str, collection_name: str = "mempalace_drawers") -> dict:
     """Compare sqlite embedding count against HNSW element count.
 
@@ -693,11 +735,15 @@ def hnsw_capacity_status(palace_path: str, collection_name: str = "mempalace_dra
 
         hnsw_count = _hnsw_element_count(palace_path, seg_id)
         out["hnsw_count"] = hnsw_count
-
         sync_threshold = _read_sync_threshold(palace_path, collection_name)
-        # Two synchronization windows worth — see comment above
-        # _HNSW_DIVERGENCE_FALLBACK_FLOOR for the rationale.
-        divergence_floor = max(_HNSW_DIVERGENCE_FALLBACK_FLOOR, 2 * sync_threshold)
+        has_explicit_sync_threshold = _collection_has_sync_threshold_metadata(
+            palace_path,
+            collection_name,
+        )
+        metadata_age_seconds = (
+            _hnsw_metadata_age_seconds(palace_path, seg_id) if hnsw_count is not None else None
+        )
+        out["hnsw_metadata_age_seconds"] = metadata_age_seconds
 
         if hnsw_count is None:
             # No pickle yet, so this probe cannot measure HNSW capacity.
@@ -715,21 +761,53 @@ def hnsw_capacity_status(palace_path: str, collection_name: str = "mempalace_dra
 
         divergence = sqlite_count - hnsw_count
         out["divergence"] = divergence
-        threshold = max(divergence_floor, int(sqlite_count * _HNSW_DIVERGENCE_FRACTION))
-        if divergence > threshold:
+
+        # Newer palaces explicitly store mempalace's low sync threshold
+        # (currently 2), so a gap of dozens of rows is far beyond ordinary
+        # flush lag. Older palaces may lack the metadata row; keep the
+        # historical floor for fresh lag there, but do not let a stale pickle
+        # sit below the floor forever (#1816).
+        if has_explicit_sync_threshold:
+            threshold = max(0, 2 * sync_threshold)
+        else:
+            divergence_floor = max(_HNSW_DIVERGENCE_FALLBACK_FLOOR, 2 * sync_threshold)
+            threshold = max(
+                divergence_floor,
+                int(sqlite_count * _HNSW_DIVERGENCE_FRACTION),
+            )
+
+        out["threshold"] = threshold
+        stale_below_threshold = (
+            not has_explicit_sync_threshold
+            and divergence > 0
+            and metadata_age_seconds is not None
+            and metadata_age_seconds >= _HNSW_PERSISTENT_DIVERGENCE_GRACE_SECONDS
+        )
+
+        if divergence > threshold or stale_below_threshold:
             out["status"] = "diverged"
             out["diverged"] = True
             pct = 100.0 * divergence / max(sqlite_count, 1)
+            if divergence > threshold:
+                reason = f"exceeds threshold {threshold:,}"
+            else:
+                age = metadata_age_seconds or 0.0
+                reason = f"persisted below the old flush-lag floor for {age:.0f}s"
             out["message"] = (
                 f"HNSW index holds {hnsw_count:,} elements but sqlite has "
-                f"{sqlite_count:,} embeddings — {divergence:,} drawers ({pct:.0f}%) "
-                "are invisible to vector search. Run `mempalace repair` to rebuild."
+                f"{sqlite_count:,} embeddings - {divergence:,} drawers "
+                f"({pct:.0f}%) are missing from the flushed HNSW index "
+                f"({reason}). Vector reads are disabled until "
+                "`mempalace repair` rebuilds it."
             )
         else:
             out["status"] = "ok"
             out["message"] = (
                 f"HNSW {hnsw_count:,} / sqlite {sqlite_count:,} (within flush-lag tolerance)"
             )
+            if divergence < 0:
+                out["message"] += " (HNSW has extra flushed elements; treating as safe)"
+
     except Exception:
         logger.debug("hnsw_capacity_status failed", exc_info=True)
         out["message"] = "HNSW capacity probe raised; skipping"
@@ -746,7 +824,7 @@ def _sqlite_embedding_count(palace_path: str, collection_name: str) -> Optional[
     if not os.path.isfile(db_path):
         return None
     try:
-        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        conn = sqlite3.connect(sqlite_read_uri(db_path), uri=True)
         try:
             row = conn.execute(
                 """
@@ -807,7 +885,7 @@ def _sqlite_wing_room_counts(
     if not os.path.isfile(db_path):
         return None
     try:
-        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        conn = sqlite3.connect(sqlite_read_uri(db_path), uri=True)
         try:
             # Wait out a transient writer/checkpoint lock rather than falling
             # straight back to the expensive vector-index path (#1681).
@@ -1570,7 +1648,7 @@ def _lexical_search_via_sqlite(
         # rowid, embedding_id is the user-facing drawer id.
         public_ids: dict[int, str] = {}
         try:
-            conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+            conn = sqlite3.connect(sqlite_read_uri(db_path), uri=True)
             conn.row_factory = sqlite3.Row
         except sqlite3.Error:
             logger.debug("Chroma lexical sqlite open failed", exc_info=True)
diff --git a/mempalace/backends/pgvector.py b/mempalace/backends/pgvector.py
index a97f29a34..cfe349450 100644
--- a/mempalace/backends/pgvector.py
+++ b/mempalace/backends/pgvector.py
@@ -37,6 +37,7 @@
 
 import numpy as np
 
+from ..config import strip_lone_surrogates
 from ._sidecar import EMBEDDER_SIDECAR_FILENAME, read_embedder_sidecar, write_embedder_sidecar
 from .base import (
     BackendClosedError,
@@ -80,6 +81,42 @@ def _json_dumps(obj: Any) -> str:
     return json.dumps(obj or {}, ensure_ascii=False, separators=(",", ":"), sort_keys=True)
 
 
+def _strip_nul(value: Any) -> Any:
+    """Recursively strip NUL (0x00) from strings, list/tuple items, and dict keys
+    and values so pgvector can store the result.
+
+    PostgreSQL cannot store NUL in ``text`` or ``jsonb``: psycopg rejects a raw
+    NUL in a text column ("PostgreSQL text fields cannot contain NUL (0x00)
+    bytes"), and a NUL in metadata serializes to a JSON unicode escape that the
+    ``jsonb`` cast rejects ("unsupported Unicode escape sequence"). A single
+    transcript that captured NUL in tool output would otherwise abort the whole
+    mine run (#1829). ChromaDB and the SQLite backend store the byte verbatim,
+    so stripping only here keeps the same inputs ingestible.
+
+    Applied to id, document, and metadata in :meth:`_PgVectorClient.upsert_rows`
+    so the write path never carries a NUL into Postgres. Only ``str`` values are
+    rewritten; the ``int``/``float``/``bool``/``None`` scalars JSON metadata
+    normalizes to pass through unchanged. Stripping is not injective, so two keys
+    (or ids)
+    differing only by a NUL collapse to one (last wins); this does not occur in
+    practice because drawer ids are SHA-256 hashes and metadata keys are fixed
+    field names, so only transcript-derived values are ever actually changed.
+    Unlike ``config.sanitize_content`` (which rejects NUL in user-supplied
+    content), the bulk-mine path strips so one stray byte cannot abort a whole
+    backfill. ``str.replace`` returns the original string when it holds no NUL,
+    so a clean document is not reallocated.
+    """
+    if isinstance(value, str):
+        return value.replace("\x00", "")
+    if isinstance(value, dict):
+        return {_strip_nul(key): _strip_nul(item) for key, item in value.items()}
+    if isinstance(value, list):
+        return [_strip_nul(item) for item in value]
+    if isinstance(value, tuple):
+        return tuple(_strip_nul(item) for item in value)
+    return value
+
+
 def _tokenize(text: str) -> list[str]:
     if not text:
         return []
@@ -581,9 +618,21 @@ def upsert_rows(self, table: str, rows: list[dict]) -> None:
         )
         params = [
             (
-                row["id"],
-                row["document"],
-                _json_dumps(row.get("metadata")),
+                # Strip both unstorable byte classes Postgres rejects before
+                # binding, so one stray byte in a transcript cannot abort the
+                # whole mine (#1829 NUL, #1833 lone surrogate).
+                #
+                # Order matters for metadata: NUL must be stripped *before*
+                # serialization (json escapes it to \\u0000, which the jsonb cast
+                # rejects), while a lone surrogate must be stripped *after*
+                # serialization (json.dumps(ensure_ascii=False) leaves it raw, so
+                # one pass over the serialized string cleans it without walking
+                # the dict). id/document are plain strings, so the two passes
+                # commute there. ids are NUL- and surrogate-free in practice, so
+                # those passes are defensive no-ops on the ON CONFLICT key.
+                strip_lone_surrogates(_strip_nul(row["id"])),
+                strip_lone_surrogates(_strip_nul(row["document"])),
+                strip_lone_surrogates(_json_dumps(_strip_nul(row.get("metadata")))),
                 _vector_literal(row["embedding"]),
                 row.get("updated_at") or _utcnow(),
             )
@@ -625,6 +674,8 @@ def scroll_rows(
         *,
         where: Optional[dict] = None,
         with_embedding: bool = False,
+        limit: Optional[int] = None,
+        offset: Optional[int] = None,
     ) -> list[dict]:
         qi = _quote_identifier(table)
         params: list = []
@@ -633,6 +684,18 @@ def scroll_rows(
         if with_embedding:
             cols += ", embedding"
         sql = f"SELECT {cols} FROM {qi} WHERE {where_sql}"
+        # Push pagination into SQL when a page is requested. ORDER BY the
+        # primary key gives OFFSET a stable order (an unordered scan may skip
+        # or repeat rows across pages); callers that scroll the whole table
+        # pass neither bound, leaving their SQL unchanged.
+        if limit is not None or offset:
+            sql += " ORDER BY id"
+            if limit is not None:
+                params.append(int(limit))
+                sql += " LIMIT %s"
+            if offset:
+                params.append(int(offset))
+                sql += " OFFSET %s"
         rows = self._execute(sql, params, fetch=True)
         return [
             self._row(record, with_embedding=with_embedding, with_distance=False)
@@ -792,13 +855,19 @@ def _ensure_table(self, dimension: int) -> None:
                 )
             self._known_dimension = existing_dim or dimension
 
-    def _scroll(self, *, where=None, with_embedding=False) -> list[dict]:
+    def _scroll(self, *, where=None, with_embedding=False, limit=None, offset=None) -> list[dict]:
         self._ensure_open()
         if not self._table_exists():
             if self._marker_exists():
                 raise CollectionNotInitializedError(self._collection_name)
             return []
-        return self._client.scroll_rows(self._table, where=where, with_embedding=with_embedding)
+        return self._client.scroll_rows(
+            self._table,
+            where=where,
+            with_embedding=with_embedding,
+            limit=limit,
+            offset=offset,
+        )
 
     def _rows(
         self,
@@ -1017,16 +1086,40 @@ def get(
         include=None,
     ) -> GetResult:
         spec = _IncludeSpec.resolve(include, default_distances=False)
-        rows = self._rows(
-            ids=ids, where=where, where_document=where_document, with_embedding=spec.embeddings
+        # Fast path for the common unfiltered page fetch (e.g.
+        # prefetch_mined_set's sweep): push LIMIT/OFFSET into the scan instead
+        # of fetching the whole table and slicing in Python, which is the
+        # O(rows x pages) cost this avoids. Only the no-filter case is pushed:
+        # the "metadata @> ..." pushdown is broader than the exact
+        # _matches_where re-filter for array/object values, so any filtered get
+        # keeps the full-scan path where that re-filter still runs. ids, where,
+        # where_document and negative bounds all fall through to the unchanged
+        # path below. (The document column is still selected for metadata-only
+        # pages; projecting it out needs the positional _row parser to change,
+        # so it stays a separate follow-up.)
+        push_page = (
+            ids is None
+            and not where
+            and not where_document
+            and (limit is None or limit >= 0)
+            and (offset is None or offset >= 0)
+            and (limit is not None or offset)
         )
-        if ids is not None:
-            by_id = {row["id"]: row for row in rows}
-            rows = [by_id[doc_id] for doc_id in ids if doc_id in by_id]
-        if offset:
-            rows = rows[offset:]
-        if limit is not None:
-            rows = rows[:limit]
+        if push_page:
+            rows = self._scroll(
+                where=None, with_embedding=spec.embeddings, limit=limit, offset=offset
+            )
+        else:
+            rows = self._rows(
+                ids=ids, where=where, where_document=where_document, with_embedding=spec.embeddings
+            )
+            if ids is not None:
+                by_id = {row["id"]: row for row in rows}
+                rows = [by_id[doc_id] for doc_id in ids if doc_id in by_id]
+            if offset:
+                rows = rows[offset:]
+            if limit is not None:
+                rows = rows[:limit]
         return GetResult(
             ids=[row["id"] for row in rows],
             documents=[row["document"] for row in rows] if spec.documents else [],
diff --git a/mempalace/backends/qdrant.py b/mempalace/backends/qdrant.py
index bc516d578..1e5b2b431 100644
--- a/mempalace/backends/qdrant.py
+++ b/mempalace/backends/qdrant.py
@@ -53,6 +53,20 @@
 _PAYLOAD_METADATA = "metadata"
 _POINT_NAMESPACE = uuid.UUID("c06c3fc7-5c14-4dc4-84c2-24a5f72d8dc1")
 _TOKEN_RE = re.compile(r"\w{2,}", re.UNICODE)
+# Page size for Qdrant's /points/scroll cursor. 4096 (up from the original
+# 256) cuts REST round-trips ~16x for any full-collection walk (#1796).
+# Qdrant's own docs suggest larger scroll batches are safe, and this is well
+# below typical REST payload-size limits for metadata-only (with_vector=False)
+# scrolls such as get_all_metadata().
+#
+# This constant also governs vector-bearing scrolls (with_vector=True), used
+# by _rows()/get() when embeddings are requested and by _query_local_exact()
+# for the $or/$contains local-filter query fallback. At 4096 rows per page,
+# high-dimensional embeddings make those particular responses tens of MB --
+# Qdrant handles it and round-trips still drop overall, but this is a real
+# trade-off, not a metadata-only optimization. (Noted in maintainer review
+# on #1832.)
+_SCROLL_PAGE_SIZE = 4096
 _SUPPORTED_OPERATORS = frozenset(
     {"$eq", "$ne", "$in", "$nin", "$and", "$or", "$contains", "$gt", "$gte", "$lt", "$lte"}
 )
@@ -480,7 +494,7 @@ def scroll_points(
         collection: str,
         *,
         qdrant_filter: Optional[dict] = None,
-        limit: int = 256,
+        limit: int = _SCROLL_PAGE_SIZE,
         offset: Any = None,
         with_vector: bool = False,
     ) -> tuple[list[dict], Any]:
@@ -732,7 +746,7 @@ def _scroll_all(
             points, offset = self._client.scroll_points(
                 self._remote_collection,
                 qdrant_filter=qdrant_filter,
-                limit=256,
+                limit=_SCROLL_PAGE_SIZE,
                 offset=offset,
                 with_vector=with_vector,
             )
@@ -1000,6 +1014,27 @@ def get(
             embeddings=[row["embedding"] or [] for row in rows] if spec.embeddings else None,
         )
 
+    def get_all_metadata(self, where: Optional[dict] = None) -> list[dict]:
+        """Return every matching record's metadata in one cursor pass (#1796).
+
+        Overrides the default offset-paginated implementation, which would
+        call self.get(limit=, offset=) in a loop -- and since self.get() is
+        backed by a full _scroll_all() materialization, each page of that
+        loop would re-walk the entire collection from the start just to
+        discard everything outside its slice (O(n^2) over collection size).
+
+        Delegates to self._rows(), the same single-scroll-plus-local-filter
+        helper that backs get()/delete(). With ids=None and
+        where_document=None, _rows() reduces to exactly one _scroll_all()
+        pass followed by an unconditional _matches_where() re-check on every
+        row -- the same filter logic get(), delete(), and lexical_search()
+        already use, so this can't independently drift from those call
+        sites. (Maintainer review on #1832: avoid duplicating the filter
+        dance inline.)
+        """
+        rows = self._rows(where=where)
+        return [row["metadata"] for row in rows]
+
     def delete(self, *, ids=None, where=None):
         _validate_where(where)
         if not self._remote_exists():
diff --git a/mempalace/backends/sqlite_exact.py b/mempalace/backends/sqlite_exact.py
index fff5444c7..53f1cde55 100644
--- a/mempalace/backends/sqlite_exact.py
+++ b/mempalace/backends/sqlite_exact.py
@@ -496,19 +496,32 @@ def update(self, *, ids, documents=None, metadatas=None, embeddings=None):
                 )
                 self._replace_fts(cur, collection_id, doc_id, doc)
 
-    def _rows(self, cur, *, where=None, where_document=None) -> list[dict]:
+    def _rows(self, cur, *, where=None, where_document=None, limit=None, offset=None) -> list[dict]:
         _validate_where(where)
         _validate_where(where_document)
         collection_id = self._collection_id(cur)
-        rows = cur.execute(
-            """
-            SELECT id, document, metadata_json, embedding
-            FROM documents
-            WHERE collection_id = ?
-            ORDER BY rowid
-            """,
-            (collection_id,),
-        ).fetchall()
+        sql = (
+            "SELECT id, document, metadata_json, embedding\n"
+            "FROM documents\n"
+            "WHERE collection_id = ?\n"
+            "ORDER BY rowid"
+        )
+        params = [collection_id]
+        # Emit SQL LIMIT/OFFSET only on an unfiltered page. With a
+        # where/where_document the post-filter loop below drops rows *after*
+        # this scan, so a SQL LIMIT/OFFSET would cut the wrong rows; those
+        # callers scan in full and paginate in Python. SQLite requires a LIMIT
+        # before OFFSET, so an offset-only page uses "LIMIT -1" (unbounded).
+        if where is None and where_document is None and (limit is not None or offset):
+            if limit is not None:
+                sql += "\nLIMIT ?"
+                params.append(int(limit))
+            elif offset:
+                sql += "\nLIMIT -1"
+            if offset:
+                sql += "\nOFFSET ?"
+                params.append(int(offset))
+        rows = cur.execute(sql, params).fetchall()
         out = []
         for doc_id, doc, meta_json, emb_blob in rows:
             meta = _json_loads(meta_json)
@@ -603,15 +616,33 @@ def get(
         include=None,
     ) -> GetResult:
         spec = _IncludeSpec.resolve(include, default_distances=False)
+        # Fast path for the common unfiltered page (e.g. the prefetch_mined_set
+        # and status sweeps): push LIMIT/OFFSET into the scan instead of
+        # materializing the whole collection and slicing in Python. Safe only
+        # with no post-filter (ids/where/where_document drop rows after the
+        # scan) and non-negative bounds: SQLite does not honor a negative LIMIT
+        # or OFFSET the way a Python slice does, so those keep the slice path.
+        push_page = (
+            ids is None
+            and where is None
+            and where_document is None
+            and (limit is None or limit >= 0)
+            and (offset is None or offset >= 0)
+            and (limit is not None or offset)
+        )
         with self._cursor() as cur:
-            rows = self._rows(cur, where=where, where_document=where_document)
-        if ids is not None:
-            by_id = {row["id"]: row for row in rows}
-            rows = [by_id[doc_id] for doc_id in ids if doc_id in by_id]
-        if offset:
-            rows = rows[offset:]
-        if limit is not None:
-            rows = rows[:limit]
+            if push_page:
+                rows = self._rows(cur, limit=limit, offset=offset)
+            else:
+                rows = self._rows(cur, where=where, where_document=where_document)
+        if not push_page:
+            if ids is not None:
+                by_id = {row["id"]: row for row in rows}
+                rows = [by_id[doc_id] for doc_id in ids if doc_id in by_id]
+            if offset:
+                rows = rows[offset:]
+            if limit is not None:
+                rows = rows[:limit]
         return GetResult(
             ids=[row["id"] for row in rows],
             documents=[row["document"] for row in rows] if spec.documents else [],
diff --git a/mempalace/cli.py b/mempalace/cli.py
index 7699610fa..2fbecd506 100644
--- a/mempalace/cli.py
+++ b/mempalace/cli.py
@@ -53,6 +53,12 @@
 _PASS_ZERO_LLM_MAX_SAMPLES = 20  # caps the LLM-tier sample count
 _EXPLICIT_BACKEND_ENV = "MEMPALACE_BACKEND_EXPLICIT"
 
+# Keep parser construction lightweight for --version and hook commands.
+# This mirrors miner.MAX_CHUNKS_PER_FILE without importing miner here;
+# importing miner pulls in Chroma dependencies before argparse can handle
+# lightweight exits such as --version.
+_CLI_MAX_CHUNKS_PER_FILE_DEFAULT = 50_000
+
 
 def _backend_arg(args):
     """Return a CLI-selected backend from subcommand or global flags."""
@@ -534,6 +540,27 @@ def cmd_mine(args):
     for raw in args.include_ignored or []:
         include_ignored.extend(part.strip() for part in raw.split(",") if part.strip())
 
+    if getattr(args, "background", False) and not getattr(args, "daemon", False):
+        print("mempalace: --background requires --daemon", file=sys.stderr)
+        sys.exit(2)
+
+    if getattr(args, "daemon", False):
+        payload = {
+            "source": args.dir,
+            "mode": args.mode,
+            "wing": args.wing,
+            "agent": args.agent,
+            "limit": args.limit,
+            "dry_run": args.dry_run,
+            "extract": args.extract,
+            "no_gitignore": args.no_gitignore,
+            "include_ignored": include_ignored,
+            "max_chunks_per_file": getattr(args, "max_chunks_per_file", None),
+            "redetect_origin": getattr(args, "redetect_origin", False),
+        }
+        _submit_daemon_cli_job("mine", payload, args, background=getattr(args, "background", False))
+        return
+
     # --redetect-origin re-runs corpus_origin on the current corpus state
     # and overwrites <palace>/.mempalace/origin.json before mining proceeds.
     # Heuristic-only by design — full LLM detection lives on `mempalace init`.
@@ -655,14 +682,28 @@ def cmd_sweep(args):
 
 def cmd_sync(args):
     """Prune drawers whose source files are gitignored, deleted, or moved (#1252)."""
-    from .mcp_server import _wal_log
+    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
+
+    if getattr(args, "background", False) and not getattr(args, "daemon", False):
+        print("mempalace: --background requires --daemon", file=sys.stderr)
+        sys.exit(2)
+
+    if getattr(args, "daemon", False):
+        payload = {
+            "dir": args.dir,
+            "root": list(args.root or []),
+            "wing": args.wing,
+            "dry_run": args.dry_run,
+        }
+        _submit_daemon_cli_job("sync", payload, args, background=getattr(args, "background", False))
+        return
+
     from .palace import MineAlreadyRunning
+    from .wal import _wal_log
     from .backends import detect_backend_for_path
     from .palace import _backend_artifact_label, resolve_backend_name
     from .sync import sync_palace
 
-    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
-
     if not os.path.isdir(palace_path):
         print(f"\n  No palace found at {palace_path}")
         return
@@ -745,6 +786,133 @@ def cmd_sync(args):
     print(f"\n{'=' * 55}\n")
 
 
+def _submit_daemon_cli_job(kind: str, payload: dict, args, *, background: bool) -> None:
+    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
+    backend = _backend_arg(args)
+    from .daemon import DaemonError, submit_job
+
+    try:
+        job = submit_job(
+            kind,
+            payload,
+            palace_path=palace_path,
+            backend=backend,
+            wait=not background,
+            auto_start=True,
+        )
+    except DaemonError as exc:
+        print(f"mempalace: daemon submission failed: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    if background:
+        print(f"Submitted daemon job {job['id']} ({kind})")
+        return
+
+    result = job.get("result") or {}
+    from .service import print_job_result
+
+    exit_code = print_job_result(result)
+    if job.get("state") != "succeeded" and exit_code == 0:
+        error = job.get("error") or {}
+        print(
+            f"mempalace: daemon job failed: {error.get('message', 'unknown error')}",
+            file=sys.stderr,
+        )
+        exit_code = 1
+    if exit_code:
+        sys.exit(exit_code)
+
+
+def cmd_daemon(args):
+    palace_path = os.path.expanduser(args.palace) if args.palace else MempalaceConfig().palace_path
+    backend = _backend_arg(args)
+    from .daemon import (
+        TERMINAL_STATES,
+        DaemonError,
+        QueueStore,
+        get_client_if_running,
+        job_to_dict,
+        queue_path,
+        start_daemon,
+        stop_daemon,
+    )
+
+    action = getattr(args, "daemon_action", None)
+    try:
+        if action == "start":
+            if args.foreground:
+                start_daemon(palace_path, backend=backend, foreground=True)
+                return
+            client = start_daemon(palace_path, backend=backend, foreground=False)
+            health = client.health()
+            print(f"MemPalace daemon running on 127.0.0.1:{client.port}")
+            print(f"  Palace: {health.get('palace_path')}")
+            print(f"  PID:    {health.get('pid')}")
+            return
+
+        if action == "stop":
+            if stop_daemon(palace_path):
+                print("MemPalace daemon stopping")
+            else:
+                print("MemPalace daemon is not running")
+            return
+
+        if action == "status":
+            client = get_client_if_running(palace_path)
+            if client is None:
+                print("MemPalace daemon is not running")
+                sys.exit(1)
+            health = client.health()
+            print("MemPalace daemon is running")
+            print(f"  Palace: {health.get('palace_path')}")
+            print(f"  PID:    {health.get('pid')}")
+            print(f"  Active: {health.get('active_job_id') or '-'}")
+            print(f"  Jobs:   {health.get('counts') or {}}")
+            return
+
+        if action == "jobs":
+            client = get_client_if_running(palace_path)
+            if client is not None:
+                jobs = client.list_jobs(limit=args.limit)
+            else:
+                qpath = queue_path(palace_path)
+                if not qpath.exists():
+                    jobs = []
+                else:
+                    jobs = [
+                        job_to_dict(job, include_payload=False)
+                        for job in QueueStore(qpath).list(args.limit)
+                    ]
+            for job in jobs:
+                print(f"{job['id']}  {job['state']:<9}  {job['kind']:<10}  {job['created_at']}")
+            return
+
+        if action == "wait":
+            client = get_client_if_running(palace_path)
+            if client is not None:
+                job = client.wait(args.job_id)
+            else:
+                qpath = queue_path(palace_path)
+                if not qpath.exists():
+                    raise DaemonError("daemon is not running")
+                job = job_to_dict(QueueStore(qpath).get(args.job_id))
+                if job.get("state") not in TERMINAL_STATES:
+                    raise DaemonError(f"daemon is not running; job {args.job_id} is {job['state']}")
+            result = job.get("result") or {}
+            from .service import print_job_result
+
+            exit_code = print_job_result(result)
+            if job.get("state") != "succeeded" and exit_code == 0:
+                print(f"mempalace: daemon job failed: {job.get('error')}", file=sys.stderr)
+                exit_code = 1
+            if exit_code:
+                sys.exit(exit_code)
+            return
+    except DaemonError as exc:
+        print(f"mempalace: daemon error: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+
 def cmd_search(args):
     from .searcher import search, SearchError
 
@@ -914,6 +1082,7 @@ def cmd_repair(args):
         _post_rebuild_cleanup,
         _rebuild_collection_via_temp,
         check_extraction_safety,
+        index_read_recovery_guidance,
         maybe_repair_poisoned_max_seq_id_before_rebuild,
         print_sqlite_integrity_abort,
         sqlite_integrity_errors,
@@ -1027,7 +1196,7 @@ def cmd_repair(args):
         print(f"  Drawers found: {total}")
     except Exception as e:
         print(f"  Error reading palace: {e}")
-        print("  Cannot recover — palace may need to be re-mined from source files.")
+        print(index_read_recovery_guidance())
         return
 
     if total == 0:
@@ -1480,13 +1649,22 @@ def main():
     p_mine.add_argument(
         "--dry-run", action="store_true", help="Show what would be filed without filing"
     )
+    p_mine.add_argument(
+        "--daemon",
+        action="store_true",
+        help="Submit this mine to the opt-in local daemon queue",
+    )
+    p_mine.add_argument(
+        "--background",
+        action="store_true",
+        help="With --daemon, return a job id immediately instead of waiting",
+    )
     p_mine.add_argument(
         "--extract",
         choices=["exchange", "general"],
         default="exchange",
         help="Extraction strategy for convos mode: 'exchange' (default) or 'general' (5 memory types)",
     )
-    from . import miner as _miner_for_default
 
     p_mine.add_argument(
         "--max-chunks-per-file",
@@ -1495,7 +1673,7 @@ def main():
         metavar="N",
         help=(
             f"Per-file chunk cap; files producing more chunks are skipped with a "
-            f"summary counter. Default {_miner_for_default.MAX_CHUNKS_PER_FILE} "
+            f"summary counter. Default {_CLI_MAX_CHUNKS_PER_FILE_DEFAULT} "
             f"(or MEMPALACE_MAX_CHUNKS_PER_FILE). Set 0 to disable. Lower this on "
             f"Windows if you hit ONNX bad_alloc (#1455)."
         ),
@@ -1543,6 +1721,16 @@ def main():
         action="store_false",
         help="Actually delete drawers (overrides --dry-run; requires --wing or a project root)",
     )
+    p_sync.add_argument(
+        "--daemon",
+        action="store_true",
+        help="Submit this sync to the opt-in local daemon queue",
+    )
+    p_sync.add_argument(
+        "--background",
+        action="store_true",
+        help="With --daemon, return a job id immediately instead of waiting",
+    )
 
     # search
     p_search = sub.add_parser("search", help="Find anything, exact words")
@@ -1605,7 +1793,7 @@ def main():
     p_hook_run.add_argument(
         "--hook",
         required=True,
-        choices=["session-start", "stop", "precompact"],
+        choices=["session-start", "stop", "session-end", "precompact"],
         help="Hook name to run",
     )
     p_hook_run.add_argument(
@@ -1705,6 +1893,27 @@ def main():
         help="Compare sqlite vs HNSW element counts (read-only; never opens a chromadb client)",
     )
 
+    # daemon
+    p_daemon = sub.add_parser("daemon", help="Manage the opt-in long-lived daemon")
+    daemon_sub = p_daemon.add_subparsers(dest="daemon_action")
+    p_daemon_start = daemon_sub.add_parser("start", help="Start the daemon")
+    p_daemon_start.add_argument(
+        "--foreground",
+        action="store_true",
+        help="Run in the foreground for debugging or process supervisors",
+    )
+    p_daemon_start.add_argument(
+        "--backend",
+        default=None,
+        help="Storage backend for this daemon (default: config/env/detected/chroma)",
+    )
+    daemon_sub.add_parser("stop", help="Stop the daemon")
+    daemon_sub.add_parser("status", help="Show daemon status")
+    p_daemon_jobs = daemon_sub.add_parser("jobs", help="List recent daemon jobs")
+    p_daemon_jobs.add_argument("--limit", type=int, default=20, help="Max jobs to show")
+    p_daemon_wait = daemon_sub.add_parser("wait", help="Wait for a daemon job")
+    p_daemon_wait.add_argument("job_id", help="Job id returned by --background")
+
     # mcp
     p_mcp = sub.add_parser(
         "mcp",
@@ -1806,6 +2015,13 @@ def main():
             p_palace.print_help()
         return
 
+    if args.command == "daemon":
+        if not getattr(args, "daemon_action", None):
+            p_daemon.print_help()
+            return
+        cmd_daemon(args)
+        return
+
     dispatch = {
         "init": cmd_init,
         "mine": cmd_mine,
diff --git a/mempalace/config.py b/mempalace/config.py
index 05d542c5e..36a1703b3 100644
--- a/mempalace/config.py
+++ b/mempalace/config.py
@@ -205,6 +205,21 @@ def sanitize_content(value: str, max_length: int = 100_000) -> str:
 DEFAULT_MAX_BACKUPS = 10
 
 
+def sqlite_read_uri(db_path: str) -> str:
+    """Return a read-only ``file:`` URI for ``sqlite3.connect(..., uri=True)``.
+
+    A bare ``f"file:{db_path}?mode=ro"`` mis-parses paths containing spaces or
+    other URI-reserved characters — common in real home directories (a Windows
+    user folder like ``First Last``, many macOS paths). ``pathname2url``
+    percent-encodes the path and normalizes separators so the database opens on
+    every platform.
+    """
+    from urllib.request import pathname2url
+
+    db_path = os.fspath(db_path)
+    return f"file:{pathname2url(db_path)}?mode=ro"
+
+
 @lru_cache(maxsize=1)
 def get_configured_collection_name() -> str:
     """Return the configured drawer collection name without repeated config-file reads."""
@@ -645,6 +660,37 @@ def embedding_model(self):
             return env_val.strip().lower()
         return str(self._file_config.get("embedding_model", "minilm")).strip().lower()
 
+    @property
+    def embedding_threads(self) -> int:
+        """Cap on the embedder's ONNX Runtime intra-op thread pool (#1068).
+
+        ChromaDB's ONNX embedder builds its ``InferenceSession`` with no thread
+        cap, so the intra-op pool defaults to the physical core count and a
+        background ``mine`` pins every core — stacked Stop-hook fires turn into
+        thermal events. ``OMP_NUM_THREADS`` is inert here (ORT owns its own
+        pool), so the cap is applied via ``SessionOptions`` in
+        :mod:`mempalace.embedding`.
+
+        Read from env ``MEMPALACE_EMBEDDING_THREADS`` first, then
+        ``embedding_threads`` in ``config.json``. Semantics:
+
+        - unset / ``"auto"`` → half the logical CPUs (min 1), so a background
+          mine leaves the machine usable out of the box.
+        - a positive integer → exactly that many intra-op threads.
+        - ``0`` or negative → uncapped: ORT's default (physical core count),
+          for users who want maximum indexing throughput.
+        """
+        raw = os.environ.get("MEMPALACE_EMBEDDING_THREADS")
+        if raw is None:
+            raw = self._file_config.get("embedding_threads")
+        if raw is None or str(raw).strip().lower() in ("", "auto"):
+            return max(1, (os.cpu_count() or 2) // 2)
+        try:
+            val = int(str(raw).strip())
+        except (TypeError, ValueError):
+            return max(1, (os.cpu_count() or 2) // 2)
+        return val if val > 0 else 0
+
     def set_embedding_model(self, model: str) -> None:
         """Persist the embedding-model choice to ``config.json``.
 
@@ -749,6 +795,19 @@ def hook_desktop_toast(self):
         """Whether the stop hook shows a desktop notification via notify-send."""
         return self._file_config.get("hooks", {}).get("desktop_toast", False)
 
+    @property
+    def hook_use_daemon(self):
+        """Whether hooks should submit save/mine work to the opt-in daemon."""
+        env_val = os.environ.get("MEMPALACE_HOOKS_DAEMON")
+        if env_val is not None:
+            return env_val.lower() in ("true", "1", "yes", "on")
+        value = self._file_config.get("hooks", {}).get("daemon", False)
+        if isinstance(value, bool):
+            return value
+        if isinstance(value, str):
+            return value.lower() in ("true", "1", "yes", "on")
+        return value == 1
+
     def set_hook_setting(self, key: str, value: bool):
         """Update a hook setting and write config to disk."""
         if "hooks" not in self._file_config:
diff --git a/mempalace/daemon.py b/mempalace/daemon.py
new file mode 100644
index 000000000..3b9b45dca
--- /dev/null
+++ b/mempalace/daemon.py
@@ -0,0 +1,1149 @@
+"""Long-lived local daemon for queued MemPalace writes.
+
+Daemon mode is strictly opt-in. The default CLI, hooks, and MCP paths still use
+their direct execution behavior unless callers explicitly request daemon-backed
+execution.
+"""
+
+from __future__ import annotations
+
+import argparse
+import contextlib
+import json
+import os
+import secrets
+import sqlite3
+import subprocess
+import sys
+import threading
+import time
+import uuid
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any
+from urllib import error as urlerror
+from urllib import request as urlrequest
+from urllib.parse import parse_qs, urlparse
+
+from .config import MempalaceConfig
+
+HOST = "127.0.0.1"
+STATE_ROOT_ENV = "MEMPALACE_DAEMON_STATE_ROOT"
+DEFAULT_WAIT_TIMEOUT = 60.0 * 60.0
+# Liveness-probe timeout for the hook "is a daemon already running?" precheck.
+# Kept well under the ~500ms hook budget so a wedged daemon can't stall the hook
+# (it falls back to the direct/spawn path instead). A healthy local daemon
+# answers /health in single-digit ms, so this rarely false-negatives.
+HOOK_PROBE_TIMEOUT = 0.5
+TERMINAL_STATES = {"succeeded", "failed", "cancelled"}
+MAX_ATTEMPTS = 3
+MAX_BODY_BYTES = 1 << 20  # 1 MiB cap on request bodies (auth-gated DoS guard)
+SHUTDOWN_DRAIN_SECONDS = 10.0
+# Terminal jobs are kept for diagnostics then pruned so the queue DB (which
+# holds verbatim payloads) doesn't grow without bound across a long-lived
+# daemon. Override via env for operators who want a longer/shorter window.
+JOB_RETENTION_DAYS = int(os.environ.get("MEMPALACE_DAEMON_RETENTION_DAYS", "7") or "7")
+try:
+    import fcntl as _fcntl  # POSIX only; absent on Windows
+except ImportError:  # pragma: no cover - Windows fallback
+    _fcntl = None
+
+
+def _chmod_private(path: Path) -> None:
+    try:
+        os.chmod(str(path), 0o600)
+    except OSError:
+        pass
+
+
+def _chmod_dir_private(path: Path) -> None:
+    try:
+        os.chmod(str(path), 0o700)
+    except OSError:
+        pass
+
+
+class DaemonError(RuntimeError):
+    """Raised when daemon client operations fail."""
+
+
+def _now() -> str:
+    return datetime.now(timezone.utc).isoformat()
+
+
+def canonical_palace_path(path: str | None = None) -> str:
+    value = path or MempalaceConfig().palace_path
+    return os.path.abspath(os.path.realpath(os.path.expanduser(value)))
+
+
+def palace_key(palace_path: str) -> str:
+    import hashlib
+
+    normalized = os.path.normcase(canonical_palace_path(palace_path))
+    return hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:24]
+
+
+def state_root() -> Path:
+    raw = os.environ.get(STATE_ROOT_ENV)
+    if raw:
+        return Path(raw).expanduser()
+    return Path.home() / ".mempalace" / "daemon"
+
+
+def state_dir(palace_path: str) -> Path:
+    return state_root() / palace_key(palace_path)
+
+
+def _write_private(path: Path, text: str) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    fd = os.open(str(path), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, "w", encoding="utf-8") as fh:
+        fh.write(text)
+
+
+def ensure_token(palace_path: str) -> str:
+    token_path = state_dir(palace_path) / "token"
+    if token_path.exists():
+        token = token_path.read_text(encoding="utf-8").strip()
+        if token:
+            return token
+    token = secrets.token_urlsafe(32)
+    _write_private(token_path, token + "\n")
+    return token
+
+
+def read_token(palace_path: str) -> str:
+    token_path = state_dir(palace_path) / "token"
+    try:
+        return token_path.read_text(encoding="utf-8").strip()
+    except OSError as exc:
+        raise DaemonError(f"daemon token not found for {palace_path}") from exc
+
+
+def endpoint_path(palace_path: str) -> Path:
+    return state_dir(palace_path) / "endpoint.json"
+
+
+def pid_path(palace_path: str) -> Path:
+    return state_dir(palace_path) / "pid"
+
+
+def queue_path(palace_path: str) -> Path:
+    return state_dir(palace_path) / "queue.sqlite3"
+
+
+def _read_endpoint(palace_path: str) -> dict[str, Any]:
+    try:
+        with open(endpoint_path(palace_path), encoding="utf-8") as fh:
+            return json.load(fh)
+    except (OSError, json.JSONDecodeError) as exc:
+        raise DaemonError("daemon endpoint not found") from exc
+
+
+def _pid_alive_windows(pid: int) -> bool:
+    """Liveness probe for Windows that never sends a console control event.
+
+    ``os.kill(pid, 0)`` is NOT a harmless existence check on Windows: signal 0
+    is ``signal.CTRL_C_EVENT``, so Python routes it to
+    ``GenerateConsoleCtrlEvent`` and sends a Ctrl-C to the target's process
+    group instead of probing the pid. On a process with an attached console
+    (e.g. a CI runner) that Ctrl-C is delivered back to *this* interpreter and
+    surfaces as a spurious ``KeyboardInterrupt`` — exactly the hang seen when
+    ``DaemonClient`` polled a same-process endpoint. Probe via the Win32 process
+    handle API instead, which has no signalling side effects.
+    """
+    import ctypes
+    from ctypes import wintypes
+
+    SYNCHRONIZE = 0x00100000
+    WAIT_TIMEOUT = 0x00000102
+    ERROR_ACCESS_DENIED = 5
+
+    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
+    kernel32.OpenProcess.restype = wintypes.HANDLE
+    kernel32.OpenProcess.argtypes = (wintypes.DWORD, wintypes.BOOL, wintypes.DWORD)
+    kernel32.WaitForSingleObject.restype = wintypes.DWORD
+    kernel32.WaitForSingleObject.argtypes = (wintypes.HANDLE, wintypes.DWORD)
+    kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
+
+    handle = kernel32.OpenProcess(SYNCHRONIZE, False, int(pid))
+    if not handle:
+        # No handle: access-denied means the process exists but isn't ours to
+        # open; any other error (invalid parameter / not found) means it's gone.
+        return ctypes.get_last_error() == ERROR_ACCESS_DENIED
+    try:
+        # A live process is not signalled, so the zero-timeout wait returns
+        # WAIT_TIMEOUT; an exited process is signalled and returns WAIT_OBJECT_0.
+        return kernel32.WaitForSingleObject(handle, 0) == WAIT_TIMEOUT
+    finally:
+        kernel32.CloseHandle(handle)
+
+
+def _pid_alive(pid: int) -> bool:
+    if pid <= 0:
+        return False
+    if os.name == "nt":
+        try:
+            return _pid_alive_windows(pid)
+        except OSError:
+            # If the Win32 probe itself fails, assume alive rather than risk
+            # discarding a healthy endpoint — and never fall back to os.kill.
+            return True
+    try:
+        os.kill(pid, 0)
+    except ProcessLookupError:
+        return False
+    except PermissionError:
+        return True
+    except OSError:
+        return False
+    return True
+
+
+@dataclass
+class Job:
+    id: str
+    kind: str
+    payload: dict[str, Any]
+    state: str
+    priority: int
+    dedupe_key: str | None
+    created_at: str
+    started_at: str | None
+    finished_at: str | None
+    result: dict[str, Any] | None
+    error: dict[str, Any] | None
+    attempts: int
+
+
+class QueueStore:
+    def __init__(self, path: Path):
+        self.path = path
+        self.path.parent.mkdir(parents=True, exist_ok=True)
+        self._lock = threading.RLock()
+        self._init_db()
+
+    @contextlib.contextmanager
+    def _connect(self):
+        """Open a short-lived sqlite3 connection and close it on exit.
+
+        The bare ``with sqlite3.connect(...)`` context manager only manages the
+        transaction (commit/rollback) — it does NOT close the connection, so every
+        QueueStore call in this long-lived daemon process leaked a connection FD.
+        In a daemon that runs thousands of jobs that is an unbounded FD leak. This
+        wrapper closes the connection on exit so each call is self-contained.
+        """
+        conn = sqlite3.connect(str(self.path), timeout=30)
+        try:
+            conn.row_factory = sqlite3.Row
+            yield conn
+            conn.commit()
+        except Exception:
+            conn.rollback()
+            raise
+        finally:
+            conn.close()
+
+    def _init_db(self) -> None:
+        with self._connect() as conn:
+            conn.execute("PRAGMA journal_mode=WAL")
+            conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS jobs (
+                    id TEXT PRIMARY KEY,
+                    kind TEXT NOT NULL,
+                    payload_json TEXT NOT NULL,
+                    state TEXT NOT NULL,
+                    priority INTEGER NOT NULL DEFAULT 0,
+                    dedupe_key TEXT,
+                    created_at TEXT NOT NULL,
+                    started_at TEXT,
+                    finished_at TEXT,
+                    result_json TEXT,
+                    error_json TEXT,
+                    attempts INTEGER NOT NULL DEFAULT 0
+                )
+                """
+            )
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_jobs_state ON jobs(state, priority)")
+            conn.execute("CREATE INDEX IF NOT EXISTS idx_jobs_dedupe ON jobs(dedupe_key, state)")
+            # Unique partial index: at most one queued/running job per dedupe_key.
+            # Enforces the dedupe invariant across processes (TOCTOU-safe); finished
+            # jobs drop out of the index so a later identical enqueue is allowed.
+            conn.execute(
+                "CREATE UNIQUE INDEX IF NOT EXISTS idx_jobs_dedupe_active "
+                "ON jobs(dedupe_key) WHERE state IN ('queued', 'running')"
+            )
+        # The queue DB holds verbatim payloads (diary text, source paths) — lock it
+        # down to owner-only regardless of the invoking user's umask. The WAL/SHM
+        # sidecars carry the same un-checkpointed payloads, so harden them too when
+        # present (the daemon also runs under a 0o077 umask; this covers any
+        # QueueStore opened outside that scope, e.g. the CLI `daemon jobs` path).
+        _chmod_private(self.path)
+        for sidecar_suffix in ("-wal", "-shm"):
+            sidecar = self.path.with_name(self.path.name + sidecar_suffix)
+            if sidecar.exists():
+                _chmod_private(sidecar)
+
+    def prune_terminal(self, older_than_days: int = JOB_RETENTION_DAYS) -> int:
+        """Delete terminal (succeeded/failed/cancelled) jobs older than the
+        retention window.
+
+        Bounded growth for the queue DB, which holds verbatim payloads. Only
+        terminal jobs are eligible — queued/running jobs are never touched, so
+        a crash mid-prune cannot drop in-flight work (incremental-only). The
+        cutoff uses ``finished_at``; a terminal job is never re-examined by
+        recover_running, so deleting it is safe.
+        """
+        if older_than_days <= 0:
+            return 0
+        cutoff = (datetime.now(timezone.utc) - timedelta(days=older_than_days)).isoformat()
+        with self._lock, self._connect() as conn:
+            cur = conn.execute(
+                """
+                DELETE FROM jobs
+                WHERE state IN ('succeeded', 'failed', 'cancelled')
+                  AND finished_at IS NOT NULL
+                  AND finished_at < ?
+                """,
+                (cutoff,),
+            )
+            return int(cur.rowcount or 0)
+
+    def recover_running(self) -> int:
+        """Re-queue jobs left ``running`` by a crashed/killed daemon.
+
+        Jobs that have already exhausted ``MAX_ATTEMPTS`` claims are dead-lettered
+        to ``failed`` instead of being retried — non-idempotent kinds (diary_write
+        derives its entry_id from wall-clock time) would otherwise duplicate
+        verbatim palace content on every restart, violating the incremental-only
+        principle. The last error_json is preserved for diagnostics.
+        """
+        with self._lock, self._connect() as conn:
+            conn.execute(
+                """
+                UPDATE jobs
+                SET state = 'failed', finished_at = ?,
+                    error_json = COALESCE(error_json, ?)
+                WHERE state = 'running' AND attempts >= ?
+                """,
+                (
+                    _now(),
+                    json.dumps(
+                        {"error_class": "MaxAttemptsExceeded", "message": "max attempts exceeded"},
+                        ensure_ascii=False,
+                    ),
+                    MAX_ATTEMPTS,
+                ),
+            )
+            cur = conn.execute(
+                """
+                UPDATE jobs
+                SET state = 'queued', started_at = NULL
+                WHERE state = 'running' AND attempts < ?
+                """,
+                (MAX_ATTEMPTS,),
+            )
+            return int(cur.rowcount or 0)
+
+    def enqueue(
+        self,
+        kind: str,
+        payload: dict[str, Any],
+        *,
+        dedupe_key: str | None = None,
+        priority: int = 0,
+    ) -> Job:
+        payload_json = json.dumps(payload, ensure_ascii=False, sort_keys=True)
+        with self._lock, self._connect() as conn:
+            if dedupe_key:
+                row = conn.execute(
+                    """
+                    SELECT * FROM jobs
+                    WHERE dedupe_key = ? AND state IN ('queued', 'running')
+                    ORDER BY created_at DESC
+                    LIMIT 1
+                    """,
+                    (dedupe_key,),
+                ).fetchone()
+                if row is not None:
+                    return self._row_to_job(row)
+
+            job_id = uuid.uuid4().hex
+            try:
+                conn.execute(
+                    """
+                    INSERT INTO jobs (
+                        id, kind, payload_json, state, priority, dedupe_key, created_at, attempts
+                    ) VALUES (?, ?, ?, 'queued', ?, ?, ?, 0)
+                    """,
+                    (job_id, kind, payload_json, int(priority), dedupe_key, _now()),
+                )
+            except sqlite3.IntegrityError:
+                # Unique partial index beat us in a cross-process race — return the
+                # job that won. SELECT-then-INSERT is not atomic across processes; the
+                # index is the source of truth.
+                if not dedupe_key:
+                    raise
+                row = conn.execute(
+                    """
+                    SELECT * FROM jobs
+                    WHERE dedupe_key = ? AND state IN ('queued', 'running')
+                    ORDER BY created_at DESC
+                    LIMIT 1
+                    """,
+                    (dedupe_key,),
+                ).fetchone()
+                if row is None:
+                    # Index guard fired but the row is already gone — retry the INSERT.
+                    conn.execute(
+                        """
+                        INSERT INTO jobs (
+                            id, kind, payload_json, state, priority, dedupe_key, created_at, attempts
+                        ) VALUES (?, ?, ?, 'queued', ?, ?, ?, 0)
+                        """,
+                        (job_id, kind, payload_json, int(priority), dedupe_key, _now()),
+                    )
+                    row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
+                return self._row_to_job(row)
+            row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
+            return self._row_to_job(row)
+
+    def claim_next(self) -> Job | None:
+        # Atomic across processes: the UPDATE only fires if the row is still
+        # 'queued'. If two daemon processes SELECT the same row, the first to
+        # UPDATE it flips state to 'running' (rowcount=1); the second's UPDATE
+        # matches 0 rows (WHERE state='queued' is now false) and we re-loop
+        # instead of double-executing the job. The in-process RLock does not
+        # protect against a second OS process — this guard does.
+        with self._lock, self._connect() as conn:
+            row = conn.execute(
+                """
+                SELECT * FROM jobs
+                WHERE state = 'queued'
+                ORDER BY priority DESC, created_at ASC
+                LIMIT 1
+                """
+            ).fetchone()
+            if row is None:
+                return None
+            cur = conn.execute(
+                """
+                UPDATE jobs
+                SET state = 'running', started_at = ?, attempts = attempts + 1
+                WHERE id = ? AND state = 'queued'
+                """,
+                (_now(), row["id"]),
+            )
+            if cur.rowcount != 1:
+                # Lost the race to another process — nothing to run this iteration.
+                return None
+            claimed = conn.execute("SELECT * FROM jobs WHERE id = ?", (row["id"],)).fetchone()
+            return self._row_to_job(claimed)
+
+    def finish(
+        self,
+        job_id: str,
+        *,
+        state: str,
+        result: dict[str, Any] | None = None,
+        error: dict[str, Any] | None = None,
+        only_if_running: bool = False,
+    ) -> Job:
+        # ``only_if_running`` guards the worker's finish against a lost race with
+        # shutdown's cancel: if the active job was already flipped to 'cancelled'
+        # by _drain_and_cleanup, a late worker finish must NOT overwrite it back to
+        # 'succeeded'/'failed' (which would un-cancel a job recover_running must
+        # not re-run). The conditional UPDATE makes the worker's finish a no-op in
+        # that window instead of relying on process-exit timing.
+        where = "WHERE id = ?" + (" AND state = 'running'" if only_if_running else "")
+        with self._lock, self._connect() as conn:
+            conn.execute(
+                f"""
+                UPDATE jobs
+                SET state = ?, finished_at = ?, result_json = ?, error_json = ?
+                {where}
+                """,
+                (
+                    state,
+                    _now(),
+                    json.dumps(result or {}, ensure_ascii=False),
+                    json.dumps(error or {}, ensure_ascii=False) if error else None,
+                    job_id,
+                ),
+            )
+            row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
+            return self._row_to_job(row)
+
+    def get(self, job_id: str) -> Job:
+        with self._lock, self._connect() as conn:
+            row = conn.execute("SELECT * FROM jobs WHERE id = ?", (job_id,)).fetchone()
+            if row is None:
+                raise DaemonError(f"unknown job id: {job_id}")
+            return self._row_to_job(row)
+
+    def list(self, limit: int = 20) -> list[Job]:
+        with self._lock, self._connect() as conn:
+            rows = conn.execute(
+                "SELECT * FROM jobs ORDER BY created_at DESC LIMIT ?",
+                (max(1, int(limit)),),
+            ).fetchall()
+            return [self._row_to_job(row) for row in rows]
+
+    def counts(self) -> dict[str, int]:
+        with self._lock, self._connect() as conn:
+            rows = conn.execute("SELECT state, COUNT(*) AS n FROM jobs GROUP BY state").fetchall()
+            return {str(row["state"]): int(row["n"]) for row in rows}
+
+    @staticmethod
+    def _row_to_job(row: sqlite3.Row) -> Job:
+        def _loads(value):
+            if not value:
+                return None
+            try:
+                return json.loads(value)
+            except json.JSONDecodeError:
+                return None
+
+        return Job(
+            id=str(row["id"]),
+            kind=str(row["kind"]),
+            payload=_loads(row["payload_json"]) or {},
+            state=str(row["state"]),
+            priority=int(row["priority"]),
+            dedupe_key=row["dedupe_key"],
+            created_at=str(row["created_at"]),
+            started_at=row["started_at"],
+            finished_at=row["finished_at"],
+            result=_loads(row["result_json"]),
+            error=_loads(row["error_json"]),
+            attempts=int(row["attempts"]),
+        )
+
+
+def job_to_dict(job: Job, *, include_payload: bool = True) -> dict[str, Any]:
+    out = {
+        "id": job.id,
+        "kind": job.kind,
+        "state": job.state,
+        "priority": job.priority,
+        "dedupe_key": job.dedupe_key,
+        "created_at": job.created_at,
+        "started_at": job.started_at,
+        "finished_at": job.finished_at,
+        "result": job.result,
+        "error": job.error,
+        "attempts": job.attempts,
+    }
+    if include_payload:
+        out["payload"] = job.payload
+    return out
+
+
+class DaemonRuntime:
+    def __init__(self, palace_path: str, backend: str | None = None):
+        self.palace_path = canonical_palace_path(palace_path)
+        self.backend = backend
+        self.store = QueueStore(queue_path(self.palace_path))
+        self.shutdown_event = threading.Event()
+        self.worker_wake = threading.Event()
+        self.active_job_id: str | None = None
+        self.worker_thread: threading.Thread | None = None
+
+    def start_worker(self) -> threading.Thread:
+        self.store.recover_running()
+        # Bounded growth: drop terminal jobs older than the retention window
+        # before bringing the worker up. Best-effort — a prune failure must not
+        # block startup.
+        try:
+            self.store.prune_terminal()
+        except Exception:  # noqa: BLE001 - retention is best-effort, never fatal
+            pass
+        thread = threading.Thread(
+            target=self._worker_loop, name="mempalace-daemon-worker", daemon=True
+        )
+        self.worker_thread = thread
+        thread.start()
+        return thread
+
+    def worker_alive(self) -> bool:
+        return self.worker_thread is not None and self.worker_thread.is_alive()
+
+    def _safe_finish(self, job_id: str, *, state: str, result: dict, error: dict | None) -> None:
+        try:
+            # only_if_running: if shutdown already cancelled this job, don't
+            # resurrect it. A finish failure must not kill the worker regardless.
+            self.store.finish(job_id, state=state, result=result, error=error, only_if_running=True)
+        except Exception:  # noqa: BLE001 - a finish failure must not kill the worker
+            pass
+
+    def _worker_loop(self) -> None:
+        from .service import execute_job
+
+        while not self.shutdown_event.is_set():
+            try:
+                job = self.store.claim_next()
+            except Exception:  # noqa: BLE001 - sqlite/disk errors must not kill the worker
+                self.shutdown_event.wait(1.0)
+                continue
+            if job is None:
+                self.worker_wake.wait(0.5)
+                self.worker_wake.clear()
+                continue
+            self.active_job_id = job.id
+            try:
+                payload = dict(job.payload)
+                # Override, never trust the client: an authenticated request for
+                # palace A must not be able to retarget the daemon at palace B.
+                payload["palace_path"] = self.palace_path
+                if self.backend:
+                    payload["backend"] = self.backend
+                result = execute_job(job.kind, payload)
+                ok = bool(result.get("success", True))
+                state = "succeeded" if ok else "failed"
+                error = None if ok else {"message": result.get("error", "job failed")}
+                self._safe_finish(job.id, state=state, result=result, error=error)
+            except (Exception, SystemExit) as exc:
+                # SystemExit is BaseException, not Exception — catching it here is
+                # deliberate. Without it, a sys.exit() in a dependency would slip
+                # past `except Exception`, kill this worker thread, leave the job
+                # stuck in 'running' forever, and stall every later job while
+                # /health keeps reporting ok. (See mcp_server.py tool_mine for the
+                # same BaseException-slip-past semantics, documented in comments.)
+                self._safe_finish(
+                    job.id,
+                    state="failed",
+                    result={"success": False, "exit_code": 1},
+                    error={"error_class": type(exc).__name__, "message": str(exc)},
+                )
+            finally:
+                self.active_job_id = None
+
+
+def _json_response(handler: BaseHTTPRequestHandler, status: int, payload: dict[str, Any]) -> None:
+    body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
+    handler.send_response(status)
+    handler.send_header("Content-Type", "application/json; charset=utf-8")
+    handler.send_header("Content-Length", str(len(body)))
+    handler.send_header("Connection", "close")
+    handler.end_headers()
+    handler.wfile.write(body)
+    handler.close_connection = True
+
+
+def run_server(palace_path: str, *, backend: str | None = None, port: int = 0) -> None:
+    palace_path = canonical_palace_path(palace_path)
+    previous_env = {
+        "MEMPALACE_PALACE_PATH": os.environ.get("MEMPALACE_PALACE_PATH"),
+        "MEMPALACE_BACKEND_EXPLICIT": os.environ.get("MEMPALACE_BACKEND_EXPLICIT"),
+        "MEMPALACE_BACKEND": os.environ.get("MEMPALACE_BACKEND"),
+    }
+    os.environ["MEMPALACE_PALACE_PATH"] = palace_path
+    if backend:
+        os.environ["MEMPALACE_BACKEND_EXPLICIT"] = backend
+        os.environ["MEMPALACE_BACKEND"] = backend
+    # Privacy by architecture: tighten the umask to owner-only BEFORE the queue
+    # DB is created. SQLite's WAL/SHM sidecars hold un-checkpointed verbatim
+    # payloads and are (re)created with the process umask on every open/close
+    # cycle, so the umask must already be tight when DaemonRuntime builds the
+    # QueueStore (its _init_db opens the DB in WAL mode) — not only once the HTTP
+    # server starts. Restored in the finally at the end of run_server.
+    prev_umask = os.umask(0o077)
+    token = ensure_token(palace_path)
+    runtime = DaemonRuntime(palace_path, backend=backend)
+
+    class _Handler(BaseHTTPRequestHandler):
+        protocol_version = "HTTP/1.1"
+        timeout = 10
+
+        def log_message(self, fmt, *args):  # pragma: no cover - stdlib access logging noise
+            return
+
+        def _authorized(self) -> bool:
+            auth = self.headers.get("Authorization")
+            if auth and secrets.compare_digest(auth, f"Bearer {token}"):
+                return True
+            _json_response(self, 401, {"error": "unauthorized"})
+            return False
+
+        def _read_json(self) -> dict[str, Any]:
+            length = int(self.headers.get("Content-Length", "0") or "0")
+            # Reject a negative Content-Length explicitly: self.rfile.read(-1)
+            # would read until the client closes the connection, blocking the
+            # worker and bypassing the MAX_BODY_BYTES cap (an auth-gated DoS).
+            if length < 0:
+                raise ValueError("invalid Content-Length")
+            if length > MAX_BODY_BYTES:
+                raise ValueError("request body too large")
+            raw = self.rfile.read(length)
+            return json.loads(raw.decode("utf-8")) if raw else {}
+
+        def do_GET(self):
+            if not self._authorized():
+                return
+            try:
+                self._handle_get()
+            except Exception as exc:  # noqa: BLE001 - malformed query/DB error → 400
+                _json_response(self, 400, {"error": str(exc)})
+
+        def _handle_get(self):
+            parsed = urlparse(self.path)
+            if parsed.path == "/health":
+                _json_response(
+                    self,
+                    200,
+                    {
+                        "ok": True,
+                        "worker_alive": runtime.worker_alive(),
+                        "pid": os.getpid(),
+                        "palace_path": runtime.palace_path,
+                        "backend": runtime.backend,
+                        "active_job_id": runtime.active_job_id,
+                        "counts": runtime.store.counts(),
+                    },
+                )
+                return
+            if parsed.path == "/jobs":
+                qs = parse_qs(parsed.query)
+                limit = int((qs.get("limit") or ["20"])[0])
+                jobs = [
+                    job_to_dict(job, include_payload=False) for job in runtime.store.list(limit)
+                ]
+                _json_response(self, 200, {"jobs": jobs})
+                return
+            if parsed.path.startswith("/jobs/"):
+                job_id = parsed.path.rsplit("/", 1)[-1]
+                try:
+                    job = runtime.store.get(job_id)
+                except DaemonError as exc:
+                    _json_response(self, 404, {"error": str(exc)})
+                    return
+                # Payloads carry verbatim user content (diary text) — do not return
+                # them over HTTP unless the caller explicitly opts in.
+                qs = parse_qs(parsed.query)
+                include_payload = qs.get("include_payload", ["false"])[0].lower() in (
+                    "1",
+                    "true",
+                    "yes",
+                    "on",
+                )
+                _json_response(
+                    self, 200, {"job": job_to_dict(job, include_payload=include_payload)}
+                )
+                return
+            _json_response(self, 404, {"error": "not found"})
+
+        def do_POST(self):
+            if not self._authorized():
+                return
+            parsed = urlparse(self.path)
+            if parsed.path == "/jobs":
+                try:
+                    body = self._read_json()
+                    job = runtime.store.enqueue(
+                        str(body.get("kind") or ""),
+                        body.get("payload") or {},
+                        dedupe_key=body.get("dedupe_key"),
+                        priority=int(body.get("priority") or 0),
+                    )
+                    runtime.worker_wake.set()
+                except Exception as exc:  # noqa: BLE001 - client gets structured failure
+                    _json_response(self, 400, {"error": str(exc)})
+                    return
+                _json_response(self, 202, {"job": job_to_dict(job)})
+                return
+            if parsed.path == "/shutdown":
+                _json_response(self, 200, {"ok": True})
+                runtime.shutdown_event.set()
+                threading.Thread(target=httpd.shutdown, daemon=True).start()
+                return
+            _json_response(self, 404, {"error": "not found"})
+
+    class _Server(ThreadingHTTPServer):
+        daemon_threads = True
+        allow_reuse_address = True
+
+        def server_bind(self):
+            # http.server's HTTPServer.server_bind() calls socket.getfqdn(host)
+            # to set server_name — a reverse-DNS lookup. For our 127.0.0.1 bind
+            # that lookup is useless, and on a host with slow or absent reverse
+            # DNS it blocks daemon startup for ~30s (until the resolver times
+            # out), which looks exactly like the daemon never coming up. Bind via
+            # TCPServer directly and set the name from the literal host instead.
+            import socketserver
+
+            socketserver.TCPServer.server_bind(self)
+            host, port = self.server_address[:2]
+            self.server_name = host
+            self.server_port = port
+
+    # The owner-only umask set above (before DaemonRuntime built the queue DB)
+    # covers every file this process creates — queue.sqlite3, its WAL/SHM
+    # sidecars, and any future artifact — and is restored in the finally below.
+    try:
+        with _Server((HOST, port), _Handler) as httpd:
+            actual_port = int(httpd.server_address[1])
+            sd = state_dir(palace_path)
+            sd.mkdir(parents=True, exist_ok=True)
+            _chmod_dir_private(sd)
+            endpoint = {
+                "host": HOST,
+                "port": actual_port,
+                "pid": os.getpid(),
+                "palace_path": palace_path,
+                "started_at": _now(),
+            }
+            _write_private(endpoint_path(palace_path), json.dumps(endpoint, indent=2) + "\n")
+            _write_private(pid_path(palace_path), f"{os.getpid()}\n")
+            runtime.start_worker()
+            try:
+                httpd.serve_forever(poll_interval=0.5)
+            finally:
+                _drain_and_cleanup(runtime, palace_path, previous_env)
+    finally:
+        os.umask(prev_umask)
+
+
+def _drain_and_cleanup(
+    runtime: "DaemonRuntime", palace_path: str, previous_env: dict[str, str | None]
+) -> None:
+    """Drain the active job, then tear down server-side state.
+
+    Killing a daemon thread mid-write (mid mine upsert, mid irreversible sync
+    DELETE) violates incremental-only. Give the worker a bounded window to
+    finish, then mark whatever is still running as cancelled so recover_running
+    won't blindly re-run it on the next start (which would duplicate verbatim
+    content). Finally restore the env vars run_server mutated.
+    """
+    runtime.shutdown_event.set()
+    worker = runtime.worker_thread
+    if worker is not None:
+        worker.join(timeout=SHUTDOWN_DRAIN_SECONDS)
+    active = runtime.active_job_id
+    if active:
+        runtime._safe_finish(
+            active,
+            state="cancelled",
+            result={"success": False, "exit_code": 1},
+            error={"message": "cancelled by daemon shutdown"},
+        )
+    for stale in (endpoint_path(palace_path), pid_path(palace_path)):
+        try:
+            stale.unlink()
+        except OSError:
+            pass
+    for key, value in previous_env.items():
+        if value is None:
+            os.environ.pop(key, None)
+        else:
+            os.environ[key] = value
+
+
+class DaemonClient:
+    def __init__(self, palace_path: str):
+        self.palace_path = canonical_palace_path(palace_path)
+        endpoint = _read_endpoint(self.palace_path)
+        port = endpoint.get("port")
+        if port is None:
+            raise DaemonError("daemon endpoint missing port")
+        # Don't read the token until we trust the endpoint points at a live
+        # process we started: a stale endpoint whose pid is dead may have its
+        # port reused by an unrelated process, and we must not send our bearer
+        # token there.
+        pid = endpoint.get("pid")
+        if pid is not None and not _pid_alive(int(pid)):
+            raise DaemonError("daemon endpoint pid is not alive")
+        self.token = read_token(self.palace_path)
+        self.host = endpoint.get("host") or HOST
+        self.port = int(port)
+        # The daemon is always on 127.0.0.1, so a request must never go through
+        # an HTTP proxy. Building an opener with an empty ProxyHandler bypasses
+        # urllib's proxy discovery entirely. On macOS that discovery
+        # (urllib.request._scproxy, via the SystemConfiguration framework) runs
+        # on the first request to any host and is NOT bounded by the per-request
+        # timeout — on a CI runner with no network it can hang for tens of
+        # seconds, which looks exactly like the daemon never came up. A no-proxy
+        # opener is the correct production choice here and also removes that hang.
+        self._opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
+
+    @property
+    def base_url(self) -> str:
+        return f"http://{self.host}:{self.port}"
+
+    def request(
+        self,
+        method: str,
+        path: str,
+        body: dict[str, Any] | None = None,
+        *,
+        timeout: float = 5.0,
+    ) -> dict[str, Any]:
+        data = None if body is None else json.dumps(body, ensure_ascii=False).encode("utf-8")
+        req = urlrequest.Request(
+            self.base_url + path,
+            data=data,
+            method=method,
+            headers={
+                "Authorization": f"Bearer {self.token}",
+                "Content-Type": "application/json",
+            },
+        )
+        try:
+            with self._opener.open(req, timeout=timeout) as resp:
+                raw = resp.read().decode("utf-8")
+        except urlerror.HTTPError as exc:
+            raw = exc.read().decode("utf-8", errors="replace")
+            try:
+                payload = json.loads(raw)
+            except json.JSONDecodeError:
+                payload = {"error": raw or str(exc)}
+            raise DaemonError(str(payload.get("error", exc))) from exc
+        except OSError as exc:
+            raise DaemonError(str(exc)) from exc
+        if not raw:
+            return {}
+        try:
+            return json.loads(raw)
+        except json.JSONDecodeError as exc:
+            # A 2xx response with a non-JSON body (empty 200, truncated write,
+            # proxy HTML) shouldn't surface as a bare JSONDecodeError to callers
+            # that only know how to handle DaemonError.
+            raise DaemonError(f"daemon returned non-JSON response: {raw[:200]!r}") from exc
+
+    def health(self, *, timeout: float = 5.0) -> dict[str, Any]:
+        return self.request("GET", "/health", timeout=timeout)
+
+    def submit(
+        self,
+        kind: str,
+        payload: dict[str, Any],
+        *,
+        dedupe_key: str | None = None,
+        priority: int = 0,
+    ) -> dict[str, Any]:
+        return self.request(
+            "POST",
+            "/jobs",
+            {"kind": kind, "payload": payload, "dedupe_key": dedupe_key, "priority": priority},
+        )["job"]
+
+    def get_job(self, job_id: str) -> dict[str, Any]:
+        return self.request("GET", f"/jobs/{job_id}")["job"]
+
+    def list_jobs(self, limit: int = 20) -> list[dict[str, Any]]:
+        return self.request("GET", f"/jobs?limit={int(limit)}")["jobs"]
+
+    def wait(self, job_id: str, *, timeout: float = DEFAULT_WAIT_TIMEOUT) -> dict[str, Any]:
+        deadline = time.monotonic() + timeout
+        while True:
+            job = self.get_job(job_id)
+            if job["state"] in TERMINAL_STATES:
+                return job
+            if time.monotonic() >= deadline:
+                raise DaemonError(f"timed out waiting for job {job_id}")
+            time.sleep(0.2)
+
+    def shutdown(self) -> dict[str, Any]:
+        return self.request("POST", "/shutdown", {})
+
+
+def get_client_if_running(palace_path: str, *, health_timeout: float = 5.0) -> DaemonClient | None:
+    # health_timeout bounds the liveness probe. Hook callers (subject to the
+    # ~500ms hook budget) pass a short value via HOOK_PROBE_TIMEOUT so a wedged
+    # daemon — endpoint present, HTTP server not answering — can't stall the
+    # hook for the default 5s before it falls back to the direct path.
+    try:
+        client = DaemonClient(palace_path)
+        client.health(timeout=health_timeout)
+        return client
+    except DaemonError:
+        return None
+
+
+def _detached_kwargs(log_path: Path) -> dict[str, Any]:
+    log_path.parent.mkdir(parents=True, exist_ok=True)
+    log_fh = open(log_path, "a", encoding="utf-8")
+    # The daemon log may capture verbatim content in tracebacks — owner-only.
+    _chmod_private(log_path)
+    kwargs: dict[str, Any] = {
+        "stdin": subprocess.DEVNULL,
+        "stdout": log_fh,
+        "stderr": log_fh,
+        "close_fds": True,
+    }
+    if os.name == "nt":
+        flags = 0
+        for name in ("DETACHED_PROCESS", "CREATE_NEW_PROCESS_GROUP", "CREATE_BREAKAWAY_FROM_JOB"):
+            flags |= getattr(subprocess, name, 0)
+        if flags:
+            kwargs["creationflags"] = flags
+    else:
+        kwargs["start_new_session"] = True
+    return kwargs
+
+
+def start_daemon(
+    palace_path: str,
+    *,
+    backend: str | None = None,
+    foreground: bool = False,
+    timeout: float = 15.0,
+) -> DaemonClient:
+    palace_path = canonical_palace_path(palace_path)
+    ensure_token(palace_path)
+    existing = get_client_if_running(palace_path)
+    if existing is not None:
+        return existing
+    if foreground:
+        # Blocks until the daemon stops. A clean stop is a normal exit, not an
+        # error — return None so the caller (cmd_daemon) exits 0.
+        run_server(palace_path, backend=backend, port=0)
+        return None  # type: ignore[return-value]
+
+    sd = state_dir(palace_path)
+    sd.mkdir(parents=True, exist_ok=True)
+    _chmod_dir_private(sd)
+
+    # Spawn mutual exclusion: two concurrent `daemon start` callers would both
+    # observe no running daemon and both spawn a child, double-claiming jobs.
+    # A non-blocking flock serializes the check-then-spawn; the loser waits for
+    # the winner to finish coming up, then re-checks and reuses that daemon.
+    lock_fh = open(sd / "start.lock", "w") if _fcntl is not None else None
+    if _fcntl is not None and lock_fh is not None:
+        _chmod_private(sd / "start.lock")
+        try:
+            _fcntl.flock(lock_fh.fileno(), _fcntl.LOCK_EX | _fcntl.LOCK_NB)
+        except OSError:
+            # Another start is in flight — wait for it, then reuse its daemon.
+            _fcntl.flock(lock_fh.fileno(), _fcntl.LOCK_EX)
+            existing = get_client_if_running(palace_path)
+            if existing is not None:
+                return existing
+            # The other starter failed without bringing the daemon up; fall
+            # through and spawn ourselves (we now hold the lock).
+
+    for stale in (endpoint_path(palace_path), pid_path(palace_path)):
+        try:
+            stale.unlink()
+        except OSError:
+            pass
+    cmd = [
+        sys.executable,
+        "-m",
+        "mempalace.daemon",
+        "serve",
+        "--palace",
+        palace_path,
+    ]
+    if backend:
+        cmd.extend(["--backend", backend])
+    env = os.environ.copy()
+    if STATE_ROOT_ENV in os.environ:
+        env[STATE_ROOT_ENV] = os.environ[STATE_ROOT_ENV]
+    kwargs = _detached_kwargs(sd / "daemon.log")
+    proc = None
+    try:
+        proc = subprocess.Popen(cmd, env=env, **kwargs)
+    finally:
+        log_fh = kwargs.get("stdout")
+        if hasattr(log_fh, "close"):
+            log_fh.close()
+    try:
+        deadline = time.monotonic() + timeout
+        last_error = None
+        while time.monotonic() < deadline:
+            if proc.poll() is not None:
+                raise DaemonError(f"daemon exited during startup with code {proc.returncode}")
+            try:
+                client = DaemonClient(palace_path)
+                client.health()
+                return client
+            except DaemonError as exc:
+                last_error = exc
+                time.sleep(0.1)
+        raise DaemonError(f"daemon did not become ready: {last_error}")
+    except BaseException:
+        # Readiness failed — don't leak an orphaned detached child holding the
+        # port, token, queue, and log handle. Kill and reap it before raising.
+        if proc is not None and proc.poll() is None:
+            try:
+                proc.kill()
+                proc.wait()
+            except Exception:  # noqa: BLE001 - cleanup best-effort
+                pass
+        raise
+    finally:
+        if lock_fh is not None:
+            try:
+                lock_fh.close()
+            except Exception:  # noqa: BLE001 - cleanup best-effort
+                pass
+
+
+def ensure_client(
+    palace_path: str, *, backend: str | None = None, auto_start: bool = True
+) -> DaemonClient:
+    palace_path = canonical_palace_path(palace_path)
+    client = get_client_if_running(palace_path)
+    if client is not None:
+        return client
+    if not auto_start:
+        raise DaemonError("daemon is not running")
+    return start_daemon(palace_path, backend=backend)
+
+
+def submit_job(
+    kind: str,
+    payload: dict[str, Any],
+    *,
+    palace_path: str | None = None,
+    backend: str | None = None,
+    dedupe_key: str | None = None,
+    priority: int = 0,
+    wait: bool = True,
+    auto_start: bool = False,
+    timeout: float = DEFAULT_WAIT_TIMEOUT,
+) -> dict[str, Any]:
+    # Strictly opt-in: callers that want the daemon auto-started must say so
+    # explicitly (the CLI --daemon path passes auto_start=True). The default
+    # refuses to spawn a long-lived process on a background code path.
+    resolved_palace = canonical_palace_path(palace_path or payload.get("palace_path"))
+    payload = dict(payload)
+    payload["palace_path"] = resolved_palace  # override, never trust client input
+    if backend:
+        payload["backend"] = backend
+    client = ensure_client(resolved_palace, backend=backend, auto_start=auto_start)
+    job = client.submit(kind, payload, dedupe_key=dedupe_key, priority=priority)
+    if not wait:
+        return job
+    return client.wait(job["id"], timeout=timeout)
+
+
+def stop_daemon(palace_path: str) -> bool:
+    client = get_client_if_running(palace_path)
+    if client is None:
+        return False
+    client.shutdown()
+    return True
+
+
+def _cmd_serve(args) -> None:
+    run_server(args.palace, backend=args.backend, port=args.port)
+
+
+def main(argv: list[str] | None = None) -> None:
+    parser = argparse.ArgumentParser(description="MemPalace daemon internals")
+    sub = parser.add_subparsers(dest="command", required=True)
+    serve = sub.add_parser("serve")
+    serve.add_argument("--palace", required=True)
+    serve.add_argument("--backend", default=None)
+    serve.add_argument("--port", type=int, default=0)
+    args = parser.parse_args(argv)
+    if args.command == "serve":
+        _cmd_serve(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/mempalace/embedding.py b/mempalace/embedding.py
index 9dfb5861e..8952f91ee 100644
--- a/mempalace/embedding.py
+++ b/mempalace/embedding.py
@@ -32,6 +32,7 @@
 from __future__ import annotations
 
 import logging
+import os
 import threading
 from typing import Optional
 
@@ -112,6 +113,35 @@ def _resolve_providers(device: str) -> tuple[list, str]:
     return (requested, device)
 
 
+def _intra_op_session_options(intra_op_num_threads: int):
+    """Build ORT ``SessionOptions`` capping the intra-op thread pool (#1068).
+
+    Returns ``None`` when ``intra_op_num_threads <= 0`` so the caller leaves
+    ORT at its default (≈ physical core count). ChromaDB's embedder ignores
+    ``OMP_NUM_THREADS`` — ORT owns its own intra-op pool, settable only via
+    ``SessionOptions`` at session construction — so a cap has to be threaded
+    through here rather than via the environment.
+    """
+    if not intra_op_num_threads or intra_op_num_threads <= 0:
+        return None
+    import onnxruntime as ort
+
+    so = ort.SessionOptions()
+    so.intra_op_num_threads = intra_op_num_threads
+    return so
+
+
+def _resolve_intra_op_threads() -> int:
+    """Read the configured ORT intra-op thread cap (``0`` = uncapped, #1068)."""
+    try:
+        from .config import MempalaceConfig
+
+        return MempalaceConfig().embedding_threads
+    except Exception:
+        logger.debug("embedding_threads resolution failed; leaving ORT default", exc_info=True)
+        return 0
+
+
 def _build_ef_class():
     """Subclass ``ONNXMiniLM_L6_V2`` with name ``"default"``.
 
@@ -122,13 +152,51 @@ def _build_ef_class():
     palaces created with ``DefaultEmbeddingFunction`` *and* palaces we
     create ourselves, with the same GPU-capable ``preferred_providers``.
     """
+    from functools import cached_property
+
     from chromadb.utils.embedding_functions import ONNXMiniLM_L6_V2
 
     class _MempalaceONNX(ONNXMiniLM_L6_V2):
+        def __init__(self, preferred_providers=None, intra_op_num_threads=0):
+            super().__init__(preferred_providers=preferred_providers)
+            self._intra_op_num_threads = intra_op_num_threads
+
         @staticmethod
         def name() -> str:
             return "default"
 
+        @cached_property
+        def model(self):
+            # Upstream builds the InferenceSession with no intra-op thread cap,
+            # so ORT defaults its pool to the physical core count and a
+            # background mine pins every core (#1068). Rebuild the session the
+            # same way upstream does (same SessionOptions, same CoreML pruning,
+            # same model path) but with our cap applied. If upstream's
+            # internals shift, fall back to its uncapped build so embedding
+            # still works.
+            cap = getattr(self, "_intra_op_num_threads", 0)
+            if not cap or cap <= 0:
+                return super().model
+            try:
+                ort = self.ort
+                providers = self._preferred_providers or ort.get_available_providers()
+                providers = [p for p in providers if p != "CoreMLExecutionProvider"]
+                so = ort.SessionOptions()
+                so.log_severity_level = 3
+                so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+                so.intra_op_num_threads = cap
+                return ort.InferenceSession(
+                    os.path.join(self.DOWNLOAD_PATH, self.EXTRACTED_FOLDER_NAME, "model.onnx"),
+                    providers=providers,
+                    sess_options=so,
+                )
+            except Exception:
+                logger.warning(
+                    "thread-capped ORT session build failed; using ORT defaults",
+                    exc_info=True,
+                )
+                return super().model
+
     return _MempalaceONNX
 
 
@@ -173,13 +241,19 @@ def name() -> str:
         # when switching models. Keep it stable.
         return "embeddinggemma_300m"
 
-    def __init__(self, preferred_providers=None, batch_size: int = _EMBEDDINGGEMMA_BATCH_SIZE):
+    def __init__(
+        self,
+        preferred_providers=None,
+        batch_size: int = _EMBEDDINGGEMMA_BATCH_SIZE,
+        intra_op_num_threads: int = 0,
+    ):
         if batch_size < 1:
             raise ValueError(f"batch_size must be >= 1, got {batch_size}")
         self._providers = (
             list(preferred_providers) if preferred_providers else ["CPUExecutionProvider"]
         )
         self._batch_size = batch_size
+        self._intra_op_num_threads = intra_op_num_threads
         self._session = None
         self._tokenizer = None
         self._np = None
@@ -221,7 +295,11 @@ def _lazy_load(self) -> None:
             )
             tok_path = hf_hub_download(_EMBEDDINGGEMMA_REPO, filename="tokenizer.json")
 
-            session = ort.InferenceSession(model_path, providers=self._providers)
+            session = ort.InferenceSession(
+                model_path,
+                sess_options=_intra_op_session_options(self._intra_op_num_threads),
+                providers=self._providers,
+            )
             out_names = [o.name for o in session.get_outputs()]
             # Model card: sentence_embedding is the pooled output (last_hidden_state
             # is the per-token output we don't want).
@@ -309,12 +387,13 @@ def get_embedding_function(device: Optional[str] = None, model: Optional[str] =
         if cached is not None:
             return cached
 
+        threads = _resolve_intra_op_threads()
         if model == "embeddinggemma":
-            ef = EmbeddinggemmaONNX(preferred_providers=providers)
+            ef = EmbeddinggemmaONNX(preferred_providers=providers, intra_op_num_threads=threads)
         else:
             # Default: minilm (or anything we don't recognize — back-compat win).
             ef_cls = _build_ef_class()
-            ef = ef_cls(preferred_providers=providers)
+            ef = ef_cls(preferred_providers=providers, intra_op_num_threads=threads)
 
         _EF_CACHE[cache_key] = ef
     logger.info(
diff --git a/mempalace/hooks_cli.py b/mempalace/hooks_cli.py
index 3b86477e2..426b29ff1 100644
--- a/mempalace/hooks_cli.py
+++ b/mempalace/hooks_cli.py
@@ -1,8 +1,8 @@
 """
-Hook logic for MemPalace — Python implementation of session-start, stop, and precompact hooks.
+Hook logic for MemPalace — Python implementation of session-start, stop, session-end, and precompact hooks.
 
 Reads JSON from stdin, outputs JSON to stdout.
-Supported hooks: session-start, stop, precompact
+Supported hooks: session-start, stop, session-end, precompact
 Supported harnesses: claude-code, codex (extensible to cursor, gemini, etc.)
 """
 
@@ -25,7 +25,7 @@
 
 
 def _detached_popen_kwargs() -> dict:
-    """Kwargs that fully detach a Popen child so the hook process can exit.
+    """Kwargs that give a Popen child a hidden console so the hook can exit.
 
     Without these, Windows holds the parent open until the child closes the
     inherited stdout/stderr handles — manifesting as "Stop hook hangs" at
@@ -36,7 +36,7 @@ def _detached_popen_kwargs() -> dict:
     kwargs: dict = {"stdin": subprocess.DEVNULL, "close_fds": True}
     if os.name == "nt":
         flags = 0
-        for name in ("DETACHED_PROCESS", "CREATE_NEW_PROCESS_GROUP", "CREATE_BREAKAWAY_FROM_JOB"):
+        for name in ("CREATE_NO_WINDOW", "CREATE_NEW_PROCESS_GROUP", "CREATE_BREAKAWAY_FROM_JOB"):
             flags |= getattr(subprocess, name, 0)
         if flags:
             kwargs["creationflags"] = flags
@@ -509,6 +509,73 @@ def _spawn_mine(cmd: list) -> None:
         pass
 
 
+def _hooks_daemon_enabled() -> bool:
+    try:
+        return MempalaceConfig().hook_use_daemon is True
+    except Exception:
+        return False
+
+
+def _daemon_mine_dedupe_key(source: str, mode: str) -> str:
+    try:
+        source_key = str(Path(source).expanduser().resolve())
+    except OSError:
+        source_key = str(Path(source).expanduser())
+    return f"hook:mine:{mode}:{source_key}"
+
+
+def _daemon_available() -> bool:
+    """True iff a daemon is already running for the configured palace.
+
+    This is a fast localhost health check, not a spawn: the 500ms hook budget
+    forbids auto-starting a python subprocess from a hook (cold start is
+    ~15s). Daemon mode for hooks requires the user to have started the daemon
+    explicitly via `mempalace daemon start`; when it isn't up, hooks fall back
+    to the existing direct (in-process / spawn) path instead of blocking.
+    """
+    from .daemon import HOOK_PROBE_TIMEOUT, get_client_if_running
+
+    try:
+        return (
+            get_client_if_running(MempalaceConfig().palace_path, health_timeout=HOOK_PROBE_TIMEOUT)
+            is not None
+        )
+    except Exception:
+        return False
+
+
+def _submit_daemon_job(
+    kind: str,
+    payload: dict,
+    *,
+    dedupe_key: str = None,
+    priority: int = 0,
+    wait: bool = False,
+    timeout: float = 60.0,
+):
+    """Submit to an already-running daemon. Never auto-starts (see _daemon_available).
+
+    Raises DaemonError on a real failure (job rejected, timeout, daemon died
+    mid-submit). Callers must NOT fall back to the direct path on such errors —
+    the daemon may already have accepted the job, and re-running it would
+    duplicate verbatim content. Only an absent daemon (handled by the caller's
+    _daemon_available() precheck) should fall back.
+    """
+    from .daemon import submit_job
+
+    palace_path = MempalaceConfig().palace_path
+    return submit_job(
+        kind,
+        payload,
+        palace_path=palace_path,
+        dedupe_key=dedupe_key,
+        priority=priority,
+        wait=wait,
+        auto_start=False,
+        timeout=timeout,
+    )
+
+
 def _maybe_auto_ingest():
     """Background-mine MEMPAL_DIR (project files) if set.
 
@@ -527,9 +594,26 @@ def _maybe_auto_ingest():
         return
     for mine_dir, mode in targets:
         try:
+            if _hooks_daemon_enabled() and _daemon_available():
+                try:
+                    _submit_daemon_job(
+                        "mine",
+                        {"source": mine_dir, "mode": mode, "agent": "mempalace"},
+                        dedupe_key=_daemon_mine_dedupe_key(mine_dir, mode),
+                        wait=False,
+                    )
+                except Exception as exc:
+                    # Daemon accepted context — don't fall back (would double-mine).
+                    _log(f"Daemon mine submission failed: {exc}")
+                continue
             _spawn_mine([_mempalace_python(), "-m", "mempalace", "mine", mine_dir, "--mode", mode])
         except OSError:
             pass
+        except Exception as exc:
+            # Non-daemon spawn path failed. Hooks must never crash the user's
+            # shell — log and continue. Do not label this a daemon failure: the
+            # daemon block above handles its own errors with its own message.
+            _log(f"mine hook failed: {exc}")
 
 
 def _mine_sync():
@@ -546,6 +630,22 @@ def _mine_sync():
     log_path = STATE_DIR / "hook.log"
     for mine_dir, mode in targets:
         try:
+            if _hooks_daemon_enabled() and _daemon_available():
+                try:
+                    job = _submit_daemon_job(
+                        "mine",
+                        {"source": mine_dir, "mode": mode, "agent": "mempalace"},
+                        dedupe_key=_daemon_mine_dedupe_key(mine_dir, mode),
+                        wait=True,
+                        timeout=60,
+                    )
+                    result = job.get("result") or {}
+                    if job.get("state") != "succeeded" or not result.get("success", True):
+                        _log(f"Daemon sync mine failed: {result.get('error', job.get('error'))}")
+                except Exception as exc:
+                    # Daemon accepted context — don't fall back (would double-mine).
+                    _log(f"Daemon sync mine submission failed: {exc}")
+                continue
             with open(log_path, "a") as log_f:
                 subprocess.run(
                     [
@@ -563,6 +663,11 @@ def _mine_sync():
                 )
         except (OSError, subprocess.TimeoutExpired):
             pass
+        except Exception as exc:
+            # Non-daemon sync spawn path failed. Hooks must never crash the
+            # user's shell — log and continue (not a daemon failure; the daemon
+            # block above handles its own errors).
+            _log(f"mine hook failed: {exc}")
 
 
 def _desktop_toast(body: str, title: str = "MemPalace"):
@@ -680,6 +785,41 @@ def _save_diary_direct(
     )
 
     try:
+        if _hooks_daemon_enabled() and _daemon_available():
+            try:
+                job = _submit_daemon_job(
+                    "diary_write",
+                    {
+                        "agent_name": agent_name,
+                        "entry": entry,
+                        "topic": "checkpoint",
+                        "wing": wing,
+                    },
+                    priority=10,
+                    wait=True,
+                    timeout=30,
+                )
+            except Exception as exc:
+                # Daemon accepted context — don't fall back (would double-write).
+                _log(f"Daemon diary checkpoint failed: {exc}")
+                return {"count": 0}
+            result = job.get("result") or {}
+            if job.get("state") == "succeeded" and result.get("success"):
+                _log(f"Diary checkpoint saved: {result.get('entry_id', '?')}")
+                try:
+                    ack_file = STATE_DIR / "last_checkpoint"
+                    ack_file.write_text(
+                        json.dumps({"msgs": len(messages), "ts": now.isoformat()}),
+                        encoding="utf-8",
+                    )
+                except OSError:
+                    pass
+                if toast:
+                    _desktop_toast(f"Checkpoint saved - {len(messages)} messages archived")
+                return {"count": len(messages), "themes": themes}
+            _log(f"Daemon diary checkpoint failed: {result.get('error', job.get('error'))}")
+            return {"count": 0}
+
         from .mcp_server import tool_diary_write
 
         result = tool_diary_write(
@@ -721,6 +861,25 @@ def _ingest_transcript(transcript_path: str):
         return
 
     try:
+        if _hooks_daemon_enabled() and _daemon_available():
+            try:
+                _submit_daemon_job(
+                    "mine",
+                    {
+                        "source": str(path.parent),
+                        "mode": "convos",
+                        "wing": "sessions",
+                        "agent": "mempalace",
+                    },
+                    dedupe_key=_daemon_mine_dedupe_key(str(path.parent), "convos"),
+                    wait=False,
+                )
+                _log(f"Transcript ingest submitted to daemon: {path.name}")
+            except Exception as exc:
+                # Daemon accepted context — don't fall back (would double-mine).
+                _log(f"Daemon transcript ingest failed: {exc}")
+            return
+
         # Route through ``_spawn_mine`` so the per-target PID guard kicks
         # in here too — repeated Stop/PreCompact fires for the same
         # transcript should not stack up parallel ingest mines.
@@ -740,6 +899,11 @@ def _ingest_transcript(transcript_path: str):
         _log(f"Transcript ingest started: {path.name}")
     except OSError:
         pass
+    except Exception as exc:
+        # Non-daemon ingest spawn path failed. Hooks must never crash the
+        # user's shell — log and continue (not a daemon failure; the daemon
+        # block above handles its own errors).
+        _log(f"transcript ingest hook failed: {exc}")
 
 
 SUPPORTED_HARNESSES = {"claude-code", "codex"}
@@ -1021,6 +1185,116 @@ def hook_session_start(data: dict, harness: str):
     _output({})
 
 
+def _clear_session_last_save(session_id: str) -> None:
+    """Drop the per-session save marker once a session has ended.
+
+    ``hook_stop`` writes ``{session_id}_last_save`` but never had a clean-exit
+    cleanup path, so the marker lingered. The session is over by the time
+    ``hook_session_end`` runs, so removing it here keeps ``hook_state/`` from
+    accumulating dead markers. OS errors (including a missing marker, since
+    ``FileNotFoundError`` is an ``OSError``) are swallowed — this is best-effort
+    cleanup, never a reason to fail the hook.
+    """
+    try:
+        (STATE_DIR / f"{session_id}_last_save").unlink()
+    except OSError:
+        pass
+
+
+def hook_session_end(data: dict, harness: str):
+    """Session end hook: one final flush when a session exits cleanly.
+
+    Closes the gap (#1341) where a session that never crosses ``SAVE_INTERVAL``
+    on ``Stop`` and never triggers ``PreCompact`` exits with nothing saved —
+    the common case for short, useful sessions.
+
+    Why background instead of mine inline: Claude Code's hooks reference
+    documents a default SessionEnd timeout of 1.5 seconds, and "timeouts set on
+    plugin-provided hooks do not raise the budget"
+    (https://code.claude.com/docs/en/hooks). A cold ``mempalace`` start alone
+    exceeds 1.5s, so this handler must never mine in the hook foreground. The
+    shell wrapper backgrounds it and returns immediately; the heavy capture is
+    spawned *detached* via ``_ingest_transcript`` / ``_maybe_auto_ingest`` (both
+    route through ``_spawn_mine`` / ``_detached_popen_kwargs``). On POSIX that
+    detached child reliably outlives the session (verified). On Windows only the
+    mine grandchild (spawned with detached-process flags) is designed to break
+    away from the session; the backgrounded hook process and the in-process
+    diary write are best-effort there (no Windows CI coverage yet). This
+    honors the "background everything / hooks under 500ms" budget. SessionEnd
+    has no decision control, so this only ever saves; it never emits a block
+    payload.
+    """
+    if not _palace_root_exists():
+        _output({})
+        return
+
+    # Parse inside the try so a malformed payload (e.g. non-dict stdin that
+    # makes _parse_harness_input raise) still runs the finally cleanup below.
+    session_id = "unknown"
+    try:
+        parsed = _parse_harness_input(data, harness)
+        session_id = parsed["session_id"]
+        transcript_path = parsed["transcript_path"]
+
+        # Read config defensively (mirror hook_stop): a corrupt or unreadable
+        # config must not lose the final save, so default to auto-save on and
+        # toasts off rather than crashing the hook.
+        try:
+            config = MempalaceConfig()
+            auto_save = config.hooks_auto_save
+            toast = config.hook_desktop_toast
+        except Exception:
+            auto_save = True
+            toast = False
+
+        # Respect auto_save config toggle (clean opt-out)
+        if not auto_save:
+            _output({})
+            return
+
+        _log(f"SESSION END for session {session_id}")
+
+        # Validate the harness-provided transcript path before touching it
+        # (extension + ".." traversal check), mirroring the read path that
+        # already runs through _validate_transcript_path. A rejected path skips
+        # the transcript captures but still lets the independent MEMPAL_DIR mine
+        # run.
+        valid_transcript = ""
+        if transcript_path:
+            try:
+                validated = _validate_transcript_path(transcript_path)
+            except OSError:
+                validated = None
+            if validated is None:
+                _log(f"WARNING: transcript_path rejected by validator: {transcript_path!r}")
+            else:
+                valid_transcript = str(validated)
+
+        # Flush. The diary checkpoint (in-process ChromaDB write) runs FIRST,
+        # before any detached mine is spawned, so it never contends for the
+        # palace lock; this handler is already backgrounded by the wrapper, so it
+        # is not under the SessionEnd budget and has time to finish. The detached
+        # transcript ingest follows; re-mining a transcript ``Stop`` already
+        # captured is a near no-op (deterministic convo IDs + ``file_already_mined``
+        # short-circuit + upsert). ``reason`` is intentionally not branched on:
+        # every clean-exit reason (incl. ``/clear`` / ``resume``) warrants the
+        # flush. Order matches ``hook_stop``.
+        if valid_transcript:
+            _save_diary_direct(
+                valid_transcript,
+                session_id,
+                wing=_wing_from_transcript_path(valid_transcript),
+                toast=toast,
+                agent_name=_diary_agent_for_harness(harness),
+            )
+            _ingest_transcript(valid_transcript)
+        _maybe_auto_ingest()
+
+        _output({})
+    finally:
+        _clear_session_last_save(session_id)
+
+
 def hook_precompact(data: dict, harness: str):
     """Precompact hook: mine transcript synchronously, then allow compaction.
 
@@ -1064,6 +1338,7 @@ def run_hook(hook_name: str, harness: str):
     hooks = {
         "session-start": hook_session_start,
         "stop": hook_stop,
+        "session-end": hook_session_end,
         "precompact": hook_precompact,
     }
 
diff --git a/mempalace/instructions/help.md b/mempalace/instructions/help.md
index 5cb70faf9..8461ed354 100644
--- a/mempalace/instructions/help.md
+++ b/mempalace/instructions/help.md
@@ -29,6 +29,7 @@ AI memory system. Store everything, find anything. Local, free, no API key.
 
 ### Palace (write)
 - mempalace_add_drawer -- Add a new memory (drawer)
+- mempalace_checkpoint -- Save a whole session in one call (dedup + file + diary)
 - mempalace_delete_drawer -- Delete a memory (drawer)
 
 ### Knowledge Graph
diff --git a/mempalace/mcp_server.py b/mempalace/mcp_server.py
index 9e2b3c6ab..8d1a3f32b 100644
--- a/mempalace/mcp_server.py
+++ b/mempalace/mcp_server.py
@@ -9,12 +9,13 @@
   mempalace_list_wings      — all wings with drawer counts
   mempalace_list_rooms      — rooms within a wing
   mempalace_get_taxonomy    — full wing → room → count tree
-  mempalace_search          — semantic search, optional wing/room filter
+  mempalace_search          — semantic search, optional wing/room/source_file filter
   mempalace_check_duplicate — check if content already exists before filing
 
 Tools (write):
   mempalace_add_drawer      — file verbatim content into a wing/room
   mempalace_delete_drawer   — remove a drawer by ID
+  mempalace_delete_by_source — bulk-remove all drawers mined from one source_file
 
 Tools (maintenance):
   mempalace_reconnect       — force cache invalidation and reconnect after external writes
@@ -47,6 +48,7 @@
 import logging  # noqa: E402
 import re  # noqa: E402
 import hashlib  # noqa: E402
+import hmac  # noqa: E402
 import sqlite3  # noqa: E402
 import threading  # noqa: E402
 import time  # noqa: E402
@@ -60,6 +62,7 @@
     sanitize_name,
     sanitize_content,
     sanitize_iso_temporal,
+    sqlite_read_uri,
     strip_lone_surrogates,
 )
 from .version import __version__  # noqa: E402
@@ -191,6 +194,23 @@ def _parse_args():
         metavar="NAME",
         help="Storage backend to use (default: config/env/detected/chroma)",
     )
+    parser.add_argument(
+        "--transport",
+        choices=["stdio", "http"],
+        default="stdio",
+        help="Serve MCP over stdio (default) or in-process HTTP",
+    )
+    parser.add_argument(
+        "--host",
+        default="127.0.0.1",
+        help="HTTP host to bind when --transport=http (default: 127.0.0.1)",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=8765,
+        help="HTTP port to bind when --transport=http (default: 8765)",
+    )
     args, unknown = parser.parse_known_args()
     if unknown:
         logger.debug("Ignoring unknown args: %s", unknown)
@@ -224,6 +244,245 @@ def _parse_args():
 _MCP_IDLE_HOURS_DEFAULT = 8.0
 _last_request_time: float = time.monotonic()
 
+# MCP startup/open SQLite integrity gate (#1818).
+#
+# The peer-writer guard prevents new concurrent writers, but an MCP server can
+# still start against a palace that was already left corrupt by a prior writer
+# crash/kill. Run the existing read-only SQLite quick_check once on startup/open
+# and fail loudly instead of silently serving a malformed FTS5/HNSW index.
+_sqlite_integrity_checked = False
+_sqlite_integrity_errors: list[str] = []
+_sqlite_integrity_check_error = ""
+_SQLITE_INTEGRITY_ERROR_CODE = -32002
+_SQLITE_INTEGRITY_ALLOWED_TOOLS = frozenset(
+    {
+        "mempalace_status",
+        "mempalace_reconnect",
+    }
+)
+
+
+# MCP peer-writer guard (#1818).
+#
+# The existing per-operation palace lock serializes individual writes, but it
+# cannot make another long-lived Chroma PersistentClient forget stale in-memory
+# HNSW/FTS state. Hold the same per-palace mine lock for this MCP process
+# lifetime. A peer MCP process can still serve read tools, but mutating tools
+# refuse before touching Chroma or the knowledge graph.
+_MCP_WRITER_LOCK_CM = None
+_MCP_WRITER_READ_ONLY = False
+_MCP_WRITER_LOCK_FAILED = False
+_MCP_WRITER_LOCK_ERROR = ""
+_MCP_ALLOW_PEER_WRITER_ENV = "MEMPALACE_MCP_ALLOW_PEER_WRITER"
+
+_MUTATING_TOOLS = frozenset(
+    {
+        "mempalace_kg_add",
+        "mempalace_kg_invalidate",
+        "mempalace_create_tunnel",
+        "mempalace_delete_tunnel",
+        "mempalace_delete_hallway",
+        "mempalace_add_drawer",
+        "mempalace_delete_drawer",
+        "mempalace_mine",
+        "mempalace_sync",
+        "mempalace_update_drawer",
+        "mempalace_diary_write",
+    }
+)
+
+
+def _truthy_env(name: str) -> bool:
+    return os.environ.get(name, "").strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _acquire_mcp_writer_lock() -> tuple[bool, str]:
+    """Acquire this process's per-palace MCP writer lease.
+
+    Returns (True, "") when this process may write. Returns (False, reason)
+    when another live writer already owns the per-palace lease. Once a server
+    starts read-only it stays read-only for its lifetime; restarting is the
+    safe way to become the writer after the original holder exits.
+    """
+
+    global _MCP_WRITER_LOCK_CM, _MCP_WRITER_READ_ONLY, _MCP_WRITER_LOCK_FAILED
+    global _MCP_WRITER_LOCK_ERROR
+
+    if _truthy_env(_MCP_ALLOW_PEER_WRITER_ENV):
+        return True, ""
+
+    if _MCP_WRITER_LOCK_CM is not None:
+        return True, ""
+
+    if _MCP_WRITER_READ_ONLY:
+        return False, _MCP_WRITER_LOCK_ERROR
+
+    if _MCP_WRITER_LOCK_FAILED:
+        return True, _MCP_WRITER_LOCK_ERROR
+
+    try:
+        from .palace import MineAlreadyRunning, mine_palace_lock
+
+        lock_cm = mine_palace_lock(_config.palace_path)
+        lock_cm.__enter__()
+    except MineAlreadyRunning as exc:
+        _MCP_WRITER_READ_ONLY = True
+        _MCP_WRITER_LOCK_ERROR = (
+            "another mempalace writer already holds the palace lock for "
+            f"{_config.palace_path!r}: {exc}"
+        )
+        return False, _MCP_WRITER_LOCK_ERROR
+    except Exception as exc:
+        _MCP_WRITER_LOCK_FAILED = True
+        _MCP_WRITER_LOCK_ERROR = (
+            "could not acquire MCP peer-writer lock for "
+            f"{_config.palace_path!r}: {exc!r}; continuing without "
+            "peer-writer protection"
+        )
+        logger.warning(_MCP_WRITER_LOCK_ERROR)
+        return True, _MCP_WRITER_LOCK_ERROR
+
+    _MCP_WRITER_LOCK_CM = lock_cm
+    import atexit
+
+    atexit.register(lambda: lock_cm.__exit__(None, None, None))
+    _MCP_WRITER_READ_ONLY = False
+    _MCP_WRITER_LOCK_FAILED = False
+    _MCP_WRITER_LOCK_ERROR = ""
+    return True, ""
+
+
+def _mcp_peer_writer_refusal(req_id, tool_name: str):
+    if tool_name not in _MUTATING_TOOLS:
+        return None
+
+    ok, reason = _acquire_mcp_writer_lock()
+    if ok:
+        return None
+
+    return {
+        "jsonrpc": "2.0",
+        "id": req_id,
+        "error": {
+            "code": -32001,
+            "message": "Peer MCP writer active; this server is read-only for mutating tools",
+            "data": {
+                "tool": tool_name,
+                "palace": _config.palace_path,
+                "reason": reason,
+                "override_env": _MCP_ALLOW_PEER_WRITER_ENV,
+            },
+        },
+    }
+
+
+def _refresh_sqlite_integrity_status() -> None:
+    """Refresh the MCP startup SQLite/FTS5 integrity gate.
+
+    Uses repair.sqlite_integrity_errors(), which is read-only and already backs
+    repair preflight. A failure here is treated as an integrity failure so the
+    server does not proceed silently after a malformed FTS5 index or other
+    SQLite-layer corruption (#1818).
+    """
+
+    global _sqlite_integrity_checked
+    global _sqlite_integrity_errors
+    global _sqlite_integrity_check_error
+
+    if not _config.palace_path or not _is_chroma_backend():
+        _sqlite_integrity_checked = True
+        _sqlite_integrity_errors = []
+        _sqlite_integrity_check_error = ""
+        return
+
+    try:
+        from .repair import sqlite_integrity_errors
+
+        errors = sqlite_integrity_errors(_config.palace_path)
+    except Exception as exc:
+        _sqlite_integrity_check_error = (
+            f"sqlite integrity probe failed: {type(exc).__name__}: {exc}"
+        )
+        _sqlite_integrity_errors = [_sqlite_integrity_check_error]
+    else:
+        _sqlite_integrity_errors = [str(error) for error in errors if str(error)]
+        _sqlite_integrity_check_error = ""
+
+    _sqlite_integrity_checked = True
+
+    if _sqlite_integrity_errors:
+        logger.error(
+            "SQLite integrity check failed for palace=%s: %s",
+            _config.palace_path,
+            "; ".join(_sqlite_integrity_errors[:3]),
+        )
+
+
+def _ensure_sqlite_integrity_status() -> None:
+    if not _sqlite_integrity_checked:
+        _refresh_sqlite_integrity_status()
+
+
+def _sqlite_integrity_payload() -> dict:
+    _ensure_sqlite_integrity_status()
+
+    payload = {
+        "checked": _sqlite_integrity_checked,
+        "ok": not _sqlite_integrity_errors,
+        "palace": _config.palace_path,
+        "sqlite_path": os.path.join(_config.palace_path, "chroma.sqlite3")
+        if _config.palace_path
+        else "",
+        "error_count": len(_sqlite_integrity_errors),
+        "errors": _sqlite_integrity_errors[:10],
+    }
+
+    if len(_sqlite_integrity_errors) > 10:
+        payload["truncated"] = len(_sqlite_integrity_errors) - 10
+
+    if _sqlite_integrity_check_error:
+        payload["check_error"] = _sqlite_integrity_check_error
+
+    return payload
+
+
+def _mcp_sqlite_integrity_refusal(req_id, tool_name: str):
+    if tool_name in _SQLITE_INTEGRITY_ALLOWED_TOOLS:
+        return None
+
+    _ensure_sqlite_integrity_status()
+
+    if not _sqlite_integrity_errors:
+        return None
+
+    return {
+        "jsonrpc": "2.0",
+        "id": req_id,
+        "error": {
+            "code": _SQLITE_INTEGRITY_ERROR_CODE,
+            "message": (
+                "Palace SQLite integrity check failed; refusing tool call "
+                "until the palace is repaired"
+            ),
+            "data": {
+                "tool": tool_name,
+                "palace": _config.palace_path or "",
+                "sqlite_path": (
+                    os.path.join(_config.palace_path, "chroma.sqlite3")
+                    if _config.palace_path
+                    else ""
+                ),
+                "errors": _sqlite_integrity_errors[:10],
+                "error_count": len(_sqlite_integrity_errors),
+                "hint": (
+                    "Stop all MemPalace MCP clients/writers, back up the palace, "
+                    "repair the SQLite/FTS5 corruption offline, then run "
+                    "mempalace_reconnect or restart the MCP server."
+                ),
+            },
+        },
+    }
+
 
 def _mcp_idle_timeout_secs() -> float:
     """Return the configured MCP idle timeout in seconds (0 = disabled)."""
@@ -464,83 +723,12 @@ def _refresh_vector_disabled_flag() -> None:
 # Every write operation is logged to a JSONL file before execution.
 # This provides an audit trail for detecting memory poisoning and
 # enables review/rollback of writes from external or untrusted sources.
-
-_WAL_FILE = Path(os.path.expanduser("~/.mempalace/wal")) / "write_log.jsonl"
-_WAL_INITIALIZED_DIR = None
-
-
-def _ensure_wal() -> None:
-    """Create (and re-harden) the WAL directory lazily, on the first write.
-
-    This must NOT run at import time: a user who removed ``~/.mempalace`` has
-    engaged the documented kill-switch (``hooks_cli._palace_root_exists()``,
-    #1305), and recreating the directory just by importing this module would
-    silently re-arm the autosave/mining hooks they disabled (#1676). Creating
-    it on the first real write keeps the kill-switch contract intact.
-
-    It is deliberately not gated on ``_palace_root_exists()``: by the time a
-    write reaches here the palace is already being recreated by the ChromaDB/KG
-    layer regardless, so gating would only drop audit records, not prevent
-    recreation. Runtime kill-switch enforcement for MCP writes is the broader
-    question tracked in #504.
-
-    Hardening is attempted once per directory and the path cached in
-    ``_WAL_INITIALIZED_DIR`` regardless of outcome (keyed on the path, so a
-    test repointing ``_WAL_FILE`` re-initialises), so a persistent failure on a
-    restricted filesystem does not retry on every write. ``mkdir`` runs only
-    when the initial ``chmod`` raises ``FileNotFoundError`` (EAFP). The parent
-    ``~/.mempalace`` keeps its umask mode, like the other palace directories;
-    the WAL file is created atomically with mode 0o600 by ``_wal_log``.
-    """
-    global _WAL_INITIALIZED_DIR
-    wal_dir = _WAL_FILE.parent
-    if _WAL_INITIALIZED_DIR == wal_dir:
-        return
-    try:
-        wal_dir.chmod(0o700)
-    except FileNotFoundError:
-        try:
-            wal_dir.mkdir(parents=True, exist_ok=True)
-            wal_dir.chmod(0o700)
-        except (OSError, NotImplementedError):
-            pass
-    except (OSError, NotImplementedError):
-        pass
-    # Cache regardless of outcome: one attempt per directory, so a persistent
-    # chmod/mkdir failure (restricted FS) is not retried on every write.
-    _WAL_INITIALIZED_DIR = wal_dir
-
-
-# Keys whose values should be redacted in WAL entries to avoid logging sensitive content
-_WAL_REDACT_KEYS = frozenset(
-    {"content", "content_preview", "document", "entry", "entry_preview", "query", "text"}
-)
-
-
-def _wal_log(operation: str, params: dict, result: dict = None):
-    """Append a write operation to the write-ahead log."""
-    # Redact sensitive content from params before logging
-    safe_params = {}
-    for k, v in params.items():
-        if k in _WAL_REDACT_KEYS:
-            safe_params[k] = f"[REDACTED {len(v)} chars]" if isinstance(v, str) else "[REDACTED]"
-        else:
-            safe_params[k] = v
-    entry = {
-        "timestamp": datetime.now().isoformat(),
-        "operation": operation,
-        "params": safe_params,
-        "result": result,
-    }
-    try:
-        # Dir setup shares the append's exception handler below: any WAL
-        # failure is logged and non-fatal, never crashing the tool call.
-        _ensure_wal()
-        fd = os.open(str(_WAL_FILE), os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600)
-        with os.fdopen(fd, "a", encoding="utf-8") as f:
-            f.write(json.dumps(entry, default=str) + "\n")
-    except Exception as e:
-        logger.error(f"WAL write failed: {e}")
+#
+# The implementation lives in mempalace.wal — a side-effect-free module — so the
+# CLI sync path and the daemon service layer can audit writes without importing
+# this module, whose import installs MCP stdio protection (os.dup2(2, 1) and
+# sys.stdout = sys.stderr) that would misroute their output.
+from .wal import _wal_log  # noqa: E402
 
 
 def _get_client():
@@ -922,7 +1110,22 @@ def _safe_meta(meta):
 
 
 def _fetch_all_metadata(col, where=None):
-    """Paginate col.get() to avoid the 10K silent truncation limit."""
+    """Fetch every matching record's metadata via the backend's best strategy.
+
+    Delegates to BaseCollection.get_all_metadata() (#1796), which Chroma
+    satisfies with the same offset-paginated loop this function used to do
+    inline, and which Qdrant overrides with a single _scroll_all() pass.
+    Routing through one contract method means every backend gets its own
+    correct strategy without this caller needing to know which backend it's
+    talking to.
+    """
+    get_all = getattr(col, "get_all_metadata", None)
+    if callable(get_all):
+        return get_all(where=where)
+
+    # Defensive fallback for any collection object that predates the
+    # get_all_metadata() contract method (e.g. a third-party backend not yet
+    # updated). Preserves the exact previous behavior.
     total = col.count()
     all_meta = []
     offset = 0
@@ -968,6 +1171,41 @@ def _sanitize_optional_name(value: str = None, field_name: str = "name") -> str:
     return sanitize_name(value, field_name)
 
 
+# Bounds the whole stored source_file string (often an absolute path), so it is
+# Linux PATH_MAX rather than the 128-char wing/room NAME limit.
+_MAX_SOURCE_FILE_LENGTH = 4096
+
+
+def _sanitize_optional_source_file(value: str = None) -> str:
+    """Validate an optional source_file search filter (#1815).
+
+    Unlike wing/room, a source_file is a path: ``/``, ``\\`` and ``.`` are
+    legal, so it is NOT run through ``sanitize_name`` (which rejects path
+    characters as traversal attempts). The value is matched verbatim as a
+    ChromaDB metadata-equality / parameterized-SQL value — never used as a
+    filesystem path — so there is no traversal risk to guard against. A null
+    byte or a pathological length can still upset the backend (chromadb
+    add/upsert chokes on null bytes / lone surrogates, #1235), so guard those
+    for parity with ``sanitize_name``. Blank / whitespace-only is "no filter".
+    """
+    if value is None:
+        return None
+    if not isinstance(value, str):
+        raise ValueError("source_file must be a string")
+    value = value.strip()
+    if not value:
+        return None
+    if "\x00" in value:
+        raise ValueError("source_file contains null bytes")
+    if value != strip_lone_surrogates(value):
+        raise ValueError("source_file contains invalid surrogate characters")
+    if len(value) > _MAX_SOURCE_FILE_LENGTH:
+        raise ValueError(
+            f"source_file exceeds maximum length of {_MAX_SOURCE_FILE_LENGTH} characters"
+        )
+    return value
+
+
 # ==================== READ TOOLS ====================
 
 
@@ -991,7 +1229,7 @@ def _tool_status_via_sqlite() -> dict:
     rooms: dict = {}
     total = 0
     try:
-        conn = _sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        conn = _sqlite3.connect(sqlite_read_uri(db_path), uri=True)
         try:
             row = conn.execute(
                 """
@@ -1044,17 +1282,212 @@ def _tool_status_via_sqlite() -> dict:
     return result
 
 
+def _sqlite_taxonomy():
+    """Fast wing→room tally straight from ``chroma.sqlite3`` (#1748 / #1379).
+
+    Returns ``(total, {wing: {room: count}})`` or ``None`` to signal the
+    caller to fall back to the ChromaDB client pagination path. ``None`` means
+    a non-chroma backend, a missing/unbootstrapped palace, or a sqlite error —
+    exactly the cases ``backends.chroma._sqlite_wing_room_counts`` already
+    handles for the CLI ``miner.status()``. The point is to answer the
+    overview tools from the relational metadata without cold-loading the HNSW
+    index, which costs tens of seconds per call on large palaces and is what
+    times them out under the MCP host limit.
+    """
+    if not _is_chroma_backend():
+        return None
+    try:
+        from .backends.chroma import _sqlite_wing_room_counts
+
+        counts = _sqlite_wing_room_counts(_config.palace_path, _config.collection_name)
+    except Exception:
+        logger.debug("sqlite taxonomy fast path failed; falling back", exc_info=True)
+        return None
+    if counts is None:
+        return None
+
+    # Preserve the client path's output contract: drawers missing wing/room
+    # read as "unknown" (the ``m.get("wing", "unknown")`` default), not the
+    # sqlite COALESCE placeholder "?". Without this, the fast path would be an
+    # observable API change for MCP clients on legacy/partial drawers.
+    def _norm(key):
+        return "unknown" if key in (None, "?") else key
+
+    total, wing_rooms = counts
+    normalized: dict = {}
+    for wing, room_counts in wing_rooms.items():
+        dest = normalized.setdefault(_norm(wing), {})
+        for room, n in room_counts.items():
+            rkey = _norm(room)
+            dest[rkey] = dest.get(rkey, 0) + n
+    return total, normalized
+
+
+def _sqlite_graph_stats():
+    """Compute ``graph_stats`` from one grouped sqlite read (#1379, graph_stats
+    half; follow-up to #1748).
+
+    ``graph_stats`` only needs grouped counts, but the client path builds the
+    whole graph by paging every metadata row (``build_graph`` →
+    ``col.get(limit, offset)``) and cold-loads the HNSW index — which times out
+    on six-figure palaces. This reads the same wing/room/hall grouping straight
+    from ``chroma.sqlite3`` and reconstructs the stats.
+
+    Returns the stats dict, or ``None`` to fall back to the client path
+    (non-chroma backend, missing/unbootstrapped palace, sqlite error). The
+    reconstruction mirrors ``palace_graph.build_graph`` /
+    ``palace_graph.graph_stats`` exactly: a node is a room with a non-empty
+    wing and a usable room name (the catch-all ``"general"`` is excluded), and
+    edges are the per-hall cross-wing crossings of multi-wing rooms.
+    """
+    if not _is_chroma_backend():
+        return None
+    import sqlite3 as _sqlite3
+    from collections import Counter, defaultdict
+
+    if not _config.palace_path:
+        return None
+    db_path = os.path.join(_config.palace_path, "chroma.sqlite3")
+    if not os.path.isfile(db_path):
+        return None
+    collection_name = _config.collection_name
+    # Treat any failure as a soft fallback to the client path (sqlite errors,
+    # but also an unexpected schema shape tripping the reconstruction) so
+    # graph_stats degrades to build_graph() rather than raising — mirroring the
+    # sibling sqlite fast paths (_sqlite_taxonomy / _sqlite_wing_room_counts).
+    try:
+        conn = _sqlite3.connect(sqlite_read_uri(db_path), uri=True)
+        try:
+            conn.execute("PRAGMA busy_timeout = 3000")
+            if (
+                conn.execute(
+                    "SELECT 1 FROM collections WHERE name = ?", (collection_name,)
+                ).fetchone()
+                is None
+            ):
+                return None
+            rows = conn.execute(
+                """
+                SELECT
+                    COALESCE(rm.string_value, CAST(rm.int_value AS TEXT),
+                             CAST(rm.float_value AS TEXT), '') AS room,
+                    COALESCE(wm.string_value, CAST(wm.int_value AS TEXT),
+                             CAST(wm.float_value AS TEXT), '') AS wing,
+                    COALESCE(hm.string_value, CAST(hm.int_value AS TEXT),
+                             CAST(hm.float_value AS TEXT), '') AS hall,
+                    COUNT(*) AS n
+                FROM embeddings e
+                JOIN segments s ON e.segment_id = s.id AND s.scope = 'METADATA'
+                JOIN collections c ON s.collection = c.id
+                LEFT JOIN embedding_metadata rm ON rm.id = e.id AND rm.key = 'room'
+                LEFT JOIN embedding_metadata wm ON wm.id = e.id AND wm.key = 'wing'
+                LEFT JOIN embedding_metadata hm ON hm.id = e.id AND hm.key = 'hall'
+                WHERE c.name = ?
+                GROUP BY room, wing, hall
+                """,
+                (collection_name,),
+            ).fetchall()
+        finally:
+            conn.close()
+
+        # Reconstruct build_graph()'s room_data, applying its per-drawer filter
+        # (`if room and room != "general" and wing`).
+        room_data = defaultdict(lambda: {"wings": set(), "halls": set(), "count": 0})
+        for room, wing, hall, n in rows:
+            if not room or room == "general" or not wing:
+                continue
+            node = room_data[room]
+            node["wings"].add(wing)
+            if hall:
+                node["halls"].add(hall)
+            node["count"] += int(n)
+
+        tunnel_rooms = 0
+        total_edges = 0
+        wing_counts = Counter()
+        for data in room_data.values():
+            n_wings = len(data["wings"])
+            for wing in data["wings"]:
+                wing_counts[wing] += 1
+            if n_wings >= 2:
+                tunnel_rooms += 1
+                # Edges per multi-wing room: one per wing-pair per hall, matching
+                # build_graph's nested wa<wb × hall expansion.
+                total_edges += (n_wings * (n_wings - 1) // 2) * len(data["halls"])
+
+        top_tunnels = [
+            {"room": room, "wings": sorted(data["wings"]), "count": data["count"]}
+            # build_graph's graph_stats slices the top 10 by wing-count first,
+            # then keeps the multi-wing ones. An explicit room-name tiebreaker
+            # keeps the fast path deterministic across runs — preferable to
+            # leaning on SQLite's unspecified GROUP BY order. (Exact membership
+            # parity with the client path is unattainable anyway; the two never
+            # run on the same palace, since the backend picks one.)
+            for room, data in sorted(
+                room_data.items(), key=lambda kv: (-len(kv[1]["wings"]), kv[0])
+            )[:10]
+            if len(data["wings"]) >= 2
+        ]
+
+        return {
+            "total_rooms": len(room_data),
+            "tunnel_rooms": tunnel_rooms,
+            "total_edges": total_edges,
+            "rooms_per_wing": dict(wing_counts.most_common()),
+            "top_tunnels": top_tunnels,
+        }
+    except Exception:
+        logger.debug("sqlite graph_stats fast path failed; falling back", exc_info=True)
+        return None
+
+
 def tool_status():
+    _ensure_sqlite_integrity_status()
+    if _sqlite_integrity_errors:
+        result = _tool_status_via_sqlite()
+        if isinstance(result, dict):
+            result["sqlite_integrity"] = _sqlite_integrity_payload()
+            result["sqlite_integrity_failed"] = True
+            result["error"] = "SQLite integrity check failed"
+            result["partial"] = True
+        return result
+
     # Run the safe sqlite/pickle probe before we touch chromadb. In the
     # #1222 failure mode, opening the persistent client to call .count()
     # can segfault — short-circuit to a pure-sqlite path when divergence
     # is detected so status stays reachable.
     db_exists = _backend_db_exists()
     _refresh_vector_disabled_flag()
+    writer_ok, writer_reason = _acquire_mcp_writer_lock()
+    if not writer_ok:
+        logger.warning("%s; mutating MCP tools will run read-only", writer_reason)
 
     if _vector_disabled:
         return _tool_status_via_sqlite()
 
+    # Fast path: tally wing/room straight from sqlite so overview tools stay
+    # responsive on large palaces instead of cold-loading the HNSW index or
+    # paging hundreds of MB of metadata through the client (#1748 / #1379).
+    # ``None`` (non-chroma backend / non-standard layout) falls through to the
+    # client path below.
+    fast = _sqlite_taxonomy()
+    if fast is not None:
+        total, wing_rooms = fast
+        wings = {}
+        rooms = {}
+        for w, room_counts in wing_rooms.items():
+            wings[w] = wings.get(w, 0) + sum(room_counts.values())
+            for r, n in room_counts.items():
+                rooms[r] = rooms.get(r, 0) + n
+        return {
+            "total_drawers": total,
+            "wings": wings,
+            "rooms": rooms,
+            "protocol": PALACE_PROTOCOL,
+            "aaak_dialect": AAAK_SPEC,
+            "backend": _selected_backend_name(),
+        }
+
     # Use create=True only when a palace DB already exists on disk -- this
     # bootstraps the ChromaDB collection on a valid-but-empty palace without
     # accidentally creating a palace in a non-existent directory (#830).
@@ -1121,6 +1554,13 @@ def tool_status():
 
 
 def tool_list_wings():
+    fast = _sqlite_taxonomy()
+    if fast is not None:
+        _total, wing_rooms = fast
+        wings = {}
+        for w, room_counts in wing_rooms.items():
+            wings[w] = wings.get(w, 0) + sum(room_counts.values())
+        return {"wings": wings}
     col = _get_collection()
     if not col:
         return _collection_error_or_no_palace()
@@ -1144,6 +1584,16 @@ def tool_list_rooms(wing: str = None):
         wing = _sanitize_optional_name(wing, "wing")
     except ValueError as e:
         return {"error": str(e)}
+    fast = _sqlite_taxonomy()
+    if fast is not None:
+        _total, wing_rooms = fast
+        rooms = {}
+        for w, room_counts in wing_rooms.items():
+            if wing and w != wing:
+                continue
+            for r, n in room_counts.items():
+                rooms[r] = rooms.get(r, 0) + n
+        return {"wing": wing or "all", "rooms": rooms}
     col = _get_collection()
     if not col:
         return _collection_error_or_no_palace()
@@ -1164,6 +1614,10 @@ def tool_list_rooms(wing: str = None):
 
 
 def tool_get_taxonomy():
+    fast = _sqlite_taxonomy()
+    if fast is not None:
+        _total, wing_rooms = fast
+        return {"taxonomy": {w: dict(room_counts) for w, room_counts in wing_rooms.items()}}
     col = _get_collection()
     if not col:
         return _collection_error_or_no_palace()
@@ -1190,6 +1644,7 @@ def tool_search(
     limit: int = 5,
     wing: str = None,
     room: str = None,
+    source_file: str = None,
     max_distance: float = 1.5,
     min_similarity: float = None,
     context: str = None,
@@ -1198,6 +1653,7 @@ def tool_search(
     try:
         wing = _sanitize_optional_name(wing, "wing")
         room = _sanitize_optional_name(room, "room")
+        source_file = _sanitize_optional_source_file(source_file)
     except ValueError as e:
         return {"error": str(e)}
     # Backwards compat: accept old name
@@ -1216,6 +1672,7 @@ def tool_search(
         palace_path=_config.palace_path,
         wing=wing,
         room=room,
+        source_file=source_file,
         n_results=limit,
         max_distance=dist,
         vector_disabled=_vector_disabled,
@@ -1233,6 +1690,7 @@ def tool_search(
             palace_path=_config.palace_path,
             wing=wing,
             room=room,
+            source_file=source_file,
             n_results=limit,
             max_distance=dist,
             vector_disabled=_vector_disabled,
@@ -1341,6 +1799,12 @@ def tool_find_tunnels(wing_a: str = None, wing_b: str = None):
 
 def tool_graph_stats():
     """Palace graph overview: nodes, tunnels, edges, connectivity."""
+    # Fast path: grouped sqlite read instead of paging all metadata and
+    # cold-loading HNSW via build_graph(), which times out on large palaces
+    # (#1379). Falls through to the client path for non-chroma backends.
+    fast = _sqlite_graph_stats()
+    if fast is not None:
+        return fast
     col = _get_collection()
     if not col:
         return _collection_error_or_no_palace()
@@ -2060,6 +2524,158 @@ def _run():
             _metadata_cache = None
 
 
+def _purge_source_closets(source_file: str, *, commit: bool) -> int:
+    """Count, and optionally delete, closets matching ``source_file`` exactly.
+
+    The closets collection is the searchable AAAK index layer; it is keyed by
+    ``source_file`` independently of the drawers collection, so a drawer-only
+    delete would strand stale index pointers at the deleted source (#1722).
+    Mirrors the closet-purge step in :func:`mempalace.sync.sync_palace` and the
+    re-mine purge in :func:`mempalace.palace.purge_file_closets`.
+
+    Best-effort: a missing or unavailable closet collection yields 0 and never
+    raises, so it can never abort a drawer delete that has already committed.
+    Deletion is pushed down via ``delete(where=...)`` so it survives palaces
+    larger than the 10k ``get()`` truncation; the returned count is the (best
+    effort) number of matching closets observed before the delete.
+    """
+    from .palace import get_closets_collection
+
+    try:
+        closets_col = get_closets_collection(_config.palace_path, create=False)
+    except Exception as exc:
+        logger.warning("Closet purge skipped (collection unavailable): %s", exc)
+        return 0
+    if closets_col is None:
+        return 0
+    try:
+        ids = closets_col.get(where={"source_file": source_file}, include=[]).get("ids") or []
+        count = len(ids)
+        if commit and count:
+            closets_col.delete(where={"source_file": source_file})
+        return count
+    except Exception as exc:
+        logger.warning("Closet purge failed for %s: %s", source_file, exc)
+        return 0
+
+
+def tool_delete_by_source(source_file: str, dry_run: bool = True):
+    """Delete every drawer whose ``source_file`` metadata matches exactly.
+
+    Bulk cleanup for the contamination case in #1722, where benchmark/eval
+    files (ShareGPT dumps, ``results_mempal_*.jsonl``, language config JSON)
+    get mined into the same wing as real user data and drown out semantic
+    search. Previously the only recourse was hand-rolled SQLite ``DELETE``
+    against ``chroma.sqlite3``.
+
+    Matching is exact on the stored ``source_file`` value and pushed down to
+    the backend via ``delete(where=...)`` — the same idiom used by the miner
+    and diary ingest paths — so there is no client-side id list and the
+    SQLite "too many variables" limit cannot be hit, regardless of how many
+    drawers share the source (the reporter had 55k).
+
+    Also purges the matching closets (the AAAK index layer) so deleting the
+    drawers doesn't strand stale index pointers at the dead source (#1722).
+
+    Defaults to a dry run: it reports the drawer match count, the closet match
+    count, and a small sample so the caller can confirm the blast radius before
+    anything is removed. Pass ``dry_run=False`` to commit the deletion
+    (irreversible).
+    """
+    global _metadata_cache
+    if not isinstance(source_file, str) or not source_file.strip():
+        return {"success": False, "error": "source_file must be a non-empty string"}
+    # Mirror the ingestion-side normalization (tool_add_drawer strips lone
+    # surrogates from source_file before storing) so exact matching still hits
+    # rows mined from non-ASCII paths that arrived via a cp1252 stdin (#1488).
+    source_file = strip_lone_surrogates(source_file)
+
+    col = _get_collection()
+    if not col:
+        return _collection_error_or_no_palace()
+
+    where = {"source_file": source_file}
+    try:
+        # Paginated to survive palaces larger than the 10k get() truncation.
+        metas = _fetch_all_metadata(col, where=where)
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+    match_count = len(metas)
+    # Distinct (wing, room) pairs so the caller sees where the hits live.
+    sample = []
+    seen = set()
+    for meta in metas:
+        meta = _safe_meta(meta)
+        # Default missing wing/room to "" for consistency with the rest of the
+        # file (drawers are always stored with both, but be defensive).
+        wing = meta.get("wing", "")
+        room = meta.get("room", "")
+        key = (wing, room)
+        if key in seen:
+            continue
+        seen.add(key)
+        sample.append({"wing": wing, "room": room})
+        if len(sample) >= 5:
+            break
+
+    if dry_run:
+        closet_match_count = _purge_source_closets(source_file, commit=False)
+        return {
+            "success": True,
+            "dry_run": True,
+            "source_file": source_file,
+            "match_count": match_count,
+            "closet_match_count": closet_match_count,
+            "sample": sample,
+            "hint": (
+                "No drawers were deleted. Re-run with dry_run=false to remove "
+                f"these {match_count} drawer(s) and {closet_match_count} index "
+                "entr(y/ies)."
+                if match_count
+                else "No drawers match this source_file."
+            ),
+        }
+
+    if match_count == 0:
+        # Idempotent: deleting an absent source is a no-op, not an error.
+        return {
+            "success": True,
+            "dry_run": False,
+            "source_file": source_file,
+            "deleted": 0,
+        }
+
+    _wal_log(
+        "delete_by_source",
+        {"source_file": source_file, "match_count": match_count, "sample": sample},
+    )
+    try:
+        col.delete(where=where)
+        _metadata_cache = None
+        # Purge the matching closets too so the AAAK index doesn't keep stale
+        # pointers at the now-deleted drawers (#1722). Done after the drawer
+        # delete and intentionally best-effort: the drawers are already gone,
+        # so a closet-purge hiccup must not turn a successful delete into an
+        # error — it just leaves index cruft a later `repair` / re-mine clears.
+        closets_deleted = _purge_source_closets(source_file, commit=True)
+        logger.info(
+            "Deleted %d drawer(s) and %d closet(s) from source: %s",
+            match_count,
+            closets_deleted,
+            source_file,
+        )
+        return {
+            "success": True,
+            "dry_run": False,
+            "source_file": source_file,
+            "deleted": match_count,
+            "closets_deleted": closets_deleted,
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
 def tool_sync(project_dir: str = None, wing: str = None, apply: bool = False):
     """Prune drawers whose source files are gitignored, missing, or moved (#1252)."""
     global _metadata_cache
@@ -2761,6 +3377,24 @@ def tool_reconnect():
             except Exception:
                 pass
         _kg_by_path.clear()
+    _refresh_sqlite_integrity_status()
+    if _sqlite_integrity_errors:
+        result = {
+            "success": False,
+            "message": "SQLite integrity check failed after reconnect",
+            "sqlite_integrity": _sqlite_integrity_payload(),
+            "vector_disabled": _vector_disabled,
+            "vector_disabled_reason": _vector_disabled_reason,
+            "hint": (
+                "Stop all MemPalace MCP clients/writers, back up the palace, "
+                "repair the SQLite/FTS5 corruption offline, then run "
+                "mempalace_reconnect or restart the MCP server."
+            ),
+        }
+        if close_errors:
+            result["error"] = "; ".join(close_errors)
+        return result
+
     try:
         col = _get_collection()
         if col is None:
@@ -2798,6 +3432,79 @@ def tool_reconnect():
         return {"success": False, "error": str(e)}
 
 
+def tool_checkpoint(items, diary=None, dedup_threshold=0.9):
+    """Batch session save in a single call.
+
+    Semantic-dedups each item, files the non-duplicates as drawers, then
+    writes one diary entry. Collapses the per-item ``check_duplicate`` /
+    ``add_drawer`` / ``diary_write`` sequence into one MCP request so the
+    host UI renders a single tool-call card (and keeps its spinner up for
+    the whole save) instead of one card per underlying call.
+
+    ``items`` is a list of ``{"wing", "room", "content"}`` dicts. ``diary``
+    is an optional ``{"agent_name", "entry", "topic"?, "wing"?}`` dict.
+    Reuses the existing single-item handlers so dedup/idempotency/WAL
+    behaviour is identical to calling them directly.
+    """
+    # Inputs come from MCP clients and handle_request does not validate
+    # nested schemas, so guard every field here. A single malformed item
+    # must record an error and be skipped, never raise and abort the whole
+    # batch (the already-filed items in this call would otherwise be lost
+    # from the response).
+    try:
+        dedup_threshold = float(dedup_threshold)
+    except (ValueError, TypeError):
+        return {"error": "dedup_threshold must be a number"}
+
+    out = {"added": [], "duplicates": [], "errors": []}
+    if not isinstance(items, list):
+        return {"error": "items must be a list of {wing, room, content} objects"}
+    for item in items:
+        if not isinstance(item, dict):
+            out["errors"].append({"item": item, "error": "item must be an object"})
+            continue
+        wing = item.get("wing")
+        room = item.get("room")
+        content = item.get("content")
+        # Non-empty strings only: a non-string here would raise deep in
+        # sanitize_content / strip_lone_surrogates.
+        if not all(isinstance(v, str) and v for v in (wing, room, content)):
+            out["errors"].append(
+                {"item": item, "error": "wing, room, content must be non-empty strings"}
+            )
+            continue
+        dup = tool_check_duplicate(content, threshold=dedup_threshold)
+        if dup.get("is_duplicate"):
+            out["duplicates"].append({"room": room, "matches": dup.get("matches", [])})
+            continue
+        # On a dedup error (genuine index failure — content is guaranteed a
+        # string by the guard above) we still file rather than drop the
+        # memory: verbatim recall is the priority and add_drawer's own
+        # idempotency blocks exact duplicates.
+        res = tool_add_drawer(wing=wing, room=room, content=content, added_by="checkpoint")
+        if res.get("success"):
+            out["added"].append(res)
+        else:
+            out["errors"].append(res)
+    if diary is not None:
+        if not isinstance(diary, dict):
+            out["errors"].append({"diary": diary, "error": "diary must be an object"})
+        else:
+            entry = diary.get("entry") or diary.get("content")
+            if not isinstance(entry, str) or not entry:
+                out["errors"].append(
+                    {"diary": diary, "error": "diary entry must be a non-empty string"}
+                )
+            else:
+                out["diary"] = tool_diary_write(
+                    agent_name=diary.get("agent_name", "cursor-ide"),
+                    entry=entry,
+                    topic=diary.get("topic", "session-checkpoint"),
+                    wing=diary.get("wing", ""),
+                )
+    return out
+
+
 # ==================== MCP PROTOCOL ====================
 
 TOOLS = {
@@ -3059,6 +3766,15 @@ def tool_reconnect():
                 },
                 "wing": {"type": "string", "description": "Filter by wing (optional)"},
                 "room": {"type": "string", "description": "Filter by room (optional)"},
+                "source_file": {
+                    "type": "string",
+                    "description": (
+                        "Filter to one exact source_file (optional). Matches the full "
+                        "stored path exactly (leading/trailing whitespace trimmed); no "
+                        "glob or basename matching. Pass the value from a result's "
+                        "'source_path' field; the displayed 'source_file' is only a basename."
+                    ),
+                },
                 "max_distance": {
                     "type": "number",
                     "description": "Max cosine distance threshold (0=identical, 2=opposite). Results further than this are dropped. Lower = stricter. Default 1.5. Set to 0 to disable.",
@@ -3108,6 +3824,52 @@ def tool_reconnect():
         },
         "handler": tool_add_drawer,
     },
+    "mempalace_checkpoint": {
+        "description": "Save a whole session in one call: semantic-dedups each item, files non-duplicates as drawers, then writes one diary entry. Use this instead of many separate check_duplicate/add_drawer/diary_write calls — it renders as a single tool-call card in the host UI.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "items": {
+                    "type": "array",
+                    "description": "Verbatim items to file. Each is {wing, room, content} — content is the exact words, never summarized.",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "wing": {"type": "string", "description": "Wing (project name)"},
+                            "room": {
+                                "type": "string",
+                                "description": "Room (short topic: decisions, backend...)",
+                            },
+                            "content": {
+                                "type": "string",
+                                "description": "Verbatim content to store",
+                            },
+                        },
+                        "required": ["wing", "room", "content"],
+                    },
+                },
+                "diary": {
+                    "type": "object",
+                    "description": "Optional diary entry written after filing: {agent_name, entry, topic?, wing?}. entry is AAAK-format.",
+                    "properties": {
+                        "agent_name": {
+                            "type": "string",
+                            "description": "Agent name (e.g. cursor-ide)",
+                        },
+                        "entry": {"type": "string", "description": "Diary entry in AAAK format"},
+                        "topic": {"type": "string", "description": "Topic tag (optional)"},
+                        "wing": {"type": "string", "description": "Target wing (optional)"},
+                    },
+                },
+                "dedup_threshold": {
+                    "type": "number",
+                    "description": "Similarity threshold 0-1 for the per-item dedup check (default 0.9)",
+                },
+            },
+            "required": ["items"],
+        },
+        "handler": tool_checkpoint,
+    },
     "mempalace_delete_drawer": {
         "description": "Delete a drawer by ID. Irreversible.",
         "input_schema": {
@@ -3173,6 +3935,24 @@ def tool_reconnect():
         },
         "handler": tool_mine,
     },
+    "mempalace_delete_by_source": {
+        "description": "Bulk-delete every drawer mined from one source_file (exact match). Use to clean up benchmark/test data accidentally mined into a user wing (#1722). Returns a dry-run match count and sample by default; pass dry_run=false to commit. Irreversible.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "source_file": {
+                    "type": "string",
+                    "description": "Exact source_file metadata value to remove (e.g. the full path that was mined)",
+                },
+                "dry_run": {
+                    "type": "boolean",
+                    "description": "Preview the match count without deleting; default true. Pass false to actually delete.",
+                },
+            },
+            "required": ["source_file"],
+        },
+        "handler": tool_delete_by_source,
+    },
     "mempalace_sync": {
         "description": "Prune drawers whose source files are gitignored, deleted, or moved. Returns dry-run report by default; pass apply=true to commit deletions.",
         "input_schema": {
@@ -3365,6 +4145,25 @@ def _internal_tool_error(req_id, tool_name: str, exc: BaseException = None) -> d
     }
 
 
+def _mcp_tool_preflight_refusal(req_id, tool_name: str):
+    """Run MCP request preflight gates outside handle_request complexity."""
+
+    sqlite_integrity_error = _mcp_sqlite_integrity_refusal(req_id, tool_name)
+    if sqlite_integrity_error is not None:
+        return sqlite_integrity_error
+
+    return _mcp_peer_writer_refusal(req_id, tool_name)
+
+
+def _decorate_mcp_tool_result(tool_name: str, result):
+    """Attach MCP transport-only diagnostics outside handle_request complexity."""
+
+    if tool_name == "mempalace_status" and isinstance(result, dict):
+        result.setdefault("sqlite_integrity", _sqlite_integrity_payload())
+
+    return result
+
+
 def handle_request(request):
     global _last_request_time
     if not isinstance(request, dict):
@@ -3483,6 +4282,10 @@ def handle_request(request):
                     "error": {"code": -32602, "message": f"Invalid value for parameter '{key}'"},
                 }
         tool_args.pop("wait_for_previous", None)
+        preflight_error = _mcp_tool_preflight_refusal(req_id, tool_name)
+        if preflight_error is not None:
+            return preflight_error
+
         # 'content' is an accepted alias for diary_write's 'entry' (callers often
         # reuse add_drawer's 'content' name). Map it in here, before dispatch, so a
         # content-only call still satisfies the required 'entry' param while the
@@ -3495,7 +4298,8 @@ def handle_request(request):
             if "entry" not in tool_args or tool_args["entry"] is None:
                 tool_args["entry"] = content_val
         try:
-            result = TOOLS[tool_name]["handler"](**tool_args)
+            result = _decorate_mcp_tool_result(tool_name, TOOLS[tool_name]["handler"](**tool_args))
+
             return {
                 "jsonrpc": "2.0",
                 "id": req_id,
@@ -3751,22 +4555,238 @@ def _watchdog() -> None:
     t.start()
 
 
-def main():
-    """MCP server entry point for the ``mempalace-mcp`` console script.
+def _json_rpc_parse_error(req_id=None):
+    return {
+        "jsonrpc": "2.0",
+        "id": req_id,
+        "error": {"code": -32700, "message": "Parse error"},
+    }
+
 
-    Side effect: pops ``PYTHONPATH`` from ``os.environ`` (see #1423) so
-    any subprocess this server spawns inherits a clean env. Host
-    applications that call ``main()`` programmatically should be aware
-    that the parent process loses ``PYTHONPATH`` as well. Library imports
-    (``import mempalace.searcher`` from a host app) do NOT trigger this
-    side effect; only the CLI/MCP entry points pop the env var.
+# Module-level constants for the HTTP transport.
+# Defined here (not inside main()) so _serve_http() / _build_http_server()
+# can reference them as free names without a NameError.
+_HTTP_REQUEST_LOCK = threading.Lock()
+_HTTP_MAX_REQUEST_BYTES = 16 * 1024 * 1024
+# Host literals that always denote this machine. Used both to decide whether a
+# bind is loopback (skip the network-exposure warning) and to pin the Host
+# header against DNS rebinding when serving on loopback.
+_HTTP_LOOPBACK_HOSTS = ("127.0.0.1", "localhost", "::1", "[::1]")
+
+
+def _http_is_loopback(host: str) -> bool:
+    """Whether ``host`` binds only to this machine."""
+    return (host or "").strip().lower() in _HTTP_LOOPBACK_HOSTS
+
+
+def _http_allowed_host_values(bind_host: str, port: int) -> set:
+    """Host-header values accepted when Host pinning is enforced.
+
+    DNS-rebinding defense: a browser tricked into POSTing to ``127.0.0.1`` by a
+    malicious page still carries the *attacker's* domain in the ``Host`` header,
+    so we pin ``Host`` to the loopback literals (and the bound host) with and
+    without the port. Computed from the *actual* bound port so an ephemeral
+    ``port=0`` bind (tests) still matches.
     """
-    # Drop leaked PYTHONPATH so any subprocess this server spawns starts
-    # with a clean env. The sys.path filter in mempalace/__init__.py
-    # already protects this process from the same ABI mismatch; here we
-    # extend the protection to children.
-    os.environ.pop("PYTHONPATH", None)
+    names = set(_HTTP_LOOPBACK_HOSTS)
+    if bind_host:
+        names.add(bind_host.strip().lower())
+    values = set()
+    for name in names:
+        values.add(name)
+        values.add(f"{name}:{port}")
+    return values
+
+
+def _http_origin_allowed(origin: str) -> bool:
+    """Whether a browser ``Origin`` header may call the transport.
+
+    Non-browser MCP clients omit ``Origin`` entirely (allowed). When an
+    ``Origin`` *is* present it must be a loopback origin — this is what stops a
+    page at ``https://evil.example`` from reaching a DNS-rebound localhost
+    server and reading the palace.
+    """
+    from urllib.parse import urlparse
+
+    try:
+        host = (urlparse(origin).hostname or "").strip().lower()
+    except Exception:
+        return False
+    return host in ("127.0.0.1", "localhost", "::1")
+
+
+def _build_http_server(host: str, port: int):
+    """Construct (but do not start) the MCP HTTP server.
+
+    Split out from :func:`_serve_http` so tests can bind an ephemeral port,
+    exercise the *real* handler, and shut it down — the previous test reached
+    for Starlette/uvicorn (neither a dependency) and so was silently skipped in
+    CI. Returns a bound ``ThreadingHTTPServer`` whose request policy (Host
+    allowlist, Origin check, optional bearer token) is attached as attributes.
+    """
+    from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+    from urllib.parse import urlparse
+
+    auth_token = os.environ.get("MEMPALACE_MCP_HTTP_TOKEN", "").strip()
+
+    class _MCPHTTPServer(ThreadingHTTPServer):
+        daemon_threads = True
+        allow_reuse_address = True
+
+    class _Handler(BaseHTTPRequestHandler):
+        protocol_version = "HTTP/1.1"
+        timeout = 10
+
+        def log_message(self, fmt, *args):
+            logger.info("HTTP %s - " + fmt, self.client_address[0], *args)
+
+        def _send_bytes(self, status: int, body: bytes, content_type: str) -> None:
+            self.send_response(status)
+            self.send_header("Content-Type", content_type)
+            self.send_header("Content-Length", str(len(body)))
+            self.send_header("Connection", "close")
+            self.end_headers()
+            self.wfile.write(body)
+            self.close_connection = True
+
+        def _send_json(self, status: int, payload: dict) -> None:
+            body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
+            self._send_bytes(status, body, "application/json; charset=utf-8")
+
+        def _request_rejected(self, require_auth: bool) -> bool:
+            """Enforce the transport's access policy before any dispatch.
+
+            The palace is the most sensitive data MemPalace holds and ``/mcp``
+            is unauthenticated by default, so this guards the two ways a local
+            HTTP server leaks to the network: DNS rebinding (Host/Origin) and,
+            when the operator opts in, a missing/incorrect bearer token.
+            """
+            srv = self.server
+            if srv.enforce_host_pin:
+                host_hdr = (self.headers.get("Host") or "").strip().lower()
+                if host_hdr not in srv.allowed_hosts:
+                    logger.warning("HTTP request rejected: Host %r not allowed", host_hdr)
+                    self.send_error(403, "Forbidden")
+                    return True
+            origin = self.headers.get("Origin")
+            if origin and not _http_origin_allowed(origin):
+                logger.warning("HTTP request rejected: cross-origin %r", origin)
+                self.send_error(403, "Forbidden")
+                return True
+            if require_auth and srv.auth_token:
+                provided = self.headers.get("Authorization", "")
+                if not hmac.compare_digest(provided, f"Bearer {srv.auth_token}"):
+                    logger.warning("HTTP request rejected: missing/invalid bearer token")
+                    self.send_error(401, "Unauthorized")
+                    return True
+            return False
+
+        def do_GET(self):
+            # Liveness probe is policy-gated for Host/Origin but never requires
+            # the token, so an orchestrator's health check works without creds.
+            if self._request_rejected(require_auth=False):
+                return
+            path = urlparse(self.path).path
+            if path == "/healthz":
+                self._send_bytes(200, b"ok\n", "text/plain; charset=utf-8")
+                return
+
+            self.send_error(404, "Not Found")
+
+        def do_POST(self):
+            if self._request_rejected(require_auth=True):
+                return
+            path = urlparse(self.path).path
+            if path != "/mcp":
+                self.send_error(404, "Not Found")
+                return
+
+            try:
+                content_length = int(self.headers.get("Content-Length", "0") or "0")
+            except (TypeError, ValueError):
+                content_length = 0
+
+            if content_length < 0 or content_length > _HTTP_MAX_REQUEST_BYTES:
+                self._send_json(
+                    413,
+                    {
+                        "jsonrpc": "2.0",
+                        "id": None,
+                        "error": {"code": -32600, "message": "Request too large"},
+                    },
+                )
+                return
+
+            try:
+                raw = self.rfile.read(content_length)
+                request = json.loads(raw.decode("utf-8"))
+            except Exception as exc:
+                logger.warning("HTTP JSON-RPC read or parse error: %s", exc)
+                self._send_json(400, _json_rpc_parse_error())
+                return
+
+            # Preserve the single-process / single-palace-handle behavior that
+            # stdio deployments rely on. HTTP gives us a safer transport, not
+            # concurrent Chroma/HNSW mutation.
+            with _HTTP_REQUEST_LOCK:
+                response = handle_request(request)
+
+            if response is None:
+                # JSON-RPC notifications intentionally have no response body.
+                self.send_response(202)
+                self.send_header("Content-Length", "0")
+                self.send_header("Connection", "close")
+                self.end_headers()
+                self.close_connection = True
+                return
+
+            self._send_json(200, response)
+
+    httpd = _MCPHTTPServer((host, port), _Handler)
+    bound_port = httpd.server_address[1]
+    # Pin Host only on a loopback bind (the security-critical default). A
+    # deliberately network-exposed bind is the operator's call and may sit
+    # behind a proxy that rewrites Host, so we relax the pin there and lean on
+    # the Origin check + optional token instead.
+    httpd.enforce_host_pin = _http_is_loopback(host)
+    httpd.allowed_hosts = _http_allowed_host_values(host, bound_port)
+    httpd.auth_token = auth_token
+    return httpd
+
+
+def _serve_http(host: str, port: int) -> None:
+    """Serve JSON-RPC over HTTP in-process.
+
+    This transport intentionally reuses the same ``handle_request`` dispatcher
+    as stdio. The only change is the framing layer: HTTP mode avoids a
+    long-lived stdout pipe for operators who run MemPalace behind an HTTP MCP
+    client/proxy for days at a time.
+    """
+    try:
+        httpd = _build_http_server(host, port)
+    except OSError as exc:
+        logger.error("Failed to start MCP HTTP server on %s:%s: %s", host, port, exc)
+        sys.exit(1)
+
+    bound_port = httpd.server_address[1]
+    if not _http_is_loopback(host):
+        logger.warning(
+            "MemPalace MCP HTTP server bound to non-loopback host %s — the palace "
+            "is now reachable from the network and /mcp is unauthenticated unless "
+            "you set MEMPALACE_MCP_HTTP_TOKEN. Bind 127.0.0.1 to keep it local.",
+            host,
+        )
+    with httpd:
+        logger.info("MemPalace MCP HTTP server listening on http://%s:%s/mcp", host, bound_port)
+        try:
+            httpd.serve_forever(poll_interval=0.5)
+        except KeyboardInterrupt:
+            logger.info("MemPalace MCP HTTP server shutting down")
+
+
+def _run_stdio_loop() -> None:
     _restore_stdout()
+
     # Force UTF-8 on stdio. MCP JSON-RPC is UTF-8, but Python on Windows
     # defaults stdin/stdout to the system codepage (e.g. cp1251), which
     # corrupts non-ASCII payloads and surfaces as generic -32000 errors on
@@ -3777,28 +4797,37 @@ def main():
                 stream.reconfigure(encoding="utf-8", errors="replace")
             except (AttributeError, OSError):
                 pass
+
     logger.info("MemPalace MCP Server starting...")
+
     # Pre-flight: probe HNSW capacity before any tool call so the warning
     # is visible at startup rather than on first use (#1222). Pure
     # filesystem read; never opens a chromadb client.
+    _refresh_sqlite_integrity_status()
     _refresh_vector_disabled_flag()
+
     # Opt-in: pre-load the embedder so the first chromadb-write tool call
     # does not pay the ONNX/CoreML cold-load tax under the MCP client
     # timeout (#1495). Default off — preserves current startup latency.
     _maybe_eager_warmup_embedder()
+
     # Idle auto-exit: release ChromaDB file handles from stale servers
     # that outlived their Claude Code session (#1552).
     _start_idle_exit_watchdog()
+
     while True:
         try:
             line = sys.stdin.readline()
             if not line:
                 break
+
             line = line.strip()
             if not line:
                 continue
+
             request = json.loads(line)
             response = handle_request(request)
+
             if response is not None:
                 sys.stdout.write(json.dumps(response, ensure_ascii=False) + "\n")
                 sys.stdout.flush()
@@ -3808,5 +4837,65 @@ def main():
             logger.error(f"Server error: {e}")
 
 
+def _run_http_loop() -> None:
+    # In HTTP mode there is no JSON-RPC stdio channel. Keeping the import-time
+    # stdout->stderr guard in place means any accidental print from a dependency
+    # still cannot masquerade as an HTTP response.
+    logger.info("MemPalace MCP HTTP server starting...")
+
+    # The HTTP transport exists for long-lived deployments. Do the cheap
+    # filesystem-only probe before binding, but never make the listener wait on
+    # optional embedder/HNSW warmup. Operators and tests should see /healthz as
+    # soon as the process is alive.
+    _refresh_vector_disabled_flag()
+    _start_idle_exit_watchdog()
+
+    raw_warmup = os.environ.get("MEMPALACE_EAGER_WARMUP", "").strip().lower()
+    if raw_warmup in _WARMUP_TRUTHY:
+
+        def _warmup_with_lock():
+            with _HTTP_REQUEST_LOCK:
+                _maybe_eager_warmup_embedder()
+
+        threading.Thread(
+            target=_warmup_with_lock,
+            name="mcp-http-eager-warmup",
+            daemon=True,
+        ).start()
+    elif raw_warmup and raw_warmup not in _WARMUP_FALSY:
+        # Keep the same warning behavior as stdio mode for typo values.
+        _maybe_eager_warmup_embedder()
+
+    _serve_http(_args.host, _args.port)
+
+
+def main():
+    """MCP server entry point for the ``mempalace-mcp`` console script.
+
+    Side effect: pops ``PYTHONPATH`` from ``os.environ`` (see #1423) so any
+    subprocess this server spawns inherits a clean env. Host applications that
+    call ``main()`` programmatically should be aware that the parent process
+    loses ``PYTHONPATH`` as well. Library imports do NOT trigger this side
+    effect; only the CLI/MCP entry point does.
+
+    Transports:
+    - ``stdio`` remains the default for existing Claude/MCP deployments.
+    - ``http`` is opt-in and serves JSON-RPC POSTs at ``/mcp`` in the same
+      process, avoiding the long-lived stdio framing failure surface from
+      #1801.
+    """
+
+    # Drop leaked PYTHONPATH so any subprocess this server spawns starts
+    # with a clean env. The sys.path filter in mempalace/__init__.py
+    # already protects this process from the same ABI mismatch; here we
+    # extend the protection to children.
+    os.environ.pop("PYTHONPATH", None)
+
+    if _args.transport == "http":
+        _run_http_loop()
+    else:
+        _run_stdio_loop()
+
+
 if __name__ == "__main__":
     main()
diff --git a/mempalace/miner.py b/mempalace/miner.py
index aed181070..538cdb23a 100644
--- a/mempalace/miner.py
+++ b/mempalace/miner.py
@@ -47,6 +47,34 @@
 
 logger = logging.getLogger("mempalace_mcp")
 
+PHP_EXTENSIONS = {
+    # Compound Blade templates such as ``view.blade.php`` are covered by the
+    # final ``.php`` suffix.
+    ".php",
+    ".php3",
+    ".php4",
+    ".php5",
+    ".php7",
+    ".php8",
+    ".phtml",
+    ".phps",
+    ".phpt",
+    ".inc",
+    ".aw",
+    ".fcgi",
+    ".ctp",
+    ".module",
+    ".install",
+    ".profile",
+    ".theme",
+    ".engine",
+    ".twig",
+    ".blade",
+    ".tpl",
+    ".latte",
+    ".volt",
+}
+
 READABLE_EXTENSIONS = {
     ".txt",
     ".md",
@@ -64,12 +92,21 @@
     ".java",
     ".go",
     ".rs",
+    ".swift",
+    ".kt",
+    ".kts",
     ".rb",
     ".sh",
     ".csv",
     ".sql",
     ".toml",
-}
+    # C# / .NET
+    ".cs",
+    ".csproj",
+    ".sln",
+    ".razor",
+    ".cshtml",
+} | PHP_EXTENSIONS
 
 SKIP_FILENAMES = {
     "entities.json",
diff --git a/mempalace/normalize.py b/mempalace/normalize.py
index 9f9fe0a90..53c186af6 100644
--- a/mempalace/normalize.py
+++ b/mempalace/normalize.py
@@ -9,6 +9,9 @@
     - Claude Code JSONL (with tool_use/tool_result block capture)
     - OpenAI Codex CLI JSONL
     - Gemini CLI JSONL (~/.gemini/tmp/<project_hash>/chats/session-*.jsonl)
+    - Pi agent JSONL
+    - Gemini CLI / Google AI Studio JSON sessions (contents / messages / flat list)
+    - Continue.dev session JSON (~/.continue/sessions/*.json)
     - Slack JSON export
     - Plain text (pass through for paragraph chunking)
 
@@ -162,12 +165,22 @@ def _try_normalize_json(content: str) -> Optional[str]:
     if normalized:
         return normalized
 
+    normalized = _try_pi_jsonl(content)
+    if normalized:
+        return normalized
+
     try:
         data = json.loads(content)
     except json.JSONDecodeError:
         return None
 
-    for parser in (_try_claude_ai_json, _try_chatgpt_json, _try_slack_json):
+    for parser in (
+        _try_gemini_json,
+        _try_claude_ai_json,
+        _try_chatgpt_json,
+        _try_continue_json,
+        _try_slack_json,
+    ):
         normalized = parser(data)
         if normalized:
             return normalized
@@ -353,6 +366,135 @@ def _try_gemini_jsonl(content: str) -> Optional[str]:
     return None
 
 
+def _try_pi_jsonl(content: str) -> Optional[str]:
+    """Pi agent sessions (~/.config/pi/agent/sessions/{cwd}/{timestamp}_{uuid}.jsonl).
+
+    Pi stores sessions as JSONL with a tree-structured message history.
+    User messages have role "user" with content as string or [{type, text}] blocks.
+    Assistant messages have role "assistant" with content as [{type, text}] blocks
+    (may also include "thinking" blocks which are skipped by _extract_content).
+    Tool results (role "toolResult") are skipped — operational, not conversation.
+
+    Format documented at github.com/badlogic/pi-mono session.md.
+    """
+    lines = [line.strip() for line in content.strip().split("\n") if line.strip()]
+    messages = []
+    has_session_header = False
+    for line in lines:
+        try:
+            entry = json.loads(line)
+        except json.JSONDecodeError:
+            continue
+        if not isinstance(entry, dict):
+            continue
+
+        entry_type = entry.get("type", "")
+        if entry_type == "session" and "version" in entry:
+            has_session_header = True
+            continue
+
+        if entry_type != "message":
+            continue
+
+        message = entry.get("message", {})
+        if not isinstance(message, dict):
+            continue
+
+        role = message.get("role", "")
+        text = _extract_content(message.get("content", ""))
+
+        if role == "user" and text:
+            messages.append(("user", text))
+        elif role == "assistant" and text:
+            messages.append(("assistant", text))
+
+    if len(messages) >= 2 and has_session_header:
+        return _messages_to_transcript(messages)
+    return None
+
+
+def _try_gemini_json(data) -> Optional[str]:
+    """Gemini CLI / Google AI Studio JSON sessions.
+
+    Handles three layouts:
+
+    1. **Gemini API contents format** — used by Gemini CLI session files
+       (``~/.gemini/sessions/*.json``):
+       ``{"contents": [{"role": "user", "parts": [{"text": "..."}]}, ...]}``
+
+    2. **Messages wrapper** — exports that wrap the conversation under a
+       ``messages`` key:
+       ``{"messages": [{"role": "user", "content": "..."}, {"role": "model", "content": "..."}]}``
+
+    3. **Flat messages list** — top-level array form:
+       ``[{"role": "user", "content": "..."}, {"role": "model", "content": "..."}]``
+
+    Gemini uses ``"model"`` as the assistant role (not ``"assistant"``).
+    Detection requires at least one ``role="model"`` entry to disambiguate
+    from Claude/ChatGPT exports that use ``"assistant"``. This parser is
+    placed *before* ``_try_claude_ai_json`` in the dispatch chain so that
+    the layout-2 ``{"messages": [...]}`` wrapper does not get silently
+    claimed by the Claude parser, which would drop the model turns.
+    """
+    contents = None
+
+    # Layout 1: {"contents": [...]}
+    if isinstance(data, dict) and "contents" in data:
+        contents = data["contents"]
+    # Layout 2a: {"messages": [...]}
+    elif isinstance(data, dict) and "messages" in data:
+        contents = data["messages"]
+    # Layout 2b: top-level list
+    elif isinstance(data, list):
+        contents = data
+
+    if not isinstance(contents, list) or len(contents) < 2:
+        return None
+
+    messages = []
+    has_model_role = False
+    for item in contents:
+        if not isinstance(item, dict):
+            continue
+        role = item.get("role", "")
+
+        # Extract text — try "parts" first (Gemini API), then "content" (flat).
+        text = ""
+        parts = item.get("parts")
+        if isinstance(parts, list):
+            text_parts = []
+            for p in parts:
+                if isinstance(p, str):
+                    text_parts.append(p)
+                elif isinstance(p, dict) and "text" in p:
+                    text_parts.append(p["text"])
+            text = " ".join(text_parts).strip()
+        else:
+            text = _extract_content(item.get("content", ""))
+
+        if not text:
+            continue
+
+        if role == "user":
+            messages.append(("user", text))
+        elif role == "model":
+            messages.append(("assistant", text))
+            has_model_role = True
+        elif role == "assistant":
+            # Defensive: some hand-crafted exports use "assistant" even
+            # for Gemini sessions. Accept but don't flip has_model_role.
+            messages.append(("assistant", text))
+
+    # Disambiguator: must have seen at least one role="model" entry.
+    # This prevents the Gemini parser from claiming Claude/ChatGPT data.
+    if not has_model_role:
+        return None
+
+    if len(messages) >= 2:
+        return _messages_to_transcript(messages)
+    return None
+
+
 def _try_claude_ai_json(data) -> Optional[str]:
     """Claude.ai JSON export: flat messages list or privacy export with chat_messages."""
     if isinstance(data, dict):
@@ -485,6 +627,61 @@ def _try_slack_json(data) -> Optional[str]:
     return None
 
 
+def _try_continue_json(data) -> Optional[str]:
+    """Continue.dev session JSON (~/.continue/sessions/*.json).
+
+    Sessions contain a ``history`` array of ``{role, content}`` message objects,
+    plus optional metadata (``title``, ``sessionId``, ``dateCreated``).
+    System messages are skipped.  Tool-call messages (role ``tool``) are
+    formatted inline when they contain text content.
+    """
+    if not isinstance(data, dict) or "history" not in data:
+        return None
+    history = data["history"]
+    if not isinstance(history, list):
+        return None
+
+    messages = []
+    for item in history:
+        if not isinstance(item, dict):
+            continue
+        role = item.get("role", "")
+        content = item.get("content", "")
+
+        # Extract text from string or list-of-blocks content
+        if isinstance(content, list):
+            parts = []
+            for block in content:
+                if isinstance(block, dict):
+                    if block.get("type") == "text":
+                        parts.append(block.get("text", ""))
+                elif isinstance(block, str):
+                    parts.append(block)
+            text = "\n".join(p for p in parts if p).strip()
+        elif isinstance(content, str):
+            text = content.strip()
+        else:
+            continue
+
+        if not text:
+            continue
+
+        if role == "user":
+            messages.append(("user", text))
+        elif role == "assistant":
+            messages.append(("assistant", text))
+        elif role == "tool":
+            # Append tool output to the previous assistant turn if possible
+            if messages and messages[-1][0] == "assistant":
+                prev_role, prev_text = messages[-1]
+                messages[-1] = (prev_role, prev_text + "\n" + f"[tool] {text}")
+        # Skip system and other roles
+
+    if len(messages) >= 2:
+        return _messages_to_transcript(messages)
+    return None
+
+
 def _extract_content(content, tool_use_map: dict = None) -> str:
     """Pull text from content — handles str, list of blocks, or dict.
 
diff --git a/mempalace/project_scanner.py b/mempalace/project_scanner.py
index 521bfa296..d92b8167b 100644
--- a/mempalace/project_scanner.py
+++ b/mempalace/project_scanner.py
@@ -3,8 +3,8 @@
 
 For a codebase with build manifests or git history, this beats regex-based
 entity detection by a wide margin: the project's own name is already written
-down in package.json / pyproject.toml / Cargo.toml / go.mod, and the people
-who worked on it are in `git log`.
+down in package.json / pyproject.toml / Cargo.toml / go.mod / pom.xml /
+Gradle manifests, and the people who worked on it are in `git log`.
 
 This module is used as the primary signal in `mempalace init`. The regex
 detector in entity_detector.py stays as a fallback for prose-only folders
@@ -21,6 +21,7 @@
 import os
 import re
 import subprocess
+import xml.etree.ElementTree as ET
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Optional
@@ -164,11 +165,63 @@ def _parse_gomod(path: Path) -> Optional[str]:
     return None
 
 
+def _xml_local_name(tag: str) -> str:
+    return tag.rsplit("}", 1)[-1] if "}" in tag else tag
+
+
+def _parse_pom(path: Path) -> Optional[str]:
+    try:
+        root = ET.parse(path).getroot()
+    except (ET.ParseError, OSError):
+        return None
+    for child in root:
+        if isinstance(child.tag, str) and _xml_local_name(child.tag) == "artifactId":
+            name = (child.text or "").strip()
+            return name or None
+    return None
+
+
+_GRADLE_ROOT_PROJECT_NAME_PATTERNS = [
+    re.compile(r"""(?m)^\s*rootProject\.name\s*=\s*(["'])(?P<name>[^"']+)\1"""),
+    re.compile(r"""(?m)^\s*rootProject\.name\.set\(\s*(["'])(?P<name>[^"']+)\1\s*\)"""),
+]
+
+
+def _parse_gradle_root_project_name(path: Path) -> Optional[str]:
+    try:
+        text = path.read_text(encoding="utf-8", errors="replace")
+    except OSError:
+        return None
+    for pattern in _GRADLE_ROOT_PROJECT_NAME_PATTERNS:
+        match = pattern.search(text)
+        if match:
+            name = match.group("name").strip()
+            return name or None
+    return None
+
+
+def _parse_gradle(path: Path) -> Optional[str]:
+    if path.name.startswith("build.gradle"):
+        for settings_name in ("settings.gradle.kts", "settings.gradle"):
+            name = _parse_gradle_root_project_name(path.with_name(settings_name))
+            if name:
+                return name
+    name = _parse_gradle_root_project_name(path)
+    if name:
+        return name
+    return path.parent.name or None
+
+
 MANIFEST_PRIORITY = {
     "pyproject.toml": 0,
     "package.json": 1,
     "Cargo.toml": 2,
     "go.mod": 3,
+    "pom.xml": 4,
+    "settings.gradle": 5,
+    "settings.gradle.kts": 6,
+    "build.gradle": 7,
+    "build.gradle.kts": 8,
 }
 # Sentinel so unknown manifests always sort after the known manifest types above.
 UNKNOWN_MANIFEST_PRIORITY = max(MANIFEST_PRIORITY.values()) + 1
@@ -177,6 +230,18 @@ def _parse_gomod(path: Path) -> Optional[str]:
     "pyproject.toml": _parse_pyproject,
     "Cargo.toml": _parse_cargo,
     "go.mod": _parse_gomod,
+    "pom.xml": _parse_pom,
+    "settings.gradle": _parse_gradle,
+    "settings.gradle.kts": _parse_gradle,
+    "build.gradle": _parse_gradle,
+    "build.gradle.kts": _parse_gradle,
+}
+JAVA_MANIFESTS = {
+    "pom.xml",
+    "settings.gradle",
+    "settings.gradle.kts",
+    "build.gradle",
+    "build.gradle.kts",
 }
 
 
@@ -461,10 +526,12 @@ def scan(root: str | os.PathLike) -> tuple[list[ProjectInfo], list[PersonInfo]]:
 
     for repo in repos:
         manifests = _collect_manifest_names(repo)
-        if manifests:
-            manifest_file, proj_name, _ = manifests[0]
+        root_manifest = next((entry for entry in manifests if entry[2] == repo), None)
+        if root_manifest:
+            manifest_file, proj_name, _ = root_manifest
         else:
             manifest_file, proj_name = None, repo.name
+        extra_manifests = [entry for entry in manifests if entry != root_manifest]
 
         authors = _git_authors(repo)
         non_bot_authors = [(name, email) for name, email in authors if not _is_bot(name, email)]
@@ -501,17 +568,35 @@ def scan(root: str | os.PathLike) -> tuple[list[ProjectInfo], list[PersonInfo]]:
         if existing is None or proj.user_commits > existing.user_commits:
             projects[proj_name] = proj
 
+        for extra_manifest, extra_name, extra_dir in extra_manifests:
+            if extra_manifest not in JAVA_MANIFESTS:
+                continue
+            existing = projects.get(extra_name)
+            if existing is not None and (
+                existing.manifest is not None or existing.repo_root != repo
+            ):
+                continue
+            projects[extra_name] = ProjectInfo(
+                name=extra_name,
+                repo_root=extra_dir,
+                manifest=extra_manifest,
+                has_git=True,
+                total_commits=total_commits,
+                user_commits=user_commits,
+                is_mine=is_mine,
+            )
+
     people = _dedupe_people(all_commits)
 
     # Handle case: root has manifests but no git repo anywhere
     if not repos:
         manifests = _collect_manifest_names(root_path)
-        for manifest_file, proj_name, _dirpath in manifests:
+        for manifest_file, proj_name, dirpath in manifests:
             if proj_name in projects:
                 continue
             projects[proj_name] = ProjectInfo(
                 name=proj_name,
-                repo_root=root_path,
+                repo_root=dirpath,
                 manifest=manifest_file,
                 has_git=False,
             )
@@ -605,8 +690,8 @@ def discover_entities(
     plugs into ``confirm_entities`` unchanged.
 
     Order of signal preference:
-      1. Package manifests (package.json, pyproject.toml, Cargo.toml, go.mod)
-         → canonical project names
+      1. Package manifests (package.json, pyproject.toml, Cargo.toml, go.mod,
+         pom.xml, Gradle manifests) → canonical project names
       2. Git commit authors → real people with real commit counts
       3. Claude Code conversation dirs (~/.claude/projects/) → per-session
          project names (pulled from each session's ``cwd`` metadata)
diff --git a/mempalace/repair.py b/mempalace/repair.py
index 46de6228d..8319e3618 100644
--- a/mempalace/repair.py
+++ b/mempalace/repair.py
@@ -43,6 +43,7 @@
 from chromadb.errors import NotFoundError as ChromaNotFoundError
 
 from .backends.chroma import ChromaBackend, hnsw_capacity_status
+from .config import sqlite_read_uri
 
 
 COLLECTION_NAME = "mempalace_drawers"
@@ -476,7 +477,7 @@ def sqlite_drawer_count(palace_path: str, collection_name: Optional[str] = None)
     try:
         import sqlite3
 
-        conn = sqlite3.connect(f"file:{sqlite_path}?mode=ro", uri=True)
+        conn = sqlite3.connect(sqlite_read_uri(sqlite_path), uri=True)
         try:
             row = conn.execute(
                 """
@@ -516,7 +517,7 @@ def sqlite_integrity_errors(palace_path: str) -> list[str]:
         return []
 
     try:
-        with sqlite3.connect(f"file:{sqlite_path}?mode=ro", uri=True) as conn:
+        with sqlite3.connect(sqlite_read_uri(sqlite_path), uri=True) as conn:
             rows = conn.execute("PRAGMA quick_check").fetchall()
     except sqlite3.Error as e:
         return [f"PRAGMA quick_check failed: {e}"]
@@ -563,6 +564,40 @@ def print_sqlite_integrity_abort(palace_path: str, errors: list[str]) -> None:
     print("    6. Re-run `mempalace repair --yes`.")
 
 
+def index_read_recovery_guidance() -> str:
+    """Recovery guidance for a failed drawer-index read in the legacy paths.
+
+    Both ``cmd_repair`` (cli.py) and :func:`rebuild_index` read the drawers
+    collection via ``Collection.count()`` as their first step. The common
+    reason that read raises is the chromadb compactor failing to apply the
+    WAL into the HNSW segment (``InternalError: Failed to apply logs to the
+    hnsw segment writer``, issues #1308 / #1843): the on-disk HNSW index is
+    corrupt while the rows stay intact in ``chroma.sqlite3``, so
+    :func:`rebuild_from_sqlite` (``repair --mode from-sqlite``) recovers them
+    and re-mining would needlessly drop drawers added through the MCP server
+    and diary entries that have no source file.
+
+    The other thing that strands this read is a live MemPalace server or
+    mine still holding the palace open, so the guidance says to stop it and
+    retry before assuming corruption. Worded conditionally because the bare
+    ``except Exception`` cannot prove which case it caught. Returned as a
+    pre-indented block so the ``print``-based CLI path and the
+    ``progress``-callable rebuild path emit it unchanged.
+    """
+    return (
+        "  If a MemPalace server or mine is still running against this palace,\n"
+        "  stop it and retry. Otherwise the drawer index is likely corrupt\n"
+        "  (for example a failed chromadb HNSW compaction) while your drawer\n"
+        "  rows remain in chroma.sqlite3. Rebuild the index from SQLite rather\n"
+        "  than re-mining:\n"
+        "\n"
+        "      mempalace repair --mode from-sqlite --archive-existing\n"
+        "\n"
+        "  (Re-mining from source files would drop drawers added via the MCP\n"
+        "  server and diary entries, which have no source file.)"
+    )
+
+
 def maybe_repair_poisoned_max_seq_id_before_rebuild(
     palace_path: str,
     *,
@@ -791,7 +826,7 @@ def rebuild_index(
         total = col.count()
     except Exception as e:
         progress(f"  Error reading palace: {e}")
-        progress("  Palace may need to be re-mined from source files.")
+        progress(index_read_recovery_guidance())
         return
 
     progress(f"  Drawers found: {total}")
@@ -1013,7 +1048,7 @@ def extract_via_sqlite(palace_path: str, collection_name: str) -> Iterator[tuple
     if not os.path.isfile(sqlite_path):
         return
 
-    conn = sqlite3.connect(f"file:{sqlite_path}?mode=ro", uri=True)
+    conn = sqlite3.connect(sqlite_read_uri(sqlite_path), uri=True)
     try:
         seg_row = conn.execute(
             """
@@ -1059,6 +1094,37 @@ def extract_via_sqlite(palace_path: str, collection_name: str) -> Iterator[tuple
         conn.close()
 
 
+def _preserve_knowledge_graph_sqlite(source_palace: str, dest_palace: str) -> list[str]:
+    """Copy KG SQLite sidecars when rebuilding a palace from chroma.sqlite3.
+
+    rebuild_from_sqlite reconstructs Chroma collections into a fresh
+    destination directory. The knowledge graph is a separate SQLite database,
+    so it must be copied explicitly or the repair succeeds while silently
+    dropping KG state (#1816).
+    """
+
+    copied: list[str] = []
+
+    for suffix in ("", "-wal", "-shm"):
+        filename = f"knowledge_graph.sqlite3{suffix}"
+        src = os.path.join(source_palace, filename)
+        dst = os.path.join(dest_palace, filename)
+
+        if not os.path.isfile(src):
+            continue
+        if os.path.abspath(src) == os.path.abspath(dst):
+            continue
+
+        os.makedirs(dest_palace, exist_ok=True)
+        shutil.copy2(src, dst)
+        copied.append(filename)
+
+    if copied:
+        print(" Preserved knowledge graph: " + ", ".join(copied))
+
+    return copied
+
+
 def rebuild_from_sqlite(
     source_palace: str,
     dest_palace: str,
@@ -1205,6 +1271,7 @@ def rebuild_from_sqlite(
             )
 
     os.makedirs(dest_palace, exist_ok=True)
+    _preserve_knowledge_graph_sqlite(source_palace, dest_palace)
 
     # Backend lifetime is wrapped in try/finally so the dest palace's
     # PersistentClient handle (opened lazily inside ``create_collection``
@@ -1308,7 +1375,15 @@ def status(palace_path=None, collection_name: Optional[str] = None) -> dict:
             print(f"    note:           {info['message']}")
 
     if drawers["diverged"] or closets["diverged"]:
-        print("\n  Recommended: run `mempalace repair` to rebuild the index.")
+        print(
+            "\n  Recommended: rebuild the index from SQLite rather than re-mining:\n"
+            "\n      mempalace repair --mode from-sqlite --archive-existing\n"
+            "\n  A diverged index usually means the HNSW segment is out of sync with\n"
+            "  chroma.sqlite3 (for example a failed chromadb HNSW compaction). The\n"
+            "  drawer rows are intact in SQLite, so --mode from-sqlite recovers them.\n"
+            "  Do not re-mine from source files: that would drop drawers added via\n"
+            "  the MCP server and diary entries, which have no source file (#1843)."
+        )
     print()
     return {"drawers": drawers, "closets": closets}
 
diff --git a/mempalace/searcher.py b/mempalace/searcher.py
index 43796c322..eac488f13 100644
--- a/mempalace/searcher.py
+++ b/mempalace/searcher.py
@@ -23,6 +23,7 @@
     PalaceNotFoundError,
     UnsupportedCapabilityError,
 )
+from .config import sqlite_read_uri
 from .palace import (
     _open_collection_or_explain,
     get_closets_collection,
@@ -225,15 +226,24 @@ def _hybrid_rank(
     return results
 
 
-def build_where_filter(wing: str = None, room: str = None) -> dict:
-    """Build ChromaDB where filter for wing/room filtering."""
-    if wing and room:
-        return {"$and": [{"wing": wing}, {"room": room}]}
-    elif wing:
-        return {"wing": wing}
-    elif room:
-        return {"room": room}
-    return {}
+def build_where_filter(wing: str = None, room: str = None, source_file: str = None) -> dict:
+    """Build a ChromaDB where filter from optional wing/room/source_file.
+
+    ChromaDB needs a ``$and`` only when ≥2 clauses are present; a single
+    clause is returned bare and zero clauses yield an empty filter (#1815).
+    """
+    clauses = []
+    if wing:
+        clauses.append({"wing": wing})
+    if room:
+        clauses.append({"room": room})
+    if source_file:
+        clauses.append({"source_file": source_file})
+    if not clauses:
+        return {}
+    if len(clauses) == 1:
+        return clauses[0]
+    return {"$and": clauses}
 
 
 def _extract_drawer_ids_from_closet(closet_doc: str) -> list:
@@ -475,6 +485,7 @@ def _bm25_only_via_sqlite(
     palace_path: str,
     wing: str = None,
     room: str = None,
+    source_file: str = None,
     n_results: int = 5,
     max_candidates: int = 500,
     _include_internal: bool = False,
@@ -510,7 +521,7 @@ def _bm25_only_via_sqlite(
     def _metadata_filter_sql(row_id_expr: str) -> tuple[str, list[str]]:
         clauses = []
         params = []
-        for key, value in (("wing", wing), ("room", room)):
+        for key, value in (("wing", wing), ("room", room), ("source_file", source_file)):
             if not value:
                 continue
             clauses.append(
@@ -533,7 +544,7 @@ def _metadata_filter_sql(row_id_expr: str) -> tuple[str, list[str]]:
         return "".join(clauses), params
 
     try:
-        conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
+        conn = sqlite3.connect(sqlite_read_uri(db_path), uri=True)
     except sqlite3.Error as e:
         return {"error": f"sqlite open failed: {e}"}
 
@@ -623,7 +634,7 @@ def _metadata_filter_sql(row_id_expr: str) -> tuple[str, list[str]]:
         if not candidate_ids:
             return {
                 "query": query,
-                "filters": {"wing": wing, "room": room},
+                "filters": {"wing": wing, "room": room, "source_file": source_file},
                 "total_before_filter": 0,
                 "results": [],
                 "fallback": "bm25_only_via_sqlite",
@@ -659,6 +670,8 @@ def _metadata_filter_sql(row_id_expr: str) -> tuple[str, list[str]]:
             continue
         if room and meta.get("room") != room:
             continue
+        if source_file and meta.get("source_file") != source_file:
+            continue
         full_source = meta.get("source_file", "") or ""
         candidates.append(
             {
@@ -666,6 +679,7 @@ def _metadata_filter_sql(row_id_expr: str) -> tuple[str, list[str]]:
                 "wing": meta.get("wing", "unknown"),
                 "room": meta.get("room", "unknown"),
                 "source_file": Path(full_source).name if full_source else "?",
+                "source_path": full_source,
                 "created_at": meta.get("filed_at", "unknown"),
                 # No vector distance available in BM25-only mode.
                 "similarity": None,
@@ -701,7 +715,7 @@ def _metadata_filter_sql(row_id_expr: str) -> tuple[str, list[str]]:
 
     return {
         "query": query,
-        "filters": {"wing": wing, "room": room},
+        "filters": {"wing": wing, "room": room, "source_file": source_file},
         "total_before_filter": len(candidates),
         "results": hits,
         "fallback": "bm25_only_via_sqlite",
@@ -717,6 +731,7 @@ def _merge_bm25_union_candidates(
     room: str,
     n_results: int,
     max_distance: float = 0.0,
+    source_file: str = None,
 ) -> None:
     """Append top-K backend lexical candidates into ``hits`` in place.
 
@@ -743,7 +758,7 @@ def _merge_bm25_union_candidates(
     if max_distance > 0.0:
         return
 
-    where = build_where_filter(wing, room)
+    where = build_where_filter(wing, room, source_file)
     try:
         lexical = drawers_col.lexical_search(
             query=query,
@@ -766,6 +781,7 @@ def _merge_bm25_union_candidates(
                 "wing": meta.get("wing", "unknown"),
                 "room": meta.get("room", "unknown"),
                 "source_file": Path(full_source).name if full_source else "?",
+                "source_path": full_source,
                 "created_at": meta.get("filed_at", "unknown"),
                 "similarity": None,
                 "distance": None,
@@ -831,6 +847,7 @@ def _apply_candidate_strategy(
     room: str,
     n_results: int,
     max_distance: float = 0.0,
+    source_file: str = None,
 ) -> None:
     """Dispatch to the registered merger for ``strategy``.
 
@@ -839,7 +856,16 @@ def _apply_candidate_strategy(
     """
     merger = _CANDIDATE_MERGERS[strategy]
     if merger is not None:
-        merger(hits, drawers_col, query, wing, room, n_results, max_distance=max_distance)
+        merger(
+            hits,
+            drawers_col,
+            query,
+            wing,
+            room,
+            n_results,
+            max_distance=max_distance,
+            source_file=source_file,
+        )
 
 
 def _finalize_candidate_hits(
@@ -852,6 +878,7 @@ def _finalize_candidate_hits(
     room: str,
     n_results: int,
     max_distance: float,
+    source_file: str = None,
 ) -> tuple:
     try:
         _apply_candidate_strategy(
@@ -863,6 +890,7 @@ def _finalize_candidate_hits(
             room,
             n_results,
             max_distance=max_distance,
+            source_file=source_file,
         )
     except UnsupportedCapabilityError:
         return [], {
@@ -904,6 +932,7 @@ def _vector_disabled_search(
     room: str,
     n_results: int,
     collection_name: str,
+    source_file: str = None,
 ) -> dict:
     try:
         backend_name = resolve_backend_name(palace_path)
@@ -923,6 +952,7 @@ def _vector_disabled_search(
         palace_path,
         wing=wing,
         room=room,
+        source_file=source_file,
         n_results=n_results,
         collection_name=collection_name,
     )
@@ -956,7 +986,9 @@ def _open_search_collection(palace_path: str, collection_name: str):
         }
 
 
-def _query_drawers_with_filter_fallback(drawers_col, dkwargs, query, n_results, wing, room):
+def _query_drawers_with_filter_fallback(
+    drawers_col, dkwargs, query, n_results, wing, room, source_file=None
+):
     """Run the filtered drawer query, falling back to an unfiltered query plus a
     Python-side post-filter when ChromaDB raises on the filtered query.
 
@@ -964,7 +996,7 @@ def _query_drawers_with_filter_fallback(drawers_col, dkwargs, query, n_results,
     "Error finding id" even when unfiltered search works fine — it happens when
     drawers are ingested via two different paths (e.g. bulk import vs MCP tool
     calls), leaving the vector index inconsistent with the metadata store. We
-    retry unfiltered (over-fetching) and re-apply the wing/room filter in Python.
+    retry unfiltered (over-fetching) and re-apply the wing/room/source_file filter in Python.
     See #1245 / #1035.
     """
     where = dkwargs.get("where")
@@ -993,6 +1025,8 @@ def _query_drawers_with_filter_fallback(drawers_col, dkwargs, query, n_results,
                 continue
             if room and meta.get("room") != room:
                 continue
+            if source_file and meta.get("source_file") != source_file:
+                continue
             fdocs.append(doc)
             fmetas.append(meta)
             fdists.append(dist)
@@ -1004,6 +1038,7 @@ def search_memories(
     palace_path: str,
     wing: str = None,
     room: str = None,
+    source_file: str = None,
     n_results: int = 5,
     max_distance: float = 0.0,
     vector_disabled: bool = False,
@@ -1019,6 +1054,8 @@ def search_memories(
         palace_path: Path to the ChromaDB palace directory.
         wing: Optional wing filter.
         room: Optional room filter.
+        source_file: Optional exact source_file filter. Matches the full
+            stored source_file value verbatim (#1815).
         n_results: Max results to return.
         max_distance: Max cosine distance threshold. The palace collection uses
             cosine distance (hnsw:space=cosine) — 0 = identical, 2 = opposite.
@@ -1058,6 +1095,7 @@ def search_memories(
             room=room,
             n_results=n_results,
             collection_name=collection_name,
+            source_file=source_file,
         )
 
     drawers_col, open_error = _open_search_collection(palace_path, collection_name)
@@ -1065,7 +1103,7 @@ def search_memories(
         return open_error
 
     metric = _metric_for_collection(drawers_col)
-    where = build_where_filter(wing, room)
+    where = build_where_filter(wing, room, source_file)
 
     # Hybrid retrieval: always query drawers directly (the floor), then use
     # closet hits to boost rankings. Closets are a ranking SIGNAL, never a
@@ -1083,7 +1121,7 @@ def search_memories(
         if where:
             dkwargs["where"] = where
         drawer_results = _query_drawers_with_filter_fallback(
-            drawers_col, dkwargs, query, n_results, wing, room
+            drawers_col, dkwargs, query, n_results, wing, room, source_file
         )
     except Exception as e:
         return {"error": f"Search error: {e}"}
@@ -1155,7 +1193,10 @@ def search_memories(
             "text": doc,
             "wing": meta.get("wing", "unknown"),
             "room": meta.get("room", "unknown"),
+            # source_file is the basename (display); source_path is the full
+            # stored value, the round-trippable key for the source_file filter.
             "source_file": Path(source).name if source else "?",
+            "source_path": source,
             "created_at": meta.get("filed_at", "unknown"),
             "similarity": round(_distance_to_similarity(effective_dist, metric), 3),
             "distance": round(dist, 4),
@@ -1253,13 +1294,14 @@ def search_memories(
         room=room,
         n_results=n_results,
         max_distance=max_distance,
+        source_file=source_file,
     )
     if strategy_error:
         return strategy_error
 
     return {
         "query": query,
-        "filters": {"wing": wing, "room": room},
+        "filters": {"wing": wing, "room": room, "source_file": source_file},
         "total_before_filter": len(_first_or_empty(drawer_results, "documents")),
         "results": hits,
     }
diff --git a/mempalace/service.py b/mempalace/service.py
new file mode 100644
index 000000000..fc0bccaf8
--- /dev/null
+++ b/mempalace/service.py
@@ -0,0 +1,404 @@
+"""Shared service operations used by daemon-backed entry points.
+
+The MCP server remains the owner of MCP transport details. This module owns the
+small, transport-neutral execution surface the daemon needs: classify known
+tools and execute durable background jobs without printing directly to the
+caller's terminal.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import io
+import os
+import sys
+from typing import Any
+
+from .config import MempalaceConfig
+
+_EXPLICIT_BACKEND_ENV = "MEMPALACE_BACKEND_EXPLICIT"
+_PALACE_PATH_ENV = "MEMPALACE_PALACE_PATH"
+_BACKEND_ENV = "MEMPALACE_BACKEND"
+# Env vars a job may mutate via _apply_backend / palace_path injection. They are
+# snapshotted per job and restored afterward so a job that switches the backend
+# (e.g. qdrant) cannot poison every later job in the same daemon process —
+# including mcp_tool jobs, which read MempalaceConfig (and thus the leaked env).
+_PER_JOB_ENV = (_PALACE_PATH_ENV, _BACKEND_ENV, _EXPLICIT_BACKEND_ENV)
+
+
+READ_TOOLS = frozenset(
+    {
+        "mempalace_status",
+        "mempalace_list_wings",
+        "mempalace_list_rooms",
+        "mempalace_get_taxonomy",
+        "mempalace_get_aaak_spec",
+        "mempalace_traverse",
+        "mempalace_find_tunnels",
+        "mempalace_graph_stats",
+        "mempalace_list_tunnels",
+        "mempalace_list_hallways",
+        "mempalace_follow_tunnels",
+        "mempalace_search",
+        "mempalace_check_duplicate",
+        "mempalace_get_drawer",
+        "mempalace_list_drawers",
+        "mempalace_diary_read",
+        "mempalace_memories_filed_away",
+        "mempalace_kg_query",
+        "mempalace_kg_stats",
+        "mempalace_kg_timeline",
+    }
+)
+
+WRITE_TOOLS = frozenset(
+    {
+        "mempalace_add_drawer",
+        "mempalace_checkpoint",
+        "mempalace_delete_drawer",
+        "mempalace_update_drawer",
+        "mempalace_diary_write",
+        "mempalace_kg_add",
+        "mempalace_kg_invalidate",
+        "mempalace_create_tunnel",
+        "mempalace_delete_tunnel",
+        "mempalace_delete_hallway",
+        "mempalace_hook_settings",
+    }
+)
+
+MAINTENANCE_TOOLS = frozenset({"mempalace_mine", "mempalace_sync", "mempalace_reconnect"})
+
+
+def classify_tool(name: str) -> str:
+    """Return ``read``, ``write``, ``maintenance``, or ``unknown`` for an MCP tool."""
+    if name in READ_TOOLS:
+        return "read"
+    if name in WRITE_TOOLS:
+        return "write"
+    if name in MAINTENANCE_TOOLS:
+        return "maintenance"
+    return "unknown"
+
+
+def _apply_backend(backend: str | None) -> None:
+    if not backend:
+        return
+    backend_name = str(backend).strip().lower()
+    from .backends import get_backend_class
+
+    get_backend_class(backend_name)
+    os.environ[_EXPLICIT_BACKEND_ENV] = backend_name
+    os.environ[_BACKEND_ENV] = backend_name
+
+
+def _capture(fn):
+    stdout = io.StringIO()
+    stderr = io.StringIO()
+    with contextlib.redirect_stdout(stdout), contextlib.redirect_stderr(stderr):
+        result = fn()
+    return result, stdout.getvalue(), stderr.getvalue()
+
+
+def execute_job(kind: str, payload: dict[str, Any]) -> dict[str, Any]:
+    """Execute one daemon job and return a JSON-serializable result."""
+
+    def _run():
+        if kind == "mine":
+            return run_mine(payload)
+        if kind == "sync":
+            return run_sync(payload)
+        if kind == "diary_write":
+            return run_diary_write(payload)
+        if kind == "mcp_tool":
+            return run_mcp_tool(payload)
+        return {"success": False, "error": f"unknown daemon job kind: {kind}", "exit_code": 2}
+
+    # Per-job env isolation: snapshot the backend/palace env vars and restore
+    # them after the job so one job's _apply_backend / palace_path injection
+    # can't leak into the next job in the same long-lived process.
+    saved_env = {key: os.environ.get(key) for key in _PER_JOB_ENV}
+    try:
+        result, stdout, stderr = _capture(_run)
+    finally:
+        for key, value in saved_env.items():
+            if value is None:
+                os.environ.pop(key, None)
+            else:
+                os.environ[key] = value
+    if result is None:
+        result = {}
+    if not isinstance(result, dict):
+        result = {"success": True, "value": result}
+    result.setdefault("success", True)
+    result.setdefault("exit_code", 0 if result.get("success") else 1)
+    if stdout:
+        result["stdout"] = stdout
+    if stderr:
+        result["stderr"] = stderr
+    return result
+
+
+def run_mine(payload: dict[str, Any]) -> dict[str, Any]:
+    """Run the same mine operation as the CLI, without daemon transport concerns."""
+    palace_path = os.path.abspath(
+        os.path.expanduser(payload.get("palace_path") or MempalaceConfig().palace_path)
+    )
+    os.environ["MEMPALACE_PALACE_PATH"] = palace_path
+    _apply_backend(payload.get("backend"))
+
+    source = payload.get("source") or payload.get("dir")
+    mode = payload.get("mode") or "projects"
+    wing = payload.get("wing")
+    agent = payload.get("agent") or "mempalace"
+    limit = int(payload.get("limit") or 0)
+    dry_run = bool(payload.get("dry_run"))
+
+    if payload.get("redetect_origin"):
+        from .cli import _run_pass_zero
+
+        _run_pass_zero(project_dir=source, palace_dir=palace_path, llm_provider=None)
+
+    from .palace import MineAlreadyRunning, MineValidationError
+
+    try:
+        if mode == "convos":
+            from .convo_miner import mine_convos
+
+            mine_convos(
+                convo_dir=source,
+                palace_path=palace_path,
+                wing=wing,
+                agent=agent,
+                limit=limit,
+                dry_run=dry_run,
+                extract_mode=payload.get("extract") or "exchange",
+            )
+        elif mode == "extract":
+            from .format_miner import mine_formats
+
+            mine_formats(
+                format_dir=source,
+                palace_path=palace_path,
+                wing=wing,
+                agent=agent,
+                limit=limit,
+                dry_run=dry_run,
+            )
+        elif mode == "projects":
+            include_ignored = payload.get("include_ignored") or []
+            from .miner import mine
+
+            mine(
+                project_dir=source,
+                palace_path=palace_path,
+                wing_override=wing,
+                agent=agent,
+                limit=limit,
+                dry_run=dry_run,
+                respect_gitignore=not bool(payload.get("no_gitignore")),
+                include_ignored=include_ignored,
+                max_chunks_per_file=payload.get("max_chunks_per_file"),
+            )
+        else:
+            return {"success": False, "error": f"invalid mine mode: {mode}", "exit_code": 2}
+    except MineAlreadyRunning as exc:
+        return {
+            "success": False,
+            "error": str(exc),
+            "error_class": "LockHeldByOtherProcess",
+            "exit_code": 1,
+        }
+    except MineValidationError as exc:
+        return {
+            "success": False,
+            "error": str(exc),
+            "error_class": "MineValidationError",
+            "exit_code": 1,
+        }
+    except SystemExit as exc:
+        code = exc.code if isinstance(exc.code, int) else 1
+        return {
+            "success": code == 0,
+            "error": str(exc),
+            "error_class": "SystemExit",
+            "exit_code": code,
+        }
+    except Exception as exc:
+        return {"success": False, "error": f"mine failed: {exc}", "exit_code": 1}
+
+    return {"success": True, "kind": "mine", "mode": mode, "dry_run": dry_run, "exit_code": 0}
+
+
+def run_sync(payload: dict[str, Any]) -> dict[str, Any]:
+    """Run sync and render the same operator-facing summary shape as the CLI."""
+    palace_path = os.path.abspath(
+        os.path.expanduser(payload.get("palace_path") or MempalaceConfig().palace_path)
+    )
+    os.environ["MEMPALACE_PALACE_PATH"] = palace_path
+    _apply_backend(payload.get("backend"))
+
+    from .backends import detect_backend_for_path
+    from .palace import MineAlreadyRunning, _backend_artifact_label, resolve_backend_name
+
+    if not os.path.isdir(palace_path):
+        print(f"\n  No palace found at {palace_path}")
+        return {"success": True, "exit_code": 0}
+
+    try:
+        backend_name = resolve_backend_name(palace_path)
+    except Exception as exc:
+        return {
+            "success": False,
+            "error": f"Could not resolve palace backend: {exc}",
+            "exit_code": 1,
+        }
+
+    if detect_backend_for_path(palace_path) is None:
+        print(
+            f"\n  Palace dir at {palace_path} exists but has no "
+            f"{_backend_artifact_label(backend_name)} yet."
+        )
+        print("  Run: mempalace mine <dir>")
+        return {"success": True, "exit_code": 0}
+
+    project_dirs = []
+    if payload.get("dir"):
+        project_dirs.append(os.path.expanduser(str(payload["dir"])))
+    project_dirs.extend(os.path.expanduser(str(root)) for root in payload.get("root") or [])
+    project_dirs = project_dirs or None
+    dry_run = bool(payload.get("dry_run", True))
+
+    print(f"\n{'=' * 55}")
+    print("  MemPalace Sync — Gitignore-aware drawer prune")
+    print(f"{'=' * 55}")
+    print(f"  Palace:   {palace_path}")
+    if payload.get("wing"):
+        print(f"  Wing:     {payload['wing']}")
+    if project_dirs:
+        for project_dir in project_dirs:
+            print(f"  Project:  {project_dir}")
+    print(
+        "  Mode:     DRY RUN (no deletions)" if dry_run else "  Mode:     APPLY (deleting drawers)"
+    )
+    print(f"{'-' * 55}\n")
+
+    try:
+        from .sync import sync_palace
+        from .wal import _wal_log
+
+        report = sync_palace(
+            palace_path=palace_path,
+            project_dirs=project_dirs,
+            wing=payload.get("wing"),
+            dry_run=dry_run,
+            wal_log=_wal_log,
+        )
+    except MineAlreadyRunning as exc:
+        return {
+            "success": False,
+            "error": str(exc),
+            "error_class": "LockHeldByOtherProcess",
+            "exit_code": 1,
+        }
+    except ValueError as exc:
+        return {"success": False, "error": str(exc), "exit_code": 2}
+    except Exception as exc:
+        return {"success": False, "error": f"sync failed: {exc}", "exit_code": 1}
+
+    removed_suffix = "(would remove)" if dry_run else "(removed)"
+    print(f"  Scanned:        {report['scanned']}")
+    print(f"  Kept:           {report['kept']}")
+    print(f"  Gitignored:     {report['gitignored']}  {removed_suffix}")
+    print(f"  Missing:        {report['missing']}  {removed_suffix}")
+    print(f"  No source:      {report['no_source']}  (kept)")
+    print(f"  Out of scope:   {report['out_of_scope']}  (kept)")
+
+    by_source = report.get("by_source") or {}
+    if by_source:
+        top = sorted(by_source.items(), key=lambda kv: -kv[1])[:5]
+        label = "Top sources to remove" if dry_run else "Top sources removed"
+        print(f"\n  {label}:")
+        for src, n in top:
+            print(f"    {src}  ({n})")
+
+    if dry_run:
+        if report["gitignored"] + report["missing"] > 0:
+            print("\n  Re-run with --apply to commit these deletions.")
+    else:
+        print(
+            f"\n  Removed {report['removed_drawers']} drawers, {report['removed_closets']} closets."
+        )
+
+    print(f"\n{'=' * 55}\n")
+    return {"success": True, "report": report, "exit_code": 0}
+
+
+def run_diary_write(payload: dict[str, Any]) -> dict[str, Any]:
+    palace_path = payload.get("palace_path")
+    if palace_path:
+        os.environ["MEMPALACE_PALACE_PATH"] = os.path.abspath(os.path.expanduser(palace_path))
+    _apply_backend(payload.get("backend"))
+
+    from .mcp_server import tool_diary_write
+
+    result = tool_diary_write(
+        agent_name=payload.get("agent_name") or "mempalace",
+        entry=payload.get("entry") or "",
+        topic=payload.get("topic") or "general",
+        wing=payload.get("wing") or "",
+    )
+    result.setdefault("exit_code", 0 if result.get("success") else 1)
+    return result
+
+
+def run_mcp_tool(payload: dict[str, Any]) -> dict[str, Any]:
+    """Execute an MCP tool by name over the daemon queue.
+
+    The daemon is a durable, retried write surface — not a general MCP transport.
+    Restrict ``mcp_tool`` to write-classified tools only: read tools would
+    exfiltrate verbatim palace content into the queue DB and the job result
+    (stored world-readable-by-default without the perms fix, and returned over
+    /jobs), and maintenance tools already have their own dedicated kinds
+    (mine/sync). No internal caller currently uses ``mcp_tool``; this allowlist
+    bounds the blast radius of the generic escape hatch.
+    """
+    name = payload.get("name")
+    arguments = payload.get("arguments") or {}
+    if not isinstance(arguments, dict):
+        return {"success": False, "error": "arguments must be an object", "exit_code": 2}
+    classification = classify_tool(name) if name else "unknown"
+    if classification != "write":
+        return {
+            "success": False,
+            "error": f"daemon mcp_tool only accepts write tools; {name!r} is {classification}",
+            "exit_code": 2,
+        }
+    from .mcp_server import TOOLS
+
+    if name not in TOOLS:
+        return {"success": False, "error": f"unknown MCP tool: {name}", "exit_code": 2}
+    result = TOOLS[name]["handler"](**arguments)
+    if isinstance(result, dict):
+        # Several write tools signal failure with a bare {"error": ...} and no
+        # explicit success flag (e.g. tool_create_tunnel / tool_delete_tunnel
+        # validation paths). Infer failure from the "error" key so the daemon
+        # does not persist a failed write as succeeded with exit_code 0.
+        if "success" not in result:
+            result["success"] = "error" not in result
+        result.setdefault("exit_code", 0 if result.get("success") else 1)
+        return result
+    return {"success": True, "value": result, "exit_code": 0}
+
+
+def print_job_result(result: dict[str, Any]) -> int:
+    """Replay captured daemon job output and return the intended process exit code."""
+    stdout = result.get("stdout")
+    stderr = result.get("stderr")
+    if stdout:
+        print(stdout, end="")
+    if stderr:
+        print(stderr, end="", file=sys.stderr)
+    if not result.get("success", True) and result.get("error") and not stderr:
+        print(f"mempalace: {result['error']}", file=sys.stderr)
+    return int(result.get("exit_code", 0 if result.get("success", True) else 1) or 0)
diff --git a/mempalace/version.py b/mempalace/version.py
index 36716249c..cbc4bd01d 100644
--- a/mempalace/version.py
+++ b/mempalace/version.py
@@ -1,3 +1,3 @@
 """Single source of truth for the MemPalace package version."""
 
-__version__ = "3.4.1"
+__version__ = "3.5.0"
diff --git a/mempalace/wal.py b/mempalace/wal.py
new file mode 100644
index 000000000..bdeb2a56d
--- /dev/null
+++ b/mempalace/wal.py
@@ -0,0 +1,97 @@
+"""Side-effect-free write-ahead log for MemPalace write operations.
+
+This lives in its own module so callers that only need WAL audit logging — the
+CLI ``sync`` path and the daemon's ``service`` layer — can obtain ``_wal_log``
+without importing :mod:`mempalace.mcp_server`. Importing ``mcp_server`` runs its
+module-level stdio protection (``os.dup2(2, 1)`` and ``sys.stdout = sys.stderr``,
+required so the MCP stdio JSON stream isn't corrupted by C-level library
+banners). In a non-MCP process — e.g. the daemon worker or ``mempalace sync`` —
+that redirect is an unwanted import side effect that misroutes operator output,
+so the WAL machinery is kept here, free of any such side effects.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+from datetime import datetime
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+_WAL_FILE = Path(os.path.expanduser("~/.mempalace/wal")) / "write_log.jsonl"
+_WAL_INITIALIZED_DIR = None
+
+# Keys whose values should be redacted in WAL entries to avoid logging sensitive content
+_WAL_REDACT_KEYS = frozenset(
+    {"content", "content_preview", "document", "entry", "entry_preview", "query", "text"}
+)
+
+
+def _ensure_wal() -> None:
+    """Create (and re-harden) the WAL directory lazily, on the first write.
+
+    This must NOT run at import time: a user who removed ``~/.mempalace`` has
+    engaged the documented kill-switch (``hooks_cli._palace_root_exists()``,
+    #1305), and recreating the directory just by importing this module would
+    silently re-arm the autosave/mining hooks they disabled (#1676). Creating
+    it on the first real write keeps the kill-switch contract intact.
+
+    It is deliberately not gated on ``_palace_root_exists()``: by the time a
+    write reaches here the palace is already being recreated by the ChromaDB/KG
+    layer regardless, so gating would only drop audit records, not prevent
+    recreation. Runtime kill-switch enforcement for MCP writes is the broader
+    question tracked in #504.
+
+    Hardening is attempted once per directory and the path cached in
+    ``_WAL_INITIALIZED_DIR`` regardless of outcome (keyed on the path, so a
+    test repointing ``_WAL_FILE`` re-initialises), so a persistent failure on a
+    restricted filesystem does not retry on every write. ``mkdir`` runs only
+    when the initial ``chmod`` raises ``FileNotFoundError`` (EAFP). The parent
+    ``~/.mempalace`` keeps its umask mode, like the other palace directories;
+    the WAL file is created atomically with mode 0o600 by ``_wal_log``.
+    """
+    global _WAL_INITIALIZED_DIR
+    wal_dir = _WAL_FILE.parent
+    if _WAL_INITIALIZED_DIR == wal_dir:
+        return
+    try:
+        wal_dir.chmod(0o700)
+    except FileNotFoundError:
+        try:
+            wal_dir.mkdir(parents=True, exist_ok=True)
+            wal_dir.chmod(0o700)
+        except (OSError, NotImplementedError):
+            pass
+    except (OSError, NotImplementedError):
+        pass
+    # Cache regardless of outcome: one attempt per directory, so a persistent
+    # chmod/mkdir failure (restricted FS) is not retried on every write.
+    _WAL_INITIALIZED_DIR = wal_dir
+
+
+def _wal_log(operation: str, params: dict, result: dict = None):
+    """Append a write operation to the write-ahead log."""
+    # Redact sensitive content from params before logging
+    safe_params = {}
+    for k, v in params.items():
+        if k in _WAL_REDACT_KEYS:
+            safe_params[k] = f"[REDACTED {len(v)} chars]" if isinstance(v, str) else "[REDACTED]"
+        else:
+            safe_params[k] = v
+    entry = {
+        "timestamp": datetime.now().isoformat(),
+        "operation": operation,
+        "params": safe_params,
+        "result": result,
+    }
+    try:
+        # Dir setup shares the append's exception handler below: any WAL
+        # failure is logged and non-fatal, never crashing the tool call.
+        _ensure_wal()
+        fd = os.open(str(_WAL_FILE), os.O_WRONLY | os.O_APPEND | os.O_CREAT, 0o600)
+        with os.fdopen(fd, "a", encoding="utf-8") as f:
+            f.write(json.dumps(entry, default=str) + "\n")
+    except Exception as e:
+        logger.error(f"WAL write failed: {e}")
diff --git a/pyproject.toml b/pyproject.toml
index 4d27ed500..a0c81c2d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mempalace"
-version = "3.4.1"
+version = "3.5.0"
 description = "Give your AI a memory — mine projects and conversations into a searchable palace. No API key required."
 readme = "README.md"
 requires-python = ">=3.9"
@@ -74,7 +74,11 @@ sqlite_exact = "mempalace.backends.sqlite_exact:SQLiteExactBackend"
 dev = [
     "pytest>=7.0",
     "pytest-cov>=4.0",
-    "ruff==0.15.15",
+    # Retries known-transient ChromaDB-on-Windows HNSW compaction failures in CI
+    # (wired only into the test-windows job in ci.yml, scoped to the specific
+    # error). Local/Linux/macOS runs never rerun, so real failures stay loud.
+    "pytest-rerunfailures>=12.0",
+    "ruff==0.15.18",
     "psutil>=5.9",
     # Property-based testing — generates hundreds of random inputs per
     # test to find counterexamples the hand-written positive tests miss.
@@ -131,7 +135,11 @@ extract = [
 dev = [
     "pytest>=7.0",
     "pytest-cov>=4.0",
-    "ruff==0.15.15",
+    # Retries known-transient ChromaDB-on-Windows HNSW compaction failures in CI
+    # (wired only into the test-windows job in ci.yml, scoped to the specific
+    # error). Local/Linux/macOS runs never rerun, so real failures stay loud.
+    "pytest-rerunfailures>=12.0",
+    "ruff==0.15.18",
     "psutil>=5.9",
     "hypothesis>=6.0",
     "pre-commit>=3.0",
diff --git a/skills/mempalace-recall/SKILL.md b/skills/mempalace-recall/SKILL.md
index ee8cbf458..ae354f538 100644
--- a/skills/mempalace-recall/SKILL.md
+++ b/skills/mempalace-recall/SKILL.md
@@ -90,6 +90,20 @@ question — not a system prompt or pasted conversation) plus optional
 - **MCP error / server down.** Surface the error and suggest the user
   run `mempalace status` or re-run `/mempalace-init`. Never fall back to
   guessing.
+- **Palace index corrupt / compactor error.** If the server reports an
+  HNSW segment-writer error, a ChromaDB compaction failure, or stays
+  "Not connected" after a write, the vector index is out of sync with
+  `chroma.sqlite3` while the drawer rows remain intact. Tell the user to
+  stop the server and rebuild from SQLite — do not re-mine, which drops
+  MCP-added drawers and diary entries (#1843):
+
+  ```bash
+  mempalace repair --mode from-sqlite --archive-existing --yes
+  mempalace repair-status
+  ```
+
+  Do not attempt an in-process repair from the agent. Full steps are in
+  the shared protocol's "Recovering a corrupt index" section.
 - **Conflicting facts.** Trust the knowledge graph's time-valid answer;
   invalidate-then-add rather than overwriting silently.
 
diff --git a/skills/mempalace/SKILL.md b/skills/mempalace/SKILL.md
index b318af014..22f5a646f 100644
--- a/skills/mempalace/SKILL.md
+++ b/skills/mempalace/SKILL.md
@@ -42,6 +42,6 @@ search-before-answer so the agent reads the palace instead of guessing.
 
 ## Cursor-specific notes
 
-- The `mempalace-mcp` server is auto-registered by this plugin. Once installed, all 33 MemPalace MCP tools (`mempalace_search`, `mempalace_add_drawer`, `mempalace_diary_write`, `mempalace_check_duplicate`, `mempalace_diary_read`, etc.) are available to the agent without any further configuration.
+- The `mempalace-mcp` server is auto-registered by this plugin. Once installed, all 34 MemPalace MCP tools (`mempalace_search`, `mempalace_add_drawer`, `mempalace_diary_write`, `mempalace_check_duplicate`, `mempalace_diary_read`, etc.) are available to the agent without any further configuration.
 - For automatic background saving every N agent turns plus session-start memory recall, also install the Cursor hooks separately by running `hooks/cursor/install.sh --scope user` from a cloned MemPalace repo. See [`website/guide/cursor-hooks.md`](../../website/guide/cursor-hooks.md) for the full walkthrough.
 - The recommended `agent_name` when calling `mempalace_diary_write` from a Cursor session is `cursor-ide` (matches the precedent of `claude-code` and `codex`).
diff --git a/tests/test_claude_plugin_hook_config.py b/tests/test_claude_plugin_hook_config.py
index d995ac306..367cae3fb 100644
--- a/tests/test_claude_plugin_hook_config.py
+++ b/tests/test_claude_plugin_hook_config.py
@@ -19,8 +19,14 @@
 # timeout of 60s in mempalace/hooks_cli.py. The hook-level floor of 60
 # keeps the inner bound from being truncated, and the ceiling of 90
 # bounds the worst case at ~30s above that.
+# SessionEnd backgrounds all of its work in the shell wrapper — the foreground
+# only forks the detached child and returns in milliseconds — so its timeout is
+# a generous backstop on a near-instant operation, not a synchronous-work bound
+# like Stop/PreCompact. A bound is still required (#1465) so a wedged fork can
+# never fall back to the 600s command default.
 EVENT_TIMEOUT_BOUNDS: dict[str, tuple[int, int]] = {
     "Stop": (10, 30),
+    "SessionEnd": (5, 30),
     "PreCompact": (60, 90),
 }
 
@@ -86,3 +92,22 @@ def test_no_unbounded_events_in_plugin_config(hook_config: dict) -> None:
         "Add a (floor, ceiling) entry to EVENT_TIMEOUT_BOUNDS in this test "
         "after deciding the worst-case freeze the event can tolerate."
     )
+
+
+def test_session_end_hook_uses_background_wrapper(hook_config: dict) -> None:
+    """Claude SessionEnd should use the backgrounding wrapper, not PreCompact."""
+    events = hook_config.get("hooks", {})
+
+    assert "SessionEnd" in events
+    assert "PreCompact" in events
+    assert events["SessionEnd"] != events["PreCompact"]
+
+    commands = [
+        hook["command"]
+        for entry in events["SessionEnd"]
+        for hook in entry.get("hooks", [])
+        if hook.get("type") == "command"
+    ]
+
+    assert any("mempal-session-end-hook.sh" in command for command in commands)
+    assert not any("mempal-precompact-hook.sh" in command for command in commands)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index d8977dc9b..827de3a2f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -17,6 +17,7 @@
     cmd_hook,
     cmd_init,
     cmd_instructions,
+    cmd_daemon,
     cmd_mine,
     cmd_repair,
     cmd_search,
@@ -166,6 +167,13 @@ def test_cmd_hook_calls_run_hook():
         mock_run.assert_called_once_with(hook_name="session-start", harness="claude-code")
 
 
+def test_cmd_hook_session_end_calls_run_hook():
+    args = argparse.Namespace(hook="session-end", harness="claude-code")
+    with patch("mempalace.hooks_cli.run_hook") as mock_run:
+        cmd_hook(args)
+        mock_run.assert_called_once_with(hook_name="session-end", harness="claude-code")
+
+
 # ── cmd_init ───────────────────────────────────────────────────────────
 
 
@@ -621,6 +629,64 @@ def test_cmd_mine_include_ignored_comma_split(mock_config_cls):
         assert call_kwargs["include_ignored"] == ["a.txt", "b.txt", "c.txt"]
 
 
+@patch("mempalace.cli.MempalaceConfig")
+def test_cmd_mine_daemon_background_submits_job(mock_config_cls, capsys):
+    mock_config_cls.return_value.palace_path = "/fake/palace"
+    args = argparse.Namespace(
+        dir="/src",
+        palace=None,
+        mode="projects",
+        wing=None,
+        agent="mempalace",
+        limit=0,
+        dry_run=False,
+        no_gitignore=False,
+        include_ignored=["a.txt,b.txt"],
+        extract="exchange",
+        daemon=True,
+        background=True,
+        backend=None,
+        global_backend=None,
+        max_chunks_per_file=None,
+        redetect_origin=False,
+    )
+    with patch("mempalace.daemon.submit_job", return_value={"id": "job-1"}) as mock_submit:
+        with patch("mempalace.miner.mine") as mock_mine:
+            cmd_mine(args)
+
+    mock_mine.assert_not_called()
+    mock_submit.assert_called_once()
+    call_kwargs = mock_submit.call_args.kwargs
+    assert call_kwargs["palace_path"] == "/fake/palace"
+    assert call_kwargs["wait"] is False
+    payload = mock_submit.call_args.args[1]
+    assert payload["include_ignored"] == ["a.txt", "b.txt"]
+    assert "job-1" in capsys.readouterr().out
+
+
+@patch("mempalace.cli.MempalaceConfig")
+def test_cmd_mine_background_requires_daemon(mock_config_cls, capsys):
+    mock_config_cls.return_value.palace_path = "/fake/palace"
+    args = argparse.Namespace(
+        dir="/src",
+        palace=None,
+        mode="projects",
+        wing=None,
+        agent="mempalace",
+        limit=0,
+        dry_run=False,
+        no_gitignore=False,
+        include_ignored=[],
+        extract="exchange",
+        daemon=False,
+        background=True,
+    )
+    with pytest.raises(SystemExit) as excinfo:
+        cmd_mine(args)
+    assert excinfo.value.code == 2
+    assert "--background requires --daemon" in capsys.readouterr().err
+
+
 @patch("mempalace.cli.MempalaceConfig")
 def test_cmd_mine_exits_nonzero_on_lock_holder(mock_config_cls, capsys):
     """Regression #1264: lock contention must exit non-zero with a clear message.
@@ -872,6 +938,18 @@ def test_main_hook_run_dispatches():
         mock_cmd.assert_called_once()
 
 
+def test_main_hook_run_dispatches_session_end():
+    with (
+        patch(
+            "sys.argv",
+            ["mempalace", "hook", "run", "--hook", "session-end", "--harness", "claude-code"],
+        ),
+        patch("mempalace.cli.cmd_hook") as mock_cmd,
+    ):
+        main()
+        mock_cmd.assert_called_once()
+
+
 def test_main_instructions_no_subcommand_prints_help(capsys):
     with patch("sys.argv", ["mempalace", "instructions"]):
         main()
@@ -957,6 +1035,35 @@ def test_cmd_repair_error_reading(mock_config_cls, tmp_path, capsys):
     assert "Error reading palace" in out
 
 
+@patch("mempalace.cli.MempalaceConfig")
+def test_cmd_repair_error_reading_points_to_from_sqlite_not_remine(
+    mock_config_cls, tmp_path, capsys
+):
+    """When the drawer-index read fails (the chromadb HNSW compactor cannot
+    apply WAL logs to the segment), legacy repair must point the user at
+    ``repair --mode from-sqlite`` — the rows are intact in chroma.sqlite3 —
+    and must NOT advise re-mining from source files, which silently drops
+    drawers added via the MCP server and diary entries (#1843)."""
+    palace_dir = tmp_path / "palace"
+    palace_dir.mkdir()
+    sqlite3.connect(str(palace_dir / "chroma.sqlite3")).close()
+    mock_config_cls.return_value.palace_path = str(palace_dir)
+    mock_config_cls.return_value.collection_name = "mempalace_drawers"
+    args = argparse.Namespace(palace=None)
+    mock_col = MagicMock()
+    mock_col.count.side_effect = Exception(
+        "Error executing plan: Error sending backfill request to compactor: "
+        "Failed to apply logs to the hnsw segment writer"
+    )
+    mock_backend = MagicMock()
+    mock_backend.get_collection.return_value = mock_col
+    with patch("mempalace.backends.chroma.ChromaBackend", return_value=mock_backend):
+        cmd_repair(args)
+    out = capsys.readouterr().out
+    assert "mempalace repair --mode from-sqlite --archive-existing" in out
+    assert "may need to be re-mined" not in out
+
+
 @patch("mempalace.cli.MempalaceConfig")
 def test_cmd_repair_zero_drawers(mock_config_cls, tmp_path, capsys):
     palace_dir = tmp_path / "palace"
@@ -1260,6 +1367,94 @@ def test_cmd_sync_palace_dir_no_db(mock_config_cls, tmp_path, capsys):
     assert list(tmp_path.iterdir()) == []
 
 
+@patch("mempalace.cli.MempalaceConfig")
+def test_cmd_sync_daemon_background_submits_job(mock_config_cls, capsys):
+    from mempalace.cli import cmd_sync
+
+    mock_config_cls.return_value.palace_path = "/fake/palace"
+    args = argparse.Namespace(
+        palace=None,
+        dir="/project",
+        root=["/extra"],
+        wing="wing_a",
+        dry_run=False,
+        daemon=True,
+        background=True,
+        backend=None,
+        global_backend=None,
+    )
+    with patch("mempalace.daemon.submit_job", return_value={"id": "sync-job"}) as mock_submit:
+        cmd_sync(args)
+
+    mock_submit.assert_called_once()
+    assert mock_submit.call_args.args[0] == "sync"
+    payload = mock_submit.call_args.args[1]
+    assert payload == {"dir": "/project", "root": ["/extra"], "wing": "wing_a", "dry_run": False}
+    assert mock_submit.call_args.kwargs["wait"] is False
+    assert "sync-job" in capsys.readouterr().out
+
+
+@patch("mempalace.cli.MempalaceConfig")
+def test_cmd_daemon_jobs_reads_durable_queue_when_stopped(
+    mock_config_cls, tmp_path, monkeypatch, capsys
+):
+    from mempalace.daemon import QueueStore, queue_path
+
+    palace_dir = tmp_path / "palace"
+    state_root = tmp_path / "state"
+    palace_dir.mkdir()
+    monkeypatch.setenv("MEMPALACE_DAEMON_STATE_ROOT", str(state_root))
+    mock_config_cls.return_value.palace_path = str(palace_dir)
+    job = QueueStore(queue_path(str(palace_dir))).enqueue("mine", {"source": "/src"})
+
+    args = argparse.Namespace(
+        palace=None,
+        backend=None,
+        global_backend=None,
+        daemon_action="jobs",
+        limit=20,
+    )
+    with patch("mempalace.daemon.get_client_if_running", return_value=None):
+        cmd_daemon(args)
+
+    out = capsys.readouterr().out
+    assert job.id in out
+    assert "queued" in out
+    assert "mine" in out
+
+
+@patch("mempalace.cli.MempalaceConfig")
+def test_cmd_daemon_wait_reads_finished_job_when_stopped(
+    mock_config_cls, tmp_path, monkeypatch, capsys
+):
+    from mempalace.daemon import QueueStore, queue_path
+
+    palace_dir = tmp_path / "palace"
+    state_root = tmp_path / "state"
+    palace_dir.mkdir()
+    monkeypatch.setenv("MEMPALACE_DAEMON_STATE_ROOT", str(state_root))
+    mock_config_cls.return_value.palace_path = str(palace_dir)
+    store = QueueStore(queue_path(str(palace_dir)))
+    queued = store.enqueue("mine", {"source": "/src"})
+    store.finish(
+        queued.id,
+        state="succeeded",
+        result={"success": True, "stdout": "done\n", "exit_code": 0},
+    )
+
+    args = argparse.Namespace(
+        palace=None,
+        backend=None,
+        global_backend=None,
+        daemon_action="wait",
+        job_id=queued.id,
+    )
+    with patch("mempalace.daemon.get_client_if_running", return_value=None):
+        cmd_daemon(args)
+
+    assert "done" in capsys.readouterr().out
+
+
 @patch("mempalace.cli.MempalaceConfig")
 def test_cmd_compress_no_palace(mock_config_cls, tmp_path, capsys):
     """cmd_compress exits non-zero with a 'No palace found' message on a missing dir.
diff --git a/tests/test_config.py b/tests/test_config.py
index 06ecba162..acf818c5d 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1,5 +1,6 @@
 import os
 import json
+import sqlite3
 import tempfile
 
 import pytest
@@ -10,6 +11,7 @@
     sanitize_iso_temporal,
     sanitize_kg_value,
     sanitize_name,
+    sqlite_read_uri,
 )
 
 
@@ -100,6 +102,79 @@ def test_embedding_device_env_overrides_config(tmp_path, monkeypatch):
     assert cfg.embedding_device == "coreml"
 
 
+def test_embedding_threads_defaults_to_half_cpus(monkeypatch):
+    monkeypatch.delenv("MEMPALACE_EMBEDDING_THREADS", raising=False)
+    monkeypatch.setattr("os.cpu_count", lambda: 10)
+    cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
+    # unset / "auto" → half the logical CPUs so a background mine stays tame
+    assert cfg.embedding_threads == 5
+
+
+def test_embedding_threads_auto_keyword(tmp_path, monkeypatch):
+    monkeypatch.delenv("MEMPALACE_EMBEDDING_THREADS", raising=False)
+    monkeypatch.setattr("os.cpu_count", lambda: 8)
+    with open(tmp_path / "config.json", "w") as f:
+        json.dump({"embedding_threads": "auto"}, f)
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.embedding_threads == 4
+
+
+def test_embedding_threads_positive_value_from_config(tmp_path, monkeypatch):
+    monkeypatch.delenv("MEMPALACE_EMBEDDING_THREADS", raising=False)
+    with open(tmp_path / "config.json", "w") as f:
+        json.dump({"embedding_threads": 3}, f)
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.embedding_threads == 3
+
+
+def test_embedding_threads_zero_means_uncapped(tmp_path, monkeypatch):
+    monkeypatch.delenv("MEMPALACE_EMBEDDING_THREADS", raising=False)
+    with open(tmp_path / "config.json", "w") as f:
+        json.dump({"embedding_threads": 0}, f)
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.embedding_threads == 0
+
+
+def test_embedding_threads_env_overrides_config(tmp_path, monkeypatch):
+    with open(tmp_path / "config.json", "w") as f:
+        json.dump({"embedding_threads": 2}, f)
+    monkeypatch.setenv("MEMPALACE_EMBEDDING_THREADS", "6")
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.embedding_threads == 6
+
+
+def test_embedding_threads_invalid_falls_back_to_auto(tmp_path, monkeypatch):
+    monkeypatch.setattr("os.cpu_count", lambda: 4)
+    monkeypatch.setenv("MEMPALACE_EMBEDDING_THREADS", "not-a-number")
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.embedding_threads == 2
+
+
+def test_sqlite_read_uri_opens_path_with_spaces(tmp_path):
+    """sqlite_read_uri must open a read-only DB whose path contains spaces,
+    which a bare f"file:{path}?mode=ro" mis-parses (especially on Windows)."""
+    db_dir = tmp_path / "palace with spaces"
+    db_dir.mkdir()
+    db_path = db_dir / "chroma.sqlite3"
+    setup = sqlite3.connect(str(db_path))
+    setup.execute("CREATE TABLE t (x INTEGER)")
+    setup.execute("INSERT INTO t VALUES (42)")
+    setup.commit()
+    setup.close()
+
+    uri = sqlite_read_uri(str(db_path))
+    assert "%20" in uri  # the space is percent-encoded, not left raw
+
+    conn = sqlite3.connect(uri, uri=True)
+    try:
+        assert conn.execute("SELECT x FROM t").fetchone()[0] == 42
+        # mode=ro is still honored through the encoded URI
+        with pytest.raises(sqlite3.OperationalError):
+            conn.execute("INSERT INTO t VALUES (1)")
+    finally:
+        conn.close()
+
+
 def test_env_override():
     raw = "/env/palace"
     os.environ["MEMPALACE_PALACE_PATH"] = raw
@@ -690,6 +765,36 @@ def test_hooks_auto_save_env_override_true():
         del os.environ["MEMPALACE_HOOKS_AUTO_SAVE"]
 
 
+def test_hook_use_daemon_default_false(monkeypatch):
+    monkeypatch.delenv("MEMPALACE_HOOKS_DAEMON", raising=False)
+    cfg = MempalaceConfig(config_dir=tempfile.mkdtemp())
+    assert cfg.hook_use_daemon is False
+
+
+def test_hook_use_daemon_from_config(monkeypatch, tmp_path):
+    monkeypatch.delenv("MEMPALACE_HOOKS_DAEMON", raising=False)
+    with open(tmp_path / "config.json", "w") as f:
+        json.dump({"hooks": {"daemon": True}}, f)
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.hook_use_daemon is True
+
+
+def test_hook_use_daemon_string_config(monkeypatch, tmp_path):
+    monkeypatch.delenv("MEMPALACE_HOOKS_DAEMON", raising=False)
+    with open(tmp_path / "config.json", "w") as f:
+        json.dump({"hooks": {"daemon": "yes"}}, f)
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.hook_use_daemon is True
+
+
+def test_hook_use_daemon_env_override(monkeypatch, tmp_path):
+    with open(tmp_path / "config.json", "w") as f:
+        json.dump({"hooks": {"daemon": False}}, f)
+    monkeypatch.setenv("MEMPALACE_HOOKS_DAEMON", "yes")
+    cfg = MempalaceConfig(config_dir=str(tmp_path))
+    assert cfg.hook_use_daemon is True
+
+
 # --- max_backups (backup retention) ---
 
 
diff --git a/tests/test_cursor_hooks_shell.py b/tests/test_cursor_hooks_shell.py
index 557e4e426..cf7537e36 100644
--- a/tests/test_cursor_hooks_shell.py
+++ b/tests/test_cursor_hooks_shell.py
@@ -349,11 +349,10 @@ def test_threshold_emits_followup_message(self, tmp_path):
             f"third invocation must emit a followup_message; got {response!r}"
         )
         msg = response["followup_message"]
-        # Followup must reference the real MCP tool names (regression
-        # guard against future typos that would silently fail).
-        assert "mempalace_add_drawer" in msg
-        assert "mempalace_check_duplicate" in msg
-        assert "mempalace_diary_write" in msg
+        # Followup must reference the real MCP tool name (regression
+        # guard against future typos that would silently fail). The save
+        # is driven by a single batch checkpoint call.
+        assert "mempalace_checkpoint" in msg
         assert "cursor-ide" in msg, "diary entries must be tagged agent_name=cursor-ide"
 
     def test_threshold_followup_references_inferred_wing(self, tmp_path):
diff --git a/tests/test_daemon.py b/tests/test_daemon.py
new file mode 100644
index 000000000..e3868a0fa
--- /dev/null
+++ b/tests/test_daemon.py
@@ -0,0 +1,857 @@
+import os
+import threading
+import time
+
+import pytest
+
+from mempalace import daemon
+from mempalace import service
+
+# POSIX file-mode bits (0600/0700) are not representable on Windows: os.chmod
+# can only toggle the read-only attribute, so a "private" file still reports
+# 0o666. The daemon relies on the user-profile directory ACLs for privacy
+# there, so the owner-only assertions only make sense on POSIX.
+_posix_only_perms = pytest.mark.skipif(
+    os.name == "nt",
+    reason="POSIX 0600/0700 file-mode bits are not representable on Windows (ACL-based privacy)",
+)
+
+# Env keys run_server mutates from its background thread, plus umask. If a
+# lifecycle test times out before the server comes up, run_server's finally
+# never runs and those mutations leak into the rest of the suite — every later
+# test that reads MempalaceConfig().palace_path sees a stale deleted tmp path and
+# fails (the 60+ test cascade seen on slow CI runners). The fixtures below force a
+# clean baseline around every daemon test so a leaked thread can't poison the
+# process for tests/test_mcp_server.py and friends (which have no such guard).
+_LEAK_ENV_KEYS = ("MEMPALACE_PALACE_PATH", "MEMPALACE_BACKEND", "MEMPALACE_BACKEND_EXPLICIT")
+
+
+@pytest.fixture(scope="module")
+def _clean_env_snapshot():
+    """Capture the true pre-suite values once, before any daemon test runs."""
+    return {key: os.environ.get(key) for key in _LEAK_ENV_KEYS}
+
+
+@pytest.fixture(autouse=True)
+def _isolate_process_global_state(_clean_env_snapshot):
+    """Restore the process-global env + umask to the pre-suite baseline after every
+    daemon test, even if a leaked run_server thread is still holding them mutated.
+    """
+    prev_umask = os.umask(0o022)
+    os.umask(prev_umask)  # read current umask without changing it
+    yield
+    for key, value in _clean_env_snapshot.items():
+        if value is None:
+            os.environ.pop(key, None)
+        else:
+            os.environ[key] = value
+    os.umask(prev_umask)
+
+
+def _raise_not_ready(*a, **kw):
+    """Stand-in for DaemonClient when the spawned daemon must never come up."""
+    raise daemon.DaemonError("not ready")
+
+
+def test_prune_terminal_drops_old_terminal_jobs_keeps_active(tmp_path, monkeypatch):
+    """Terminal jobs older than the retention window are pruned; queued/running
+    and fresh terminal jobs are untouched. Bounded queue growth for the DB that
+    holds verbatim payloads."""
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    store = daemon.QueueStore(daemon.queue_path(str(palace)))
+
+    old_term = store.enqueue("mine", {"source": "old"})
+    store.finish(old_term.id, state="succeeded", result={"success": True})
+    fresh_term = store.enqueue("mine", {"source": "fresh"})
+    store.finish(fresh_term.id, state="succeeded", result={"success": True})
+    queued = store.enqueue("mine", {"source": "queued"})
+
+    from datetime import datetime, timedelta, timezone
+
+    cutoff = (datetime.now(timezone.utc) - timedelta(days=30)).isoformat()
+    with store._lock, store._connect() as conn:
+        conn.execute(
+            "UPDATE jobs SET finished_at = ? WHERE id = ?",
+            (cutoff, old_term.id),
+        )
+
+    pruned = store.prune_terminal(older_than_days=7)
+    assert pruned == 1
+    # The old terminal job is gone; the fresh terminal and queued jobs survive.
+    with pytest.raises(daemon.DaemonError):
+        store.get(old_term.id)
+    assert store.get(fresh_term.id).state == "succeeded"
+    assert store.get(queued.id).state == "queued"
+
+
+def test_queue_dedupes_and_recovers_running_jobs(tmp_path, monkeypatch):
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+
+    store = daemon.QueueStore(daemon.queue_path(str(palace)))
+    first = store.enqueue("mine", {"source": "a"}, dedupe_key="same")
+    second = store.enqueue("mine", {"source": "a"}, dedupe_key="same")
+
+    assert second.id == first.id
+
+    claimed = store.claim_next()
+    assert claimed.id == first.id
+    assert claimed.state == "running"
+
+    recovered = store.recover_running()
+    assert recovered == 1
+    assert store.get(first.id).state == "queued"
+
+
+def test_daemon_http_lifecycle_executes_job(tmp_path, monkeypatch):
+    calls = []
+
+    def fake_execute(kind, payload):
+        calls.append((kind, payload))
+        return {"success": True, "exit_code": 0, "stdout": "done\n"}
+
+    client, thread, palace, holders = _start_server(tmp_path, monkeypatch, fake_execute)
+
+    health = client.health()
+    assert health["ok"] is True
+    assert health["palace_path"] == daemon.canonical_palace_path(str(palace))
+
+    job = client.submit("mine", {"source": "src"}, dedupe_key="job")
+    finished = client.wait(job["id"], timeout=5)
+
+    assert finished["state"] == "succeeded"
+    assert finished["result"]["stdout"] == "done\n"
+    assert calls == [("mine", {"source": "src", "palace_path": str(palace.resolve())})]
+
+    _stop_server(client, thread, holders)
+
+
+def test_submit_job_uses_client_and_waits(monkeypatch, tmp_path):
+    palace = tmp_path / "palace"
+    palace.mkdir()
+
+    class DummyClient:
+        def __init__(self):
+            self.submitted = None
+
+        def submit(self, kind, payload, dedupe_key=None, priority=0):
+            self.submitted = (kind, payload, dedupe_key, priority)
+            return {"id": "job-1", "state": "queued"}
+
+        def wait(self, job_id, timeout=daemon.DEFAULT_WAIT_TIMEOUT):
+            assert job_id == "job-1"
+            return {
+                "id": "job-1",
+                "state": "succeeded",
+                "result": {"success": True, "exit_code": 0},
+            }
+
+    dummy = DummyClient()
+    monkeypatch.setattr(daemon, "ensure_client", lambda *a, **kw: dummy)
+
+    job = daemon.submit_job(
+        "mine",
+        {"source": "src"},
+        palace_path=str(palace),
+        dedupe_key="dedupe",
+        wait=True,
+    )
+
+    assert job["state"] == "succeeded"
+    assert dummy.submitted[0] == "mine"
+    # palace_path is overridden (not trusted from the payload), never appended.
+    assert dummy.submitted[1]["palace_path"] == daemon.canonical_palace_path(str(palace))
+    assert dummy.submitted[2] == "dedupe"
+
+
+def test_service_tool_classification():
+    assert service.classify_tool("mempalace_search") == "read"
+    assert service.classify_tool("mempalace_add_drawer") == "write"
+    assert service.classify_tool("mempalace_checkpoint") == "write"
+    assert service.classify_tool("mempalace_mine") == "maintenance"
+    assert service.classify_tool("unknown") == "unknown"
+
+
+# --- helpers for HTTP-lifecycle tests ---
+
+
+def _capture_httpd(monkeypatch):
+    """Capture the httpd instance run_server creates.
+
+    run_server defines a local ``class _Server(ThreadingHTTPServer)``; by
+    monkeypatching ``daemon.ThreadingHTTPServer`` before run_server runs, that
+    subclass inherits from a capturing base that records each instance. The
+    httpd can then be force-stopped from the test thread (see _stop_server) so a
+    slow/failed ``client.shutdown()`` POST can never leave the server thread
+    alive — which on Windows hangs the interpreter at process exit on an open
+    listening socket.
+    """
+    holders: list = []
+    base = daemon.ThreadingHTTPServer
+
+    class _CapturingServer(base):
+        def __init__(self, *args, **kwargs):
+            super().__init__(*args, **kwargs)
+            holders.append(self)
+
+    monkeypatch.setattr(daemon, "ThreadingHTTPServer", _CapturingServer)
+    return holders
+
+
+def _stop_server(client, thread, holders, *, join_timeout=5.0):
+    """Shut the daemon down deterministically and assert the thread died.
+
+    First try the normal path (POST /shutdown). If the server thread is still
+    alive afterwards — the POST was slow, lost, or the drain overran the join —
+    call httpd.shutdown() directly from this thread (stdlib-safe: it is a
+    different thread than serve_forever) to force serve_forever to return, then
+    re-join. The assert turns a leak into a visible failure instead of a silent
+    interpreter-exit hang.
+    """
+    try:
+        client.shutdown()
+    except Exception:  # noqa: BLE001 - best-effort; we force-shutdown below
+        pass
+    thread.join(timeout=join_timeout)
+    if thread.is_alive() and holders:
+        httpd = holders[-1]
+        try:
+            httpd.shutdown()
+        except Exception:  # noqa: BLE001
+            pass
+        try:
+            httpd.server_close()
+        except Exception:  # noqa: BLE001
+            pass
+        thread.join(timeout=join_timeout)
+    assert not thread.is_alive(), "daemon server thread did not shut down"
+
+
+def _start_server(tmp_path, monkeypatch, execute_fn):
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    monkeypatch.setattr(service, "execute_job", execute_fn)
+    holders = _capture_httpd(monkeypatch)
+
+    # Capture any exception run_server raises in its thread. Without this a
+    # startup crash is invisible: the poll below would just spin for 30s and
+    # fail with a bare ``assert client is not None`` giving no cause.
+    server_error: list = []
+
+    def _serve():
+        try:
+            daemon.run_server(palace_path=str(palace), port=0)
+        except BaseException as exc:  # noqa: BLE001 - re-surfaced to the test thread
+            server_error.append(exc)
+
+    thread = threading.Thread(target=_serve, name="test-daemon-server", daemon=True)
+    thread.start()
+    client = None
+    deadline = time.monotonic() + 30
+    while time.monotonic() < deadline:
+        if server_error:
+            raise AssertionError(f"run_server crashed during startup: {server_error[0]!r}")
+        client = daemon.get_client_if_running(str(palace))
+        if client is not None:
+            break
+        time.sleep(0.05)
+    if client is None:
+        raise AssertionError(
+            "daemon did not become ready within 30s "
+            f"(thread_alive={thread.is_alive()}, httpd_bound={bool(holders)}, "
+            f"endpoint_exists={daemon.endpoint_path(str(palace)).exists()})"
+        )
+    return client, thread, palace, holders
+
+
+# --- ship-blocker regressions ---
+
+
+def test_systemexit_in_job_does_not_kill_worker(tmp_path, monkeypatch):
+    """A SystemExit (BaseException, not Exception) must be caught, the job
+    marked failed, and the worker kept alive for the next job. Regression for
+    the critical worker-death bug."""
+    state = {"first": True}
+
+    def fake_execute(kind, payload):
+        if state["first"]:
+            state["first"] = False
+            raise SystemExit("boom")
+        return {"success": True, "exit_code": 0}
+
+    client, thread, palace, holders = _start_server(tmp_path, monkeypatch, fake_execute)
+    try:
+        first = client.submit("mine", {"source": "src"})
+        finished_first = client.wait(first["id"], timeout=5)
+        assert finished_first["state"] == "failed"
+        assert finished_first["error"]["error_class"] == "SystemExit"
+
+        # Worker must still be alive — health reports it and a second job runs.
+        assert client.health()["worker_alive"] is True
+        second = client.submit("mine", {"source": "src2"})
+        finished_second = client.wait(second["id"], timeout=5)
+        assert finished_second["state"] == "succeeded"
+    finally:
+        _stop_server(client, thread, holders)
+
+
+def test_shutdown_cancels_active_job(tmp_path, monkeypatch):
+    """POST /shutdown must not leave an in-flight job 'running' for blind
+    re-queue on next start. The worker is drained (bounded), then the active
+    job is marked 'cancelled' so recover_running won't re-run it.
+
+    In production the serve process exits immediately after run_server returns,
+    killing the daemon worker thread before it can overwrite the cancelled
+    state. The test mirrors that by asserting the cancelled state *before*
+    releasing the blocked worker.
+    """
+    block = threading.Event()
+
+    def fake_execute(kind, payload):
+        # Simulate a long-running job that never finishes on its own.
+        block.wait(30)
+        return {"success": True, "exit_code": 0}
+
+    monkeypatch.setattr(daemon, "SHUTDOWN_DRAIN_SECONDS", 0.2)
+    client, thread, palace, holders = _start_server(tmp_path, monkeypatch, fake_execute)
+    job = client.submit("mine", {"source": "src"}, dedupe_key="x")
+    # Wait until the worker has claimed it (state flips to running).
+    deadline = time.monotonic() + 5
+    while time.monotonic() < deadline:
+        if client.get_job(job["id"])["state"] == "running":
+            break
+        time.sleep(0.02)
+    assert client.get_job(job["id"])["state"] == "running"
+
+    _stop_server(client, thread, holders)
+
+    # The interrupted job must be cancelled (terminal), not left running.
+    store = daemon.QueueStore(daemon.queue_path(str(palace)))
+    final = store.get(job["id"])
+    assert final.state == "cancelled"
+    # And recover_running must not re-queue a cancelled job.
+    assert store.recover_running() == 0
+
+    # Release the blocked worker so it (and the daemon thread) can exit.
+    block.set()
+
+
+def test_recover_running_dead_letters_exhausted_jobs(tmp_path, monkeypatch):
+    """A job that has crashed MAX_ATTEMPTS times must be dead-lettered to
+    'failed', not re-queued — non-idempotent kinds (diary_write) would
+    otherwise duplicate verbatim content on every restart."""
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    store = daemon.QueueStore(daemon.queue_path(str(palace)))
+    job = store.enqueue("diary_write", {"entry": "x"})
+    # Simulate MAX_ATTEMPTS claims that each crashed (running, attempts=MAX).
+    with store._lock, store._connect() as conn:
+        conn.execute(
+            "UPDATE jobs SET state='running', attempts=? WHERE id=?",
+            (daemon.MAX_ATTEMPTS, job.id),
+        )
+
+    recovered = store.recover_running()
+    assert recovered == 0  # not re-queued
+    final = store.get(job.id)
+    assert final.state == "failed"
+    assert final.attempts == daemon.MAX_ATTEMPTS
+
+
+def test_claim_next_does_not_reclaim_running_job(tmp_path, monkeypatch):
+    """The conditional UPDATE (WHERE state='queued') means a job already
+    flipped to 'running' cannot be claimed again — the cross-process
+    double-execution guard."""
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    store = daemon.QueueStore(daemon.queue_path(str(palace)))
+    job = store.enqueue("mine", {"source": "src"})
+    first = store.claim_next()
+    assert first.id == job.id
+    # Manually re-mark it queued but leave a second claim attempt: claim_next
+    # should still only ever return one running job per claim. After finishing
+    # the first, the next claim returns None (queue empty).
+    store.finish(first.id, state="succeeded", result={"success": True})
+    assert store.claim_next() is None
+
+
+@_posix_only_perms
+def test_queue_db_file_is_owner_only(tmp_path, monkeypatch):
+    """The queue DB holds verbatim payloads — it must be 0600, not the sqlite
+    default 0644. Regression for the privacy-principle violation."""
+    import os as _os
+
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    store = daemon.QueueStore(daemon.queue_path(str(palace)))
+    store.enqueue("diary_write", {"entry": "secret verbatim content"})
+    mode = _os.stat(str(store.path)).st_mode & 0o777
+    assert mode == 0o600, f"queue.sqlite3 is {oct(mode)}, expected 0600"
+
+
+@_posix_only_perms
+def test_token_file_is_owner_only(tmp_path, monkeypatch):
+    import os as _os
+
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    daemon.ensure_token(str(palace))
+    token_path = daemon.state_dir(str(palace)) / "token"
+    assert (_os.stat(str(token_path)).st_mode & 0o777) == 0o600
+
+
+def test_health_rejects_missing_and_wrong_token(tmp_path, monkeypatch):
+    from urllib import error as urlerror
+    from urllib import request as urlrequest
+
+    client, thread, palace, holders = _start_server(
+        tmp_path, monkeypatch, lambda k, p: {"success": True}
+    )
+    try:
+        base = f"http://127.0.0.1:{client.port}"
+        # No Authorization header → 401.
+        with pytest.raises(urlerror.HTTPError):
+            urlrequest.urlopen(urlrequest.Request(base + "/health"), timeout=3)
+        # Wrong token → 401.
+        with pytest.raises(urlerror.HTTPError):
+            urlrequest.urlopen(
+                urlrequest.Request(base + "/health", headers={"Authorization": "Bearer wrong"}),
+                timeout=3,
+            )
+    finally:
+        _stop_server(client, thread, holders)
+
+
+def test_worker_overrides_client_palace_path(tmp_path, monkeypatch):
+    """An authenticated client must not be able to retarget the daemon at a
+    different palace by stuffing palace_path into the payload."""
+    seen = {}
+
+    def fake_execute(kind, payload):
+        seen["palace_path"] = payload.get("palace_path")
+        return {"success": True, "exit_code": 0}
+
+    client, thread, palace, holders = _start_server(tmp_path, monkeypatch, fake_execute)
+    try:
+        job = client.submit(
+            "mine", {"source": "src", "palace_path": "/tmp/other-palace"}, dedupe_key="p"
+        )
+        client.wait(job["id"], timeout=5)
+    finally:
+        _stop_server(client, thread, holders)
+    assert seen["palace_path"] == daemon.canonical_palace_path(str(palace))
+    assert seen["palace_path"] != "/tmp/other-palace"
+
+
+def test_mcp_tool_allowlist_rejects_non_write_tools(tmp_path, monkeypatch):
+    """The daemon queue is a durable write surface; read/maintenance/unknown
+    tools must be rejected so verbatim content can't be exfiltrated into the
+    queue or retried destructively."""
+    # read tool → rejected
+    out = service.run_mcp_tool({"name": "mempalace_search", "arguments": {}})
+    assert out["success"] is False
+    assert "only accepts write tools" in out["error"]
+    # maintenance tool → rejected (has its own kinds: mine/sync)
+    out = service.run_mcp_tool({"name": "mempalace_mine", "arguments": {}})
+    assert out["success"] is False
+    # unknown tool → rejected
+    out = service.run_mcp_tool({"name": "mempalace_bogus", "arguments": {}})
+    assert out["success"] is False
+    # write tool → passes the allowlist (handler not called here since TOOLS
+    # won't have it under the test name; but classification must let it through)
+    assert service.classify_tool("mempalace_add_drawer") == "write"
+
+
+def test_execute_job_isolates_env_per_job(monkeypatch):
+    """A job that mutates MEMPALACE_BACKEND must not leak into the next job's
+    env. Regression for the per-job isolation bug (_apply_backend poisoning)."""
+    import os as _os
+
+    monkeypatch.delenv("MEMPALACE_BACKEND", raising=False)
+    monkeypatch.delenv("MEMPALACE_PALACE_PATH", raising=False)
+
+    def fake_mine(payload):
+        _os.environ["MEMPALACE_BACKEND"] = "leaked-backend"
+        return {"success": True, "exit_code": 0}
+
+    monkeypatch.setattr(service, "run_mine", fake_mine)
+    service.execute_job("mine", {"palace_path": "/tmp/p", "source": "s"})
+    assert _os.environ.get("MEMPALACE_BACKEND") is None
+
+
+def test_daemon_client_raises_on_endpoint_missing_port(tmp_path, monkeypatch):
+    """A malformed endpoint.json must raise DaemonError, not a bare KeyError."""
+    import json as _json
+
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    daemon.ensure_token(str(palace))
+    # endpoint with no port
+    daemon.state_dir(str(palace)).mkdir(parents=True, exist_ok=True)
+    (daemon.state_dir(str(palace)) / "endpoint.json").write_text(
+        _json.dumps({"host": "127.0.0.1", "pid": 1}) + "\n", encoding="utf-8"
+    )
+
+    with pytest.raises(daemon.DaemonError):
+        daemon.DaemonClient(str(palace))
+
+
+def test_pid_alive_probe_is_signal_free_and_correct():
+    """``_pid_alive`` must be a pure liveness probe.
+
+    On Windows ``os.kill(pid, 0)`` is NOT harmless — signal 0 is
+    ``CTRL_C_EVENT``, so it emits a console Ctrl-C to the target's process
+    group. The daemon client polls a same-process endpoint, so that Ctrl-C was
+    delivered back to the interpreter and surfaced as a spurious
+    ``KeyboardInterrupt`` that hung the whole test session on CI runners (which,
+    unlike a detached dev shell, have an attached console). Assert the probe is
+    both correct and emits no SIGINT even when hammered like the poll loop.
+    """
+    import signal
+
+    assert daemon._pid_alive(os.getpid()) is True
+    assert daemon._pid_alive(0) is False
+    assert daemon._pid_alive(-1) is False
+    # A pid that is almost certainly not running.
+    assert daemon._pid_alive(2_000_000_000) is False
+
+    # pytest runs tests on the main thread, so installing a SIGINT handler is
+    # allowed. If the probe regresses to os.kill(pid, 0) on Windows, the repeated
+    # calls below deliver CTRL_C_EVENT and this handler fires.
+    fired = []
+    previous = signal.getsignal(signal.SIGINT)
+    signal.signal(signal.SIGINT, lambda *_: fired.append(1))
+    try:
+        for _ in range(25):
+            daemon._pid_alive(os.getpid())
+        time.sleep(0.25)
+    finally:
+        signal.signal(signal.SIGINT, previous)
+    assert fired == [], "_pid_alive delivered a console control event (CTRL_C_EVENT)"
+
+
+def test_start_daemon_kills_orphan_on_readiness_timeout(tmp_path, monkeypatch):
+    """If the spawned daemon never becomes ready, start_daemon must kill and
+    reap the orphaned subprocess rather than leaking it with the port/token."""
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    daemon.ensure_token(str(palace))
+
+    monkeypatch.setattr(daemon, "get_client_if_running", lambda *a, **kw: None)
+
+    class FakeProc:
+        def __init__(self):
+            self.killed = False
+            self.returncode = None
+
+        def poll(self):
+            return self.returncode  # None == still alive
+
+        def kill(self):
+            self.killed = True
+
+        def wait(self):
+            self.returncode = -9
+            return self.returncode
+
+    fake = FakeProc()
+
+    def fake_popen(*a, **kw):
+        return fake
+
+    monkeypatch.setattr(daemon.subprocess, "Popen", fake_popen)
+    monkeypatch.setattr(daemon, "DaemonClient", _raise_not_ready)
+    monkeypatch.setattr(daemon.time, "sleep", lambda *a, **kw: None)
+
+    with pytest.raises(daemon.DaemonError):
+        daemon.start_daemon(str(palace), timeout=0.05)
+    assert fake.killed is True
+
+
+# --- service.run_* happy-path coverage ---
+# These close the draft PR's follow-up ("Add focused happy-path tests for
+# service.run_mine / run_diary_write / run_mcp_tool") and, now that the daemon
+# tests complete reliably, keep service.py's coverage above the CI gate. The
+# capsys-using tests come first; the two that import mempalace.mcp_server (which
+# rebinds sys.stdout) come last and do not use capsys, so the rebind can't break
+# capture in this file or later files (capsys activates after the rebind).
+
+
+def test_print_job_result_replays_stdout_stderr_and_returns_exit_code(capsys):
+    from mempalace import service
+
+    code = service.print_job_result(
+        {"success": False, "error": "boom", "stdout": "out\n", "stderr": "err\n", "exit_code": 3}
+    )
+    assert code == 3
+    captured = capsys.readouterr()
+    assert "out" in captured.out
+    assert "err" in captured.err
+
+
+def test_print_job_result_prints_error_to_stderr_when_no_stderr(capsys):
+    from mempalace import service
+
+    code = service.print_job_result({"success": False, "error": "boom", "exit_code": 1})
+    assert code == 1
+    captured = capsys.readouterr()
+    assert "mempalace: boom" in captured.err
+
+
+def test_run_sync_returns_success_when_palace_dir_missing(tmp_path):
+    from mempalace import service
+
+    result = service.run_sync({"palace_path": str(tmp_path / "nope"), "dry_run": True})
+    assert result["success"] is True
+    assert result["exit_code"] == 0
+
+
+def test_run_sync_returns_success_when_palace_has_no_backend_artifact(tmp_path):
+    from mempalace import service
+
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    result = service.run_sync({"palace_path": str(palace), "dry_run": True})
+    assert result["success"] is True
+    assert result["exit_code"] == 0
+
+
+def test_run_mine_invalid_mode_returns_structured_error(tmp_path):
+    from mempalace import service
+
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    out = service.run_mine({"palace_path": str(palace), "mode": "bogus"})
+    assert out["success"] is False
+    assert "invalid mine mode" in out["error"]
+    assert out["exit_code"] == 2
+
+
+def test_run_mcp_tool_rejects_non_dict_arguments():
+    from mempalace import service
+
+    out = service.run_mcp_tool({"name": "mempalace_add_drawer", "arguments": "nope"})
+    assert out["success"] is False
+    assert "must be an object" in out["error"]
+    assert out["exit_code"] == 2
+
+
+def test_run_mcp_tool_dispatches_write_tool(monkeypatch):
+    import mempalace.mcp_server as mcp
+    from mempalace import service
+
+    captured = {}
+
+    def fake_handler(**arguments):
+        captured["arguments"] = arguments
+        return {"success": True, "written": True}
+
+    monkeypatch.setattr(mcp, "TOOLS", {"mempalace_add_drawer": {"handler": fake_handler}})
+    out = service.run_mcp_tool({"name": "mempalace_add_drawer", "arguments": {"x": 1}})
+    assert out["success"] is True
+    assert out["written"] is True
+    assert out["exit_code"] == 0
+    assert captured["arguments"] == {"x": 1}
+
+
+def test_run_diary_write_forwards_args_and_sets_exit_code(monkeypatch):
+    import mempalace.mcp_server as mcp
+    from mempalace import service
+
+    captured = {}
+
+    def fake_diary(agent_name, entry, topic, wing):
+        captured.update(agent_name=agent_name, entry=entry, topic=topic, wing=wing)
+        return {"success": True}
+
+    monkeypatch.setattr(mcp, "tool_diary_write", fake_diary)
+    out = service.run_diary_write(
+        {"agent_name": "alice", "entry": "hello", "topic": "t", "wing": "w"}
+    )
+    assert out["success"] is True
+    assert out["exit_code"] == 0
+    assert captured == {"agent_name": "alice", "entry": "hello", "topic": "t", "wing": "w"}
+
+
+def test_run_mine_applies_backend_before_mode_validation(tmp_path):
+    """Covers _apply_backend (env set + get_backend_class validation) on the daemon
+    path; the invalid mode short-circuits before any mining runs."""
+    from mempalace import service
+
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    out = service.run_mine({"palace_path": str(palace), "mode": "bogus", "backend": "chroma"})
+    assert out["success"] is False
+    assert out["exit_code"] == 2
+
+
+def test_execute_job_dispatches_diary_write_mcp_tool_and_unknown(monkeypatch):
+    """Covers execute_job's kind dispatch for diary_write, mcp_tool, and the
+    unknown-kind fallback."""
+    import mempalace.mcp_server as mcp
+    from mempalace import service
+
+    monkeypatch.setattr(mcp, "tool_diary_write", lambda **kw: {"success": True})
+    monkeypatch.setattr(
+        mcp, "TOOLS", {"mempalace_add_drawer": {"handler": lambda **kw: {"success": True}}}
+    )
+    assert service.execute_job("diary_write", {"entry": "x"})["success"] is True
+    assert (
+        service.execute_job("mcp_tool", {"name": "mempalace_add_drawer", "arguments": {}})[
+            "success"
+        ]
+        is True
+    )
+    unknown = service.execute_job("bogus_kind", {})
+    assert unknown["success"] is False
+    assert unknown["exit_code"] == 2
+
+
+def test_run_sync_structured_errors_on_sync_failures(tmp_path, monkeypatch):
+    """Covers run_sync's three exception handlers (MineAlreadyRunning, ValueError,
+    generic Exception) so a failing sync_palace returns a structured error instead
+    of propagating."""
+    import mempalace.sync as sync_module
+    from mempalace import service
+    from mempalace.palace import MineAlreadyRunning
+
+    palace = tmp_path / "palace"
+    palace.mkdir()
+    (palace / "chroma.sqlite3").touch()
+
+    def _raise(exc):
+        def fn(**kw):
+            raise exc
+
+        return fn
+
+    monkeypatch.setattr(sync_module, "sync_palace", _raise(MineAlreadyRunning("locked")))
+    r = service.run_sync({"palace_path": str(palace), "dry_run": True})
+    assert r["success"] is False
+    assert r["error_class"] == "LockHeldByOtherProcess"
+
+    monkeypatch.setattr(sync_module, "sync_palace", _raise(ValueError("bad scope")))
+    r = service.run_sync({"palace_path": str(palace), "dry_run": True})
+    assert r["success"] is False
+    assert r["exit_code"] == 2
+
+    monkeypatch.setattr(sync_module, "sync_palace", _raise(RuntimeError("boom")))
+    r = service.run_sync({"palace_path": str(palace), "dry_run": True})
+    assert r["success"] is False
+    assert "sync failed" in r["error"]
+
+
+# --- post-merge review follow-ups (Copilot review on #1826) ---
+
+
+@_posix_only_perms
+def test_run_server_tightens_umask_before_building_queue(tmp_path, monkeypatch):
+    """The owner-only umask must be active BEFORE the queue DB is built.
+
+    SQLite's WAL/SHM sidecars hold un-checkpointed verbatim payloads and are
+    created with the process umask, so a loose umask at DaemonRuntime/QueueStore
+    construction time would leave them world-readable. Capture the umask at the
+    moment DaemonRuntime is constructed and assert it is already 0o077.
+    """
+    monkeypatch.setenv(daemon.STATE_ROOT_ENV, str(tmp_path / "state"))
+    palace = tmp_path / "palace"
+    palace.mkdir()
+
+    captured = {}
+
+    def _spy_runtime(*args, **kwargs):
+        current = os.umask(0o022)
+        os.umask(current)  # restore without changing
+        captured["umask"] = current
+        raise RuntimeError("stop before binding a real socket")
+
+    monkeypatch.setattr(daemon, "DaemonRuntime", _spy_runtime)
+    with pytest.raises(RuntimeError):
+        daemon.run_server(str(palace), port=0)
+    assert captured["umask"] == 0o077
+
+
+def test_negative_content_length_is_rejected_without_blocking(tmp_path, monkeypatch):
+    """A POST with Content-Length: -1 must get a prompt 400, not hang the worker.
+
+    rfile.read(-1) would read until the client closes the socket (an auth-gated
+    DoS) and bypass the MAX_BODY_BYTES cap. The recv timeout below turns a
+    regression into a failure instead of a hang.
+    """
+    import socket
+
+    client, thread, palace, holders = _start_server(
+        tmp_path, monkeypatch, lambda k, p: {"success": True}
+    )
+    try:
+        sock = socket.create_connection((client.host, client.port), timeout=5)
+        sock.settimeout(5)
+        request = (
+            "POST /jobs HTTP/1.1\r\n"
+            "Host: daemon\r\n"
+            f"Authorization: Bearer {client.token}\r\n"
+            "Content-Length: -1\r\n"
+            "Connection: close\r\n\r\n"
+        )
+        sock.sendall(request.encode("ascii"))
+        status_line = sock.recv(4096).decode("latin-1").split("\r\n", 1)[0]
+        sock.close()
+        assert "400" in status_line, f"expected 400, got {status_line!r}"
+    finally:
+        _stop_server(client, thread, holders)
+
+
+def test_run_mcp_tool_marks_bare_error_dict_as_failure(monkeypatch):
+    """A write tool that returns {"error": ...} with no success flag must be
+    recorded as a failed job, not succeeded (Copilot review)."""
+    import mempalace.mcp_server as mcp
+    from mempalace import service
+
+    monkeypatch.setattr(
+        mcp,
+        "TOOLS",
+        {"mempalace_create_tunnel": {"handler": lambda **kw: {"error": "bad endpoint"}}},
+    )
+    out = service.run_mcp_tool({"name": "mempalace_create_tunnel", "arguments": {}})
+    assert out["success"] is False
+    assert out["exit_code"] == 1
+    assert out["error"] == "bad endpoint"
+
+    # A result with neither an explicit success flag nor an error is a success.
+    monkeypatch.setattr(
+        mcp, "TOOLS", {"mempalace_create_tunnel": {"handler": lambda **kw: {"tunnel_id": "t1"}}}
+    )
+    out = service.run_mcp_tool({"name": "mempalace_create_tunnel", "arguments": {}})
+    assert out["success"] is True
+    assert out["exit_code"] == 0
+
+
+def test_get_client_if_running_uses_short_probe_timeout(monkeypatch):
+    """The hook liveness precheck must pass a short health timeout so a wedged
+    daemon can't stall the hook past its budget (Copilot review)."""
+    captured = {}
+
+    class _FakeClient:
+        def __init__(self, palace_path):
+            pass
+
+        def health(self, *, timeout):
+            captured["timeout"] = timeout
+            return {"ok": True}
+
+    monkeypatch.setattr(daemon, "DaemonClient", _FakeClient)
+
+    assert daemon.HOOK_PROBE_TIMEOUT <= 0.5
+    client = daemon.get_client_if_running("/p", health_timeout=daemon.HOOK_PROBE_TIMEOUT)
+    assert client is not None
+    assert captured["timeout"] == daemon.HOOK_PROBE_TIMEOUT
diff --git a/tests/test_embedding.py b/tests/test_embedding.py
index d05075d69..3c533254e 100644
--- a/tests/test_embedding.py
+++ b/tests/test_embedding.py
@@ -73,7 +73,7 @@ def fake_import(name, *args, **kwargs):
 
 def test_get_embedding_function_caches_by_resolved_provider_tuple(monkeypatch):
     class DummyEF:
-        def __init__(self, preferred_providers):
+        def __init__(self, preferred_providers, intra_op_num_threads=0):
             self.preferred_providers = preferred_providers
 
     monkeypatch.setattr(embedding, "_build_ef_class", lambda: DummyEF)
@@ -81,13 +81,109 @@ def __init__(self, preferred_providers):
         embedding, "_resolve_providers", lambda device: (["CPUExecutionProvider"], "cpu")
     )
 
-    first = embedding.get_embedding_function("cpu")
-    second = embedding.get_embedding_function("auto")
+    first = embedding.get_embedding_function("cpu", "minilm")
+    second = embedding.get_embedding_function("auto", "minilm")
 
     assert first is second
     assert first.preferred_providers == ["CPUExecutionProvider"]
 
 
+def test_intra_op_session_options_caps_threads():
+    so = embedding._intra_op_session_options(3)
+    assert so is not None
+    assert so.intra_op_num_threads == 3
+
+
+def test_intra_op_session_options_uncapped_returns_none():
+    assert embedding._intra_op_session_options(0) is None
+    assert embedding._intra_op_session_options(-1) is None
+
+
+def test_get_embedding_function_threads_cap_passed_to_minilm_ef(monkeypatch):
+    captured = {}
+
+    class DummyEF:
+        def __init__(self, preferred_providers, intra_op_num_threads=0):
+            captured["threads"] = intra_op_num_threads
+
+    monkeypatch.setattr(embedding, "_build_ef_class", lambda: DummyEF)
+    monkeypatch.setattr(
+        embedding, "_resolve_providers", lambda device: (["CPUExecutionProvider"], "cpu")
+    )
+    monkeypatch.setattr(embedding, "_resolve_intra_op_threads", lambda: 2)
+
+    embedding.get_embedding_function("cpu", "minilm")
+
+    assert captured["threads"] == 2
+
+
+def test_get_embedding_function_threads_cap_passed_to_embeddinggemma(monkeypatch):
+    captured = {}
+
+    class DummyGemma:
+        def __init__(self, preferred_providers=None, intra_op_num_threads=0):
+            captured["threads"] = intra_op_num_threads
+
+    monkeypatch.setattr(embedding, "EmbeddinggemmaONNX", DummyGemma)
+    monkeypatch.setattr(
+        embedding, "_resolve_providers", lambda device: (["CPUExecutionProvider"], "cpu")
+    )
+    monkeypatch.setattr(embedding, "_resolve_intra_op_threads", lambda: 4)
+
+    embedding.get_embedding_function("cpu", "embeddinggemma")
+
+    assert captured["threads"] == 4
+
+
+def test_minilm_ef_model_override_applies_thread_cap(monkeypatch):
+    """The ``_MempalaceONNX.model`` override must construct the ORT session
+    with the configured ``intra_op_num_threads`` (#1068). We stub
+    ``InferenceSession`` to capture the ``SessionOptions`` it receives, so the
+    test never downloads or loads the real model."""
+    import onnxruntime as ort
+
+    captured = {}
+
+    def fake_session(model_path, providers=None, sess_options=None):
+        captured["sess_options"] = sess_options
+        captured["providers"] = providers
+        return object()
+
+    monkeypatch.setattr(ort, "InferenceSession", fake_session)
+
+    ef_cls = embedding._build_ef_class()
+    ef = ef_cls(preferred_providers=["CPUExecutionProvider"], intra_op_num_threads=2)
+    _ = ef.model  # triggers the cached_property build
+
+    assert captured["sess_options"] is not None
+    assert captured["sess_options"].intra_op_num_threads == 2
+    assert "CoreMLExecutionProvider" not in captured["providers"]
+
+
+def test_minilm_ef_model_override_falls_back_when_uncapped(monkeypatch):
+    """With no cap (0), the override must defer to the parent build via
+    ``super().model`` — not reach into ``cached_property`` internals (#1068
+    review). Proves super() resolves the parent descriptor without error."""
+    import onnxruntime as ort
+
+    captured = {}
+
+    def fake_session(model_path, providers=None, sess_options=None):
+        captured["sess_options"] = sess_options
+        return object()
+
+    monkeypatch.setattr(ort, "InferenceSession", fake_session)
+
+    ef_cls = embedding._build_ef_class()
+    ef = ef_cls(preferred_providers=["CPUExecutionProvider"], intra_op_num_threads=0)
+    session = ef.model  # cap <= 0 → super().model (upstream builder)
+
+    assert session is not None
+    # Upstream leaves intra_op at ORT's default (0 = unset), confirming we
+    # deferred to it rather than applying our cap.
+    assert captured["sess_options"].intra_op_num_threads == 0
+
+
 def test_describe_device_uses_resolved_effective_device(monkeypatch):
     monkeypatch.setattr(
         embedding,
diff --git a/tests/test_embeddinggemma.py b/tests/test_embeddinggemma.py
index 6ad398a9f..54d78b3ae 100644
--- a/tests/test_embeddinggemma.py
+++ b/tests/test_embeddinggemma.py
@@ -347,7 +347,7 @@ def test_cache_key_separates_models(monkeypatch):
     """
 
     class DummyMiniLM:
-        def __init__(self, preferred_providers=None):
+        def __init__(self, preferred_providers=None, intra_op_num_threads=0):
             self.kind = "minilm"
 
     monkeypatch.setattr(embedding, "_build_ef_class", lambda: DummyMiniLM)
diff --git a/tests/test_fact_checker.py b/tests/test_fact_checker.py
index 89d83663a..80231b892 100644
--- a/tests/test_fact_checker.py
+++ b/tests/test_fact_checker.py
@@ -19,6 +19,9 @@
 from __future__ import annotations
 
 import json
+import os
+import subprocess
+import sys
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -260,32 +263,45 @@ def test_registry_confusion_path_isolated_from_kg(self, tmp_path, monkeypatch):
 
 
 class TestCLI:
-    def test_exits_nonzero_when_issues_found(self, tmp_path, monkeypatch, capsys):
+    def test_exits_nonzero_when_issues_found(self, tmp_path):
         """The CLI exit code is how shell scripts / hooks know to act —
-        pin it explicitly."""
-        registry = tmp_path / "known_entities.json"
-        registry.write_text(json.dumps({"people": ["Milla", "Mila"]}))
-        from mempalace import fact_checker, miner
+        pin it explicitly.
 
-        monkeypatch.setattr(miner, "_ENTITY_REGISTRY_PATH", str(registry))
-        miner._ENTITY_REGISTRY_CACHE.update({"mtime": None, "names": frozenset(), "raw": {}})
-
-        # Simulate argv: "Mila said hi"
-        monkeypatch.setattr(
-            "sys.argv",
-            ["fact_checker", "Mila said hi", "--palace", str(tmp_path / "palace")],
+        Uses a fresh subprocess so that the already-imported
+        ``mempalace.fact_checker`` module in the test process does not
+        collide with runpy re-executing it as ``__main__``, which produced
+        a spurious RuntimeWarning from <frozen runpy>.
+        """
+        # Place the registry where the subprocess's miner will find it:
+        # $HOME/.mempalace/known_entities.json.  We give the subprocess a
+        # private HOME so we don't touch the developer's real registry.
+        fake_home = tmp_path / "home"
+        mempalace_dir = fake_home / ".mempalace"
+        mempalace_dir.mkdir(parents=True)
+        (mempalace_dir / "known_entities.json").write_text(
+            json.dumps({"people": ["Milla", "Mila"]})
         )
-        with pytest.raises(SystemExit) as excinfo:
-            # Re-exec the __main__ block via runpy.
-            import runpy
 
-            runpy.run_module("mempalace.fact_checker", run_name="__main__")
+        env = {**os.environ, "HOME": str(fake_home), "USERPROFILE": str(fake_home)}
+        result = subprocess.run(
+            [
+                sys.executable,
+                "-m",
+                "mempalace.fact_checker",
+                "Mila said hi",
+                "--palace",
+                str(tmp_path / "palace"),
+            ],
+            capture_output=True,
+            text=True,
+            env=env,
+        )
         # Issues found → exit code 1.
-        assert excinfo.value.code == 1
-        out = capsys.readouterr().out
-        assert "similar_name" in out
-        # Silence unused import warning.
-        _ = (MagicMock, patch, fact_checker)
+        assert result.returncode == 1
+        assert "similar_name" in result.stdout
+        # Silence unused import warning (MagicMock, patch still used by
+        # other tests in the class).
+        _ = (MagicMock, patch)
 
     def test_reconfigures_stdio_to_utf8_on_windows(self):
         """Windows fact_checker --stdin must decode payload as UTF-8.
diff --git a/tests/test_hnsw_capacity.py b/tests/test_hnsw_capacity.py
index 53775b096..7838993c5 100644
--- a/tests/test_hnsw_capacity.py
+++ b/tests/test_hnsw_capacity.py
@@ -11,6 +11,7 @@
 import os
 import pickle
 import sqlite3
+import time
 
 import pytest
 
@@ -588,7 +589,9 @@ def test_bm25_fallback_handles_short_query(palace_with_drawers):
 
 
 def test_repair_status_reports_diverged(tmp_path, capsys):
-    """The status command prints DIVERGED and recommends rebuild."""
+    """The status command prints DIVERGED and recommends the from-sqlite
+    rebuild (not a re-mine), since a diverged index means the rows are
+    intact in sqlite but the HNSW segment is out of sync (#1843)."""
     from mempalace.repair import status as repair_status
 
     seg = "seg-status"
@@ -597,7 +600,8 @@ def test_repair_status_reports_diverged(tmp_path, capsys):
     out = repair_status(palace_path=str(tmp_path))
     captured = capsys.readouterr().out
     assert "DIVERGED" in captured
-    assert "mempalace repair`" in captured
+    assert "mempalace repair --mode from-sqlite --archive-existing" in captured
+    assert "Do not re-mine" in captured
     assert out["drawers"]["diverged"] is True
 
 
@@ -640,3 +644,54 @@ class _Cfg:
     # ops×2 (incident + repair runbook), design×1 (metaphor).
     assert out["wings"].get("ops") == 2
     assert out["wings"].get("design") == 1
+
+
+def test_capacity_status_flags_small_gap_with_explicit_low_sync_threshold(tmp_path):
+    """New palaces use a low explicit sync threshold, so 57 missing rows is unsafe."""
+    seg = "seg-1816-explicit-low-sync"
+    _seed_chroma_db(str(tmp_path), sqlite_count=1768, segment_id=seg, sync_threshold=2)
+    _write_pickle(str(tmp_path), seg, hnsw_count=1711)
+
+    info = hnsw_capacity_status(str(tmp_path), COLLECTION)
+
+    assert info["divergence"] == 57
+    assert info["threshold"] == 4
+    assert info["status"] == "diverged"
+    assert info["diverged"] is True
+    assert "repair" in info["message"].lower()
+
+
+def test_capacity_status_flags_stale_below_floor_divergence(tmp_path):
+    """A persistent below-floor sqlite>HNSW gap must not be treated as fresh lag."""
+    from mempalace.backends import chroma
+
+    seg = "seg-1816-stale-below-floor"
+    _seed_chroma_db(str(tmp_path), sqlite_count=1768, segment_id=seg)
+    _write_pickle(str(tmp_path), seg, hnsw_count=1711)
+
+    pickle_path = tmp_path / seg / "index_metadata.pickle"
+    old = time.time() - chroma._HNSW_PERSISTENT_DIVERGENCE_GRACE_SECONDS - 10
+    os.utime(pickle_path, (old, old))
+
+    info = hnsw_capacity_status(str(tmp_path), COLLECTION)
+
+    assert info["divergence"] == 57
+    assert info["threshold"] >= 2000
+    assert info["status"] == "diverged"
+    assert info["diverged"] is True
+    assert "persisted below" in info["message"]
+
+
+def test_capacity_status_ok_with_stale_metadata_under_explicit_threshold(tmp_path):
+    """An idle database with an explicit sync threshold and a gap within tolerance must remain OK."""
+    seg = "seg-1816-stale-ok"
+    _seed_chroma_db(str(tmp_path), sqlite_count=1712, segment_id=seg, sync_threshold=2)
+    _write_pickle(str(tmp_path), seg, hnsw_count=1711)
+    pickle_path = tmp_path / seg / "index_metadata.pickle"
+    old = time.time() - 400.0
+    os.utime(pickle_path, (old, old))
+    info = hnsw_capacity_status(str(tmp_path), COLLECTION)
+    assert info["divergence"] == 1
+    assert info["threshold"] == 4
+    assert info["status"] == "ok"
+    assert info["diverged"] is False
diff --git a/tests/test_hooks_bash_compat.py b/tests/test_hooks_bash_compat.py
index bc194409d..26469b586 100644
--- a/tests/test_hooks_bash_compat.py
+++ b/tests/test_hooks_bash_compat.py
@@ -27,6 +27,14 @@
 REPO_ROOT = Path(__file__).resolve().parent.parent
 SAVE_HOOK = REPO_ROOT / "hooks" / "mempal_save_hook.sh"
 PRECOMPACT_HOOK = REPO_ROOT / "hooks" / "mempal_precompact_hook.sh"
+SESSION_END_HOOK = REPO_ROOT / "hooks" / "mempal_session_end_hook.sh"
+PLUGIN_SESSION_END_HOOK = REPO_ROOT / ".claude-plugin" / "hooks" / "mempal-session-end-hook.sh"
+
+_SESSION_END_HOOKS = pytest.mark.parametrize(
+    "hook",
+    [SESSION_END_HOOK, PLUGIN_SESSION_END_HOOK],
+    ids=["user_hook", "plugin_hook"],
+)
 
 # Re-used by every parametrize decorator that runs the same test against
 # both hooks. ``ids=`` keeps pytest output readable (`...[save_hook]`
@@ -322,3 +330,40 @@ def test_python_stderr_log_is_not_world_readable_on_failure(self, hook, tmp_path
         err_log = tmp_path / ".mempalace" / "hook_state" / "last_python_err.log"
         mode = stat.S_IMODE(err_log.stat().st_mode)
         assert mode == 0o600, f"last_python_err.log mode should be 0600 on failure, got {oct(mode)}"
+
+
+class TestSessionEndWrappers:
+    """The SessionEnd wrappers must background their work — so the foreground
+    beats Claude Code's ~1.5s SessionEnd budget (a plugin-provided per-hook
+    timeout cannot raise it) — and stay bash 3.2-safe like the other hooks."""
+
+    @_SESSION_END_HOOKS
+    def test_bash_syntax_clean(self, hook):
+        p = subprocess.run(["bash", "-n", str(hook)], capture_output=True, text=True)
+        assert p.returncode == 0, f"{hook.name} syntax error: {p.stderr}"
+
+    @_SESSION_END_HOOKS
+    def test_dispatches_session_end_through_cli(self, hook):
+        src = _hook_src_no_comments(hook)
+        # The dispatcher runs ``mempalace hook run "$@"`` in run_mempalace_hook,
+        # and the bottom call supplies the ``--hook session-end --harness`` args
+        # — so the two halves are asserted separately, not as one contiguous string.
+        assert "hook run" in src
+        assert "--hook session-end --harness" in src
+        assert "MEMPALACE_HOOK_HARNESS" in src
+        assert "MEMPAL_PYTHON" in src
+
+    @_SESSION_END_HOOKS
+    def test_backgrounds_then_returns_empty(self, hook):
+        src = _hook_src_no_comments(hook)
+        assert "</dev/null &" in src, f"{hook.name} does not background the worker"
+        assert "disown" in src, f"{hook.name} does not disown the backgrounded worker"
+        assert src.rstrip().endswith("printf '{}'"), (
+            f"{hook.name} must return the empty payload immediately"
+        )
+
+    @_SESSION_END_HOOKS
+    def test_no_bash4_only_builtins(self, hook):
+        src = _hook_src_no_comments(hook)
+        assert "mapfile" not in src, f"{hook.name} uses mapfile (bash 4 only)"
+        assert "readarray" not in src, f"{hook.name} uses readarray (bash 4 only)"
diff --git a/tests/test_hooks_cli.py b/tests/test_hooks_cli.py
index e09e2ba21..4ee3bb46e 100644
--- a/tests/test_hooks_cli.py
+++ b/tests/test_hooks_cli.py
@@ -16,6 +16,7 @@
     _diary_agent_for_harness,
     _extract_recent_messages,
     _get_mine_targets,
+    _hooks_daemon_enabled,
     _log,
     _maybe_auto_ingest,
     _mempalace_python,
@@ -28,6 +29,7 @@
     _wing_from_transcript_path,
     hook_stop,
     hook_session_start,
+    hook_session_end,
     hook_precompact,
     run_hook,
     _claim_mine_slot,
@@ -467,6 +469,45 @@ def test_stop_hook_checkpoint_visible_to_diary_read(monkeypatch, config, palace_
     assert legacy.get("entries") == []
 
 
+def test_save_diary_direct_daemon_opt_in_submits_job(tmp_path):
+    transcript = tmp_path / "session.jsonl"
+    palace_dir = tmp_path / "palace"
+    palace_dir.mkdir()
+    _write_transcript(
+        transcript,
+        [{"message": {"role": "user", "content": f"message {i}"}} for i in range(3)],
+    )
+    env = {"MEMPALACE_HOOKS_DAEMON": "yes", "MEMPALACE_PALACE_PATH": str(palace_dir)}
+    job = {"id": "job", "state": "succeeded", "result": {"success": True, "entry_id": "e1"}}
+
+    with patch.dict("os.environ", env):
+        with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+            with patch("mempalace.hooks_cli._daemon_available", return_value=True):
+                with patch("mempalace.daemon.submit_job", return_value=job) as mock_submit:
+                    result = _save_diary_direct(
+                        str(transcript),
+                        "sess1",
+                        wing="wing_project",
+                        agent_name="claude",
+                    )
+
+    assert result["count"] == 3
+    mock_submit.assert_called_once()
+    assert mock_submit.call_args.args[0] == "diary_write"
+    payload = mock_submit.call_args.args[1]
+    assert payload["agent_name"] == "claude"
+    assert payload["wing"] == "wing_project"
+    assert payload["topic"] == "checkpoint"
+    assert (tmp_path / "last_checkpoint").exists()
+
+
+def test_hooks_daemon_enabled_requires_explicit_true():
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        assert _hooks_daemon_enabled() is False
+        mock_cfg_cls.return_value.hook_use_daemon = True
+        assert _hooks_daemon_enabled() is True
+
+
 # --- hook_session_start ---
 
 
@@ -788,6 +829,33 @@ def test_maybe_auto_ingest_with_env(tmp_path):
                     assert cmd[cmd.index("--mode") + 1] == "projects"
 
 
+def test_maybe_auto_ingest_daemon_opt_in_submits_job(tmp_path):
+    """Daemon-enabled hooks submit a background mine instead of spawning one."""
+    mempal_dir = tmp_path / "project"
+    palace_dir = tmp_path / "palace"
+    mempal_dir.mkdir()
+    palace_dir.mkdir()
+    env = {
+        "MEMPAL_DIR": str(mempal_dir),
+        "MEMPALACE_HOOKS_DAEMON": "yes",
+        "MEMPALACE_PALACE_PATH": str(palace_dir),
+    }
+    with patch.dict("os.environ", env):
+        with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+            with patch("mempalace.hooks_cli._daemon_available", return_value=True):
+                with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
+                    with patch(
+                        "mempalace.daemon.submit_job", return_value={"id": "job"}
+                    ) as mock_submit:
+                        _maybe_auto_ingest()
+
+    mock_popen.assert_not_called()
+    mock_submit.assert_called_once()
+    assert mock_submit.call_args.args[0] == "mine"
+    assert mock_submit.call_args.args[1]["source"] == str(mempal_dir.resolve())
+    assert mock_submit.call_args.kwargs["wait"] is False
+
+
 def test_maybe_auto_ingest_uses_mempalace_python(tmp_path):
     """Spawned mine command uses _mempalace_python(), not bare sys.executable.
 
@@ -968,17 +1036,26 @@ def test_detached_popen_kwargs_posix(monkeypatch):
 
 
 def test_detached_popen_kwargs_windows(monkeypatch):
-    """On Windows, kwargs include creationflags that fully detach the child.
-
-    Without these, the parent hook hangs at session end on Windows because
-    the child's inherited stdout/stderr handles keep the parent's exit
-    blocked (#1268 root cause for the Python hook path).
+    """On Windows, the miner child gets a hidden console (CREATE_NO_WINDOW),
+    not a detached/no-console child (DETACHED_PROCESS).
+
+    DETACHED_PROCESS gave the child no console at all, which caused any
+    console grandchild it spawned to allocate a fresh *visible* window
+    (#1783). CREATE_NO_WINDOW gives a real-but-invisible console that all
+    descendants inherit, so nothing flashes — while still fixing the
+    #1268 hang (stdin=DEVNULL, close_fds, explicit stdout/stderr redirect,
+    and CREATE_NEW_PROCESS_GROUP for the signal boundary are unchanged).
+    Per the Win32 CreateProcess docs CREATE_NO_WINDOW is ignored when OR'd
+    with DETACHED_PROCESS, so the two must be mutually exclusive.
     """
     from mempalace.hooks_cli import _detached_popen_kwargs
 
     monkeypatch.setattr("mempalace.hooks_cli.os.name", "nt")
-    # Simulate Windows-only Popen flag constants. Patch on the imported
-    # subprocess module within hooks_cli so getattr() picks them up.
+    # Simulate Windows-only Popen flag constants on the imported subprocess
+    # module so getattr() picks them up cross-platform.
+    monkeypatch.setattr(
+        "mempalace.hooks_cli.subprocess.CREATE_NO_WINDOW", 0x08000000, raising=False
+    )
     monkeypatch.setattr(
         "mempalace.hooks_cli.subprocess.DETACHED_PROCESS", 0x00000008, raising=False
     )
@@ -989,7 +1066,10 @@ def test_detached_popen_kwargs_windows(monkeypatch):
     assert kwargs.get("stdin") is subprocess.DEVNULL
     assert kwargs.get("close_fds") is True
     flags = kwargs.get("creationflags", 0)
-    assert flags & 0x00000008, "DETACHED_PROCESS must be set"
+    assert flags & 0x08000000, "CREATE_NO_WINDOW must be set"
+    assert not (flags & 0x00000008), (
+        "DETACHED_PROCESS must NOT be set (it suppresses CREATE_NO_WINDOW)"
+    )
     assert flags & 0x00000200, "CREATE_NEW_PROCESS_GROUP must be set"
 
 
@@ -1114,6 +1194,31 @@ def test_ingest_transcript_uses_detached_kwargs(tmp_path):
                 assert kwargs.get("close_fds") is True
 
 
+def test_ingest_transcript_daemon_opt_in_submits_job(tmp_path):
+    transcript = tmp_path / "session.jsonl"
+    palace_dir = tmp_path / "palace"
+    palace_dir.mkdir()
+    transcript.write_text("x" * 200)
+    env = {"MEMPALACE_HOOKS_DAEMON": "yes", "MEMPALACE_PALACE_PATH": str(palace_dir)}
+    with patch.dict("os.environ", env):
+        with patch("mempalace.hooks_cli.STATE_DIR", tmp_path):
+            with patch("mempalace.hooks_cli._daemon_available", return_value=True):
+                with patch("mempalace.hooks_cli.subprocess.Popen") as mock_popen:
+                    with patch(
+                        "mempalace.daemon.submit_job", return_value={"id": "job"}
+                    ) as mock_submit:
+                        from mempalace.hooks_cli import _ingest_transcript
+
+                        _ingest_transcript(str(transcript))
+
+    mock_popen.assert_not_called()
+    mock_submit.assert_called_once()
+    payload = mock_submit.call_args.args[1]
+    assert payload["source"] == str(tmp_path)
+    assert payload["mode"] == "convos"
+    assert payload["wing"] == "sessions"
+
+
 def test_ingest_transcript_skips_when_target_running(tmp_path):
     """Repeated transcript ingests for the same transcript should dedup."""
     transcript = tmp_path / "session.jsonl"
@@ -1781,6 +1886,284 @@ def test_hook_stop_does_not_create_palace_dir_when_absent(tmp_path, monkeypatch)
     assert not fake_root.exists()
 
 
+# ── session-end hook (#1341) ──────────────────────────────────────────────
+
+
+def test_run_hook_dispatches_session_end():
+    stdin_data = json.dumps({"session_id": "run-test"})
+    with patch("sys.stdin", io.StringIO(stdin_data)):
+        with patch("mempalace.hooks_cli.hook_session_end") as mock_hook:
+            run_hook("session-end", "claude-code")
+    mock_hook.assert_called_once_with({"session_id": "run-test"}, "claude-code")
+
+
+def test_session_end_uses_detached_paths_not_sync_mine(tmp_path):
+    """SessionEnd must spawn the detached ingest/auto-ingest and NEVER call the
+    synchronous ``_mine_sync``: the foreground SessionEnd budget (~1.5s, which a
+    plugin-provided per-hook timeout cannot raise) would kill a synchronous mine
+    before it saved anything.
+    """
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(
+        transcript,
+        [{"message": {"role": "user", "content": f"msg {i}"}} for i in range(3)],
+    )
+    last_save_file = tmp_path / "sess_last_save"
+    last_save_file.write_text("2", encoding="utf-8")
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        mock_cfg_cls.return_value.hooks_auto_save = True
+        mock_cfg_cls.return_value.hook_desktop_toast = False
+        with (
+            patch(
+                "mempalace.hooks_cli._save_diary_direct",
+                return_value={"count": 3, "themes": ["exit"]},
+            ) as mock_save,
+            patch("mempalace.hooks_cli._ingest_transcript") as mock_ingest,
+            patch("mempalace.hooks_cli._maybe_auto_ingest") as mock_auto,
+            patch("mempalace.hooks_cli._mine_sync") as mock_sync,
+        ):
+            result = _capture_hook_output(
+                hook_session_end,
+                {"session_id": "sess", "transcript_path": str(transcript)},
+                state_dir=tmp_path,
+            )
+    assert result == {}
+    expected_path = str(transcript.resolve())
+    mock_ingest.assert_called_once_with(expected_path)
+    mock_auto.assert_called_once()
+    mock_sync.assert_not_called()
+    mock_save.assert_called_once_with(
+        expected_path, "sess", wing="wing_sessions", toast=False, agent_name="claude"
+    )
+    # The session is over; its per-session save marker is cleared.
+    assert not last_save_file.exists()
+
+
+def test_session_end_disabled_by_config_clears_marker(tmp_path):
+    last_save_file = tmp_path / "sess_last_save"
+    last_save_file.write_text("15", encoding="utf-8")
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        mock_cfg_cls.return_value.hooks_auto_save = False
+        with (
+            patch("mempalace.hooks_cli._save_diary_direct") as mock_save,
+            patch("mempalace.hooks_cli._ingest_transcript") as mock_ingest,
+            patch("mempalace.hooks_cli._maybe_auto_ingest") as mock_auto,
+        ):
+            result = _capture_hook_output(
+                hook_session_end,
+                {"session_id": "sess", "transcript_path": ""},
+                state_dir=tmp_path,
+            )
+    assert result == {}
+    mock_save.assert_not_called()
+    mock_ingest.assert_not_called()
+    mock_auto.assert_not_called()
+    assert not last_save_file.exists()
+
+
+def test_session_end_defaults_to_saving_when_config_unreadable(tmp_path):
+    """A corrupt/unreadable config must not lose the final save: the handler
+    defaults to auto-save on (toasts off) instead of crashing the hook."""
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(transcript, [{"message": {"role": "user", "content": "hi"}}])
+    with patch("mempalace.hooks_cli.MempalaceConfig", side_effect=RuntimeError("corrupt config")):
+        with (
+            patch(
+                "mempalace.hooks_cli._save_diary_direct",
+                return_value={"count": 1, "themes": []},
+            ) as mock_save,
+            patch("mempalace.hooks_cli._ingest_transcript") as mock_ingest,
+            patch("mempalace.hooks_cli._maybe_auto_ingest") as mock_auto,
+        ):
+            result = _capture_hook_output(
+                hook_session_end,
+                {"session_id": "cfg", "transcript_path": str(transcript)},
+                state_dir=tmp_path,
+            )
+    assert result == {}
+    mock_save.assert_called_once()
+    mock_ingest.assert_called_once()
+    mock_auto.assert_called_once()
+
+
+def test_session_end_clears_marker_even_if_capture_raises(tmp_path):
+    """Marker cleanup runs in a ``finally`` so a failing ingest still cleans up
+    and never wedges the per-session marker on."""
+    last_save_file = tmp_path / "boom_last_save"
+    last_save_file.write_text("5", encoding="utf-8")
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        mock_cfg_cls.return_value.hooks_auto_save = True
+        mock_cfg_cls.return_value.hook_desktop_toast = False
+        with (
+            patch("mempalace.hooks_cli.STATE_DIR", tmp_path),
+            patch(
+                "mempalace.hooks_cli._ingest_transcript",
+                side_effect=RuntimeError("boom"),
+            ),
+            patch("mempalace.hooks_cli._output"),
+        ):
+            with pytest.raises(RuntimeError):
+                hook_session_end(
+                    {"session_id": "boom", "transcript_path": str(tmp_path / "x.jsonl")},
+                    "claude-code",
+                )
+    assert not last_save_file.exists()
+
+
+def test_session_end_clears_marker_on_parse_failure(tmp_path):
+    """A non-dict payload makes _parse_harness_input raise, but parsing now runs
+    inside the try, so the finally still clears the default-session marker
+    instead of skipping cleanup entirely."""
+    last_save_file = tmp_path / "unknown_last_save"
+    last_save_file.write_text("5", encoding="utf-8")
+    with (
+        patch("mempalace.hooks_cli.STATE_DIR", tmp_path),
+        patch("mempalace.hooks_cli._output"),
+    ):
+        with pytest.raises(AttributeError):
+            hook_session_end(["not", "a", "dict"], "claude-code")
+    assert not last_save_file.exists()
+
+
+def test_hook_session_end_does_not_create_palace_dir_when_absent(tmp_path, monkeypatch):
+    fake_root = _redirect_palace_root(monkeypatch, tmp_path)
+    buf = io.StringIO()
+    with contextlib.redirect_stdout(buf):
+        hook_session_end({"session_id": "absent", "transcript_path": ""}, "claude-code")
+    assert json.loads(buf.getvalue() or "{}") == {}
+    assert not fake_root.exists()
+
+
+def test_session_end_auto_ingest_only_when_no_transcript(tmp_path):
+    """auto_save on but no transcript: project mine still fires, transcript-only
+    paths are skipped, and the marker is still cleared. Guards against wrapping
+    ``_maybe_auto_ingest`` inside the ``if transcript_path`` block by mistake."""
+    last_save_file = tmp_path / "sess_last_save"
+    last_save_file.write_text("3", encoding="utf-8")
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        mock_cfg_cls.return_value.hooks_auto_save = True
+        mock_cfg_cls.return_value.hook_desktop_toast = False
+        with (
+            patch("mempalace.hooks_cli._ingest_transcript") as mock_ingest,
+            patch("mempalace.hooks_cli._maybe_auto_ingest") as mock_auto,
+            patch("mempalace.hooks_cli._save_diary_direct") as mock_save,
+            patch("mempalace.hooks_cli._mine_sync") as mock_sync,
+        ):
+            result = _capture_hook_output(
+                hook_session_end,
+                {"session_id": "sess", "transcript_path": ""},
+                state_dir=tmp_path,
+            )
+    assert result == {}
+    mock_auto.assert_called_once()
+    mock_ingest.assert_not_called()
+    mock_save.assert_not_called()
+    mock_sync.assert_not_called()
+    assert not last_save_file.exists()
+
+
+def test_session_end_rejects_invalid_transcript_path(tmp_path):
+    """A traversal / wrong-suffix transcript_path is rejected by the validator:
+    no transcript ingest or diary write fires (the independent project mine
+    still runs), and the per-session marker is still cleared."""
+    last_save_file = tmp_path / "bad_last_save"
+    last_save_file.write_text("5", encoding="utf-8")
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        mock_cfg_cls.return_value.hooks_auto_save = True
+        mock_cfg_cls.return_value.hook_desktop_toast = False
+        with (
+            patch("mempalace.hooks_cli._ingest_transcript") as mock_ingest,
+            patch("mempalace.hooks_cli._save_diary_direct") as mock_save,
+            patch("mempalace.hooks_cli._maybe_auto_ingest") as mock_auto,
+        ):
+            result = _capture_hook_output(
+                hook_session_end,
+                {"session_id": "bad", "transcript_path": "../../etc/passwd"},
+                state_dir=tmp_path,
+            )
+    assert result == {}
+    mock_ingest.assert_not_called()
+    mock_save.assert_not_called()
+    mock_auto.assert_called_once()  # project mine is independent of the transcript
+    assert not last_save_file.exists()
+
+
+def test_session_end_accepts_full_sessionend_payload(tmp_path):
+    """The handler tolerates the real Claude Code SessionEnd stdin (reason,
+    hook_event_name, cwd) — extra keys ignored, behavior unchanged. Pins the
+    contract so a future strict-parse or reason-branching regression is caught."""
+    transcript = tmp_path / "t.jsonl"
+    _write_transcript(transcript, [{"message": {"role": "user", "content": "hi"}}])
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        mock_cfg_cls.return_value.hooks_auto_save = True
+        mock_cfg_cls.return_value.hook_desktop_toast = False
+        with (
+            patch(
+                "mempalace.hooks_cli._save_diary_direct",
+                return_value={"count": 1, "themes": []},
+            ),
+            patch("mempalace.hooks_cli._ingest_transcript") as mock_ingest,
+            patch("mempalace.hooks_cli._maybe_auto_ingest") as mock_auto,
+            patch("mempalace.hooks_cli._mine_sync") as mock_sync,
+        ):
+            result = _capture_hook_output(
+                hook_session_end,
+                {
+                    "session_id": "sess",
+                    "transcript_path": str(transcript),
+                    "hook_event_name": "SessionEnd",
+                    "reason": "logout",
+                    "cwd": str(tmp_path),
+                },
+                state_dir=tmp_path,
+            )
+    assert result == {}
+    mock_ingest.assert_called_once_with(str(transcript.resolve()))
+    mock_auto.assert_called_once()
+    mock_sync.assert_not_called()
+
+
+def test_session_end_checkpoint_visible_to_diary_read(
+    monkeypatch, config, palace_path, kg, tmp_path
+):
+    """The SessionEnd in-process diary checkpoint is real and discoverable via
+    diary_read (not mocked) — the diary half of #1341's clean-exit capture,
+    mirroring test_stop_hook_checkpoint_visible_to_diary_read."""
+    import chromadb
+
+    from mempalace import mcp_server
+    from mempalace.mcp_server import tool_diary_read
+
+    monkeypatch.setattr(mcp_server, "_config", config)
+    monkeypatch.setattr(mcp_server, "_get_kg", lambda *a, **kw: kg)
+    client = chromadb.PersistentClient(path=palace_path)
+    client.get_or_create_collection("mempalace_drawers", metadata={"hnsw:space": "cosine"})
+    del client
+
+    transcript = tmp_path / "session.jsonl"
+    _write_transcript(
+        transcript,
+        [{"message": {"role": "user", "content": f"msg {i}"}} for i in range(5)],
+    )
+
+    with patch("mempalace.hooks_cli.MempalaceConfig") as mock_cfg_cls:
+        mock_cfg_cls.return_value.hooks_auto_save = True
+        mock_cfg_cls.return_value.hook_desktop_toast = False
+        # Mock only the detached subprocess mines; run the REAL diary write.
+        with (
+            patch("mempalace.hooks_cli._ingest_transcript"),
+            patch("mempalace.hooks_cli._maybe_auto_ingest"),
+        ):
+            hook_session_end(
+                {"session_id": "sessX", "transcript_path": str(transcript)},
+                "claude-code",
+            )
+
+    visible = tool_diary_read(agent_name="claude")
+    assert visible.get("total", 0) >= 1
+    assert "CHECKPOINT" in visible["entries"][0]["content"]
+
+
 def test_hook_precompact_does_not_create_palace_dir_when_absent(tmp_path, monkeypatch):
     fake_root = _redirect_palace_root(monkeypatch, tmp_path)
     transcript = tmp_path / "t.jsonl"
diff --git a/tests/test_hooks_shell.py b/tests/test_hooks_shell.py
index 7462d97b9..9b8d4f625 100644
--- a/tests/test_hooks_shell.py
+++ b/tests/test_hooks_shell.py
@@ -24,6 +24,7 @@
 import stat
 import subprocess
 import sys
+import time
 from pathlib import Path
 
 import pytest
@@ -31,6 +32,8 @@
 REPO_ROOT = Path(__file__).resolve().parent.parent
 SAVE_HOOK = REPO_ROOT / "hooks" / "mempal_save_hook.sh"
 PRECOMPACT_HOOK = REPO_ROOT / "hooks" / "mempal_precompact_hook.sh"
+SESSION_END_HOOK = REPO_ROOT / "hooks" / "mempal_session_end_hook.sh"
+PLUGIN_SESSION_END_HOOK = REPO_ROOT / ".claude-plugin" / "hooks" / "mempal-session-end-hook.sh"
 
 
 pytestmark = pytest.mark.skipif(os.name == "nt", reason="bash hook scripts are POSIX-only")
@@ -168,3 +171,131 @@ def test_falls_back_to_path_when_unset(self, tmp_path):
         assert "python3" in invocations, (
             f"fallback-to-PATH did not use the shimmed python3. Marker log: {invocations!r}"
         )
+
+
+# ── session-end wrapper: must background so the foreground beats the budget ──
+
+
+def _write_recording_mempalace(
+    path: Path, args_file: Path, *, sleep_secs: float = 0.0, done_file: Path | None = None
+) -> Path:
+    """A fake ``mempalace`` that consumes stdin, optionally sleeps, then records
+    its argv to ``args_file`` (and touches ``done_file``). Lets a test observe a
+    *backgrounded* dispatch after the wrapper's foreground has already returned.
+    """
+    done_line = f'printf done > "{done_file}"' if done_file is not None else ":"
+    src = f"""#!/bin/bash
+cat >/dev/null
+sleep {sleep_secs}
+printf '%s' "$*" > "{args_file}"
+{done_line}
+printf '{{}}'
+"""
+    path.write_text(src)
+    path.chmod(path.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+    return path
+
+
+def _wait_for(path: Path, timeout: float = 15.0) -> bool:
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        if path.exists() and path.read_text():
+            return True
+        time.sleep(0.05)
+    return False
+
+
+class TestSessionEndWrapper:
+    def test_foreground_returns_before_worker_finishes(self, tmp_path):
+        """Budget contract: the foreground must return well before the (slow)
+        worker completes, otherwise SessionEnd's ~1.5s budget would kill the
+        mine. Proven with a worker that sleeps 2s before recording."""
+        args_file = tmp_path / "args.log"
+        done_file = tmp_path / "worker.done"
+        fake = _write_recording_mempalace(
+            tmp_path / "mempalace", args_file, sleep_secs=2.0, done_file=done_file
+        )
+        t0 = time.monotonic()
+        result = _run_hook(
+            SESSION_END_HOOK,
+            {"session_id": "abc", "transcript_path": ""},
+            env_overrides={"HOME": str(tmp_path)},
+            path_prefix=[fake.parent],
+        )
+        elapsed = time.monotonic() - t0
+        assert result.returncode == 0, f"stderr={result.stderr!r}"
+        assert result.stdout == "{}"
+        assert elapsed < 1.5, f"foreground blocked {elapsed:.2f}s; the budget would kill it"
+        assert not done_file.exists(), (
+            "worker finished before the foreground returned — wrapper is not backgrounding"
+        )
+        assert _wait_for(done_file), "detached worker never completed"
+        assert args_file.read_text() == "hook run --hook session-end --harness claude-code"
+
+    def test_dispatches_via_mempal_python_override(self, tmp_path):
+        args_file = tmp_path / "args.log"
+        shim = tmp_path / "python3"
+        shim.write_text(
+            f"""#!/bin/bash
+if [ "$1" = "-c" ]; then exit 0; fi
+if [ "$1" = "-m" ] && [ "$2" = "mempalace" ]; then
+  shift 2
+  cat >/dev/null
+  printf '%s' "$*" > "{args_file}"
+  printf '{{}}'
+  exit 0
+fi
+exit 1
+""",
+            encoding="utf-8",
+        )
+        shim.chmod(shim.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+        result = _run_hook(
+            SESSION_END_HOOK,
+            {"session_id": "abc", "transcript_path": ""},
+            env_overrides={
+                "HOME": str(tmp_path),
+                "PATH": "/usr/bin:/bin",
+                "MEMPAL_PYTHON": str(shim),
+            },
+        )
+        assert result.returncode == 0, f"stderr={result.stderr!r}"
+        assert result.stdout == "{}"
+        assert _wait_for(args_file), "backgrounded worker never ran"
+        assert args_file.read_text() == "hook run --hook session-end --harness claude-code"
+
+    def test_harness_override_is_forwarded(self, tmp_path):
+        args_file = tmp_path / "args.log"
+        fake = _write_recording_mempalace(tmp_path / "mempalace", args_file)
+        result = _run_hook(
+            SESSION_END_HOOK,
+            {"session_id": "abc", "transcript_path": ""},
+            env_overrides={"HOME": str(tmp_path), "MEMPALACE_HOOK_HARNESS": "codex"},
+            path_prefix=[fake.parent],
+        )
+        assert result.returncode == 0
+        assert _wait_for(args_file)
+        assert args_file.read_text() == "hook run --hook session-end --harness codex"
+
+
+class TestPluginSessionEndWrapper:
+    def test_foreground_returns_before_worker_finishes(self, tmp_path):
+        args_file = tmp_path / "args.log"
+        done_file = tmp_path / "worker.done"
+        fake = _write_recording_mempalace(
+            tmp_path / "mempalace", args_file, sleep_secs=2.0, done_file=done_file
+        )
+        t0 = time.monotonic()
+        result = _run_hook(
+            PLUGIN_SESSION_END_HOOK,
+            {"session_id": "abc", "transcript_path": ""},
+            env_overrides={"HOME": str(tmp_path)},
+            path_prefix=[fake.parent],
+        )
+        elapsed = time.monotonic() - t0
+        assert result.returncode == 0, f"stderr={result.stderr!r}"
+        assert result.stdout == "{}"
+        assert elapsed < 1.5, f"plugin foreground blocked {elapsed:.2f}s"
+        assert not done_file.exists()
+        assert _wait_for(done_file), "detached plugin worker never completed"
+        assert args_file.read_text() == "hook run --hook session-end --harness claude-code"
diff --git a/tests/test_hybrid_candidate_union.py b/tests/test_hybrid_candidate_union.py
index 0771001d9..7c1a129d3 100644
--- a/tests/test_hybrid_candidate_union.py
+++ b/tests/test_hybrid_candidate_union.py
@@ -213,6 +213,26 @@ def test_union_dedup_is_chunk_precise_not_basename(self, tmp_path):
             f"(basename collision would drop one); got sources={sources}"
         )
 
+    def test_union_respects_source_file_filter(self, tmp_path):
+        """Union pulls BM25 candidates from sqlite FTS5 directly; the
+        source_file filter must constrain that pool too, not just the vector
+        path — otherwise union silently re-injects other sources (#1815)."""
+        palace = str(tmp_path / "palace")
+        _seed_drawers(palace)
+        result = search_memories(
+            _NARRATIVE_QUERY,
+            palace,
+            n_results=5,
+            candidate_strategy="union",
+            source_file="ticket_D2.md",
+        )
+        sources = {h["source_file"] for h in result["results"]}
+        assert sources <= {"ticket_D2.md"}, (
+            f"union must honor source_file on the BM25 pool; got {sources}"
+        )
+        # The BM25-strong brand-voice doc must NOT leak past the filter.
+        assert "brand_voice_D4.md" not in sources
+
 
 class TestHybridRankTolerantOfMissingDistance:
     """``_hybrid_rank`` accepts ``distance=None`` — required for BM25-only
diff --git a/tests/test_hybrid_search.py b/tests/test_hybrid_search.py
index a2672de41..35aa57934 100644
--- a/tests/test_hybrid_search.py
+++ b/tests/test_hybrid_search.py
@@ -133,3 +133,43 @@ def test_drawer_only_hits_have_no_closet_preview(self, tmp_path):
             assert h["matched_via"] == "drawer"
             assert "closet_preview" not in h
             assert h["closet_boost"] == 0.0
+
+
+# ── source_file filter scopes both drawer and closet queries (#1815) ──────
+
+
+class TestSourceFileFilter:
+    def test_source_file_filter_excludes_other_sources(self, tmp_path):
+        palace = str(tmp_path / "palace")
+        _seed_drawers(palace)
+        result = search_memories(
+            "Kafka consumer rebalance timeout",
+            palace,
+            n_results=5,
+            source_file="fixture_D4.md",
+        )
+        ids = [h["source_file"] for h in result["results"]]
+        assert ids, "the matching source_file drawer should be returned"
+        assert set(ids) == {"fixture_D4.md"}
+
+    def test_source_file_filter_overrides_closet_boost_for_other_source(self, tmp_path):
+        # A strong closet pointing at D1 must NOT leak D1 in when the search
+        # is scoped to a different source_file — the where clause is applied
+        # to the closet query too, not just the drawer query.
+        palace = str(tmp_path / "palace")
+        _seed_drawers(palace)
+        _seed_strong_closet_for(
+            palace,
+            drawer_id="D1",
+            source_file="fixture_D1.md",
+            topics=["Kafka queue tuning", "consumer rebalance config"],
+        )
+        result = search_memories(
+            "Kafka consumer rebalance",
+            palace,
+            n_results=5,
+            source_file="fixture_D4.md",
+        )
+        ids = [h["source_file"] for h in result["results"]]
+        assert "fixture_D1.md" not in ids
+        assert set(ids) <= {"fixture_D4.md"}
diff --git a/tests/test_live_pgvector_conformance.py b/tests/test_live_pgvector_conformance.py
new file mode 100644
index 000000000..48d516504
--- /dev/null
+++ b/tests/test_live_pgvector_conformance.py
@@ -0,0 +1,341 @@
+"""Live-substrate conformance run for the pgvector backend (RFC 001).
+
+Mirrors the fake-client arms of ``test_pgvector_backend.py`` against a real
+PostgreSQL + pgvector server, plus live-only arms the in-memory fake cannot
+exercise: the real ``<=>`` operator class, JSONB pushdown vs local-fallback
+equivalence, multi-connection concurrent writers, and the advisory-lock
+serialization of ``run_maintenance("reindex")``.
+
+Gate: ``MEMPALACE_PGVECTOR_LIVE_DSN`` (a scratch database — every test creates
+its own namespaced tables; never point this at a production palace).
+"""
+
+import os
+import threading
+from concurrent.futures import ThreadPoolExecutor
+
+import pytest
+
+from _backend_conformance import assert_partition_isolation
+
+from mempalace.backends import (
+    BackendError,
+    BackendMismatchError,
+    CollectionNotInitializedError,
+    DimensionMismatchError,
+    PalaceRef,
+)
+from mempalace.backends.pgvector import PgVectorBackend
+
+LIVE_DSN = os.environ.get("MEMPALACE_PGVECTOR_LIVE_DSN")
+
+pytestmark = pytest.mark.skipif(
+    not LIVE_DSN, reason="set MEMPALACE_PGVECTOR_LIVE_DSN (scratch DB) to run"
+)
+
+
+@pytest.fixture
+def live(request, tmp_path):
+    """Backend + collection on the live server, namespaced per test."""
+    namespace = "conf_" + request.node.name.replace("[", "_").replace("]", "")[:40]
+    backend = PgVectorBackend()
+    created = []
+    created_lock = threading.Lock()
+
+    def make(path, name="drawers", create=True, ns=namespace, dsn=LIVE_DSN, backend_=None):
+        b = backend_ or backend
+        ref = PalaceRef(id=str(path), local_path=str(path), namespace=ns)
+        col = b.get_collection(
+            palace=ref, collection_name=name, create=create, options={"dsn": dsn, "namespace": ns}
+        )
+        # The concurrent tests call make() from worker threads; plain list
+        # append is not guaranteed safe on every Python build.
+        with created_lock:
+            created.append(col)
+        return col
+
+    yield backend, make, namespace
+    for col in created:
+        try:
+            col._client.drop_table(col._table)
+        except Exception:
+            pass
+    backend.close()
+
+
+def _seed(col):
+    col.add(
+        ids=["a", "b", "c"],
+        documents=[
+            "alpha backend note",
+            "rareterm pgvector backend note",
+            "frontend design note",
+        ],
+        metadatas=[
+            {"wing": "project", "room": "backend", "rank": 1},
+            {"wing": "project", "room": "backend", "rank": 3},
+            {"wing": "project", "room": "frontend", "rank": 2},
+        ],
+        embeddings=[[1, 0], [0.9, 0.1], [0, 1]],
+    )
+
+
+def test_live_add_query_filters_lexical_and_marker(live, tmp_path):
+    backend, make, _ns = live
+    col = make(tmp_path)
+    assert not os.path.isfile(tmp_path / "pgvector_backend.json")
+    _seed(col)
+
+    assert PgVectorBackend.detect(str(tmp_path))
+    assert os.path.isfile(tmp_path / "pgvector_backend.json")
+    assert col.count() == 3
+
+    result = col.query(
+        query_embeddings=[[1, 0]],
+        n_results=3,
+        where={"wing": "project"},
+        include=["documents", "metadatas", "distances", "embeddings"],
+    )
+    # ORDER BY distance ASC is part of the query contract — assert the
+    # exact ranking, not just membership.
+    assert result.ids[0] == ["a", "b", "c"]
+    assert result.embeddings[0][0] == pytest.approx([1.0, 0.0])
+
+    hits = col.lexical_search(query="rareterm backend", n_results=2, where={"wing": "project"}).hits
+    assert [hit.id for hit in hits] == ["b", "a"]
+
+
+def test_live_requires_explicit_embeddings(live, tmp_path):
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    with pytest.raises(ValueError, match="explicit embeddings"):
+        col.add(ids=["a"], documents=["no vector"], metadatas=[{}])
+
+
+def test_live_dimension_mismatch(live, tmp_path):
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    col.upsert(ids=["a"], documents=["one"], metadatas=[{}], embeddings=[[1, 0]])
+    with pytest.raises(DimensionMismatchError):
+        col.upsert(ids=["b"], documents=["two"], metadatas=[{}], embeddings=[[1, 0, 0]])
+
+
+def test_live_duplicate_ids_in_batch_rejected(live, tmp_path):
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    with pytest.raises(ValueError, match="unique"):
+        col.add(
+            ids=["a", "a"], documents=["x", "y"], metadatas=[{}, {}], embeddings=[[1, 0], [0, 1]]
+        )
+
+
+def test_live_complex_filters_pushdown_vs_local_fallback(live, tmp_path):
+    """$or / $contains route to local fallback, equality/$gte push down to
+    JSONB SQL — on the live server both paths must agree with the fake."""
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    col.add(
+        ids=["a", "b", "c"],
+        documents=["alpha", "beta", "gamma"],
+        metadatas=[
+            {"wing": "x", "rank": 1, "tags": "core,vector"},
+            {"wing": "y", "rank": 3, "tags": "sqlite,exact"},
+            {"wing": "z", "rank": 2, "tags": "old"},
+        ],
+        embeddings=[[1, 0], [0.9, 0.1], [0, 1]],
+    )
+
+    or_hits = col.get(where={"$or": [{"wing": "x"}, {"wing": "z"}]})
+    assert set(or_hits.ids) == {"a", "c"}
+
+    contains = col.get(where={"tags": {"$contains": "sqlite"}})
+    assert contains.ids == ["b"]
+
+    ranked = col.query(query_embeddings=[[1, 0]], n_results=3, where={"rank": {"$gte": 2}})
+    assert ranked.ids[0] == ["b", "c"]
+
+    eq_pushdown = col.get(where={"wing": "y"})
+    assert eq_pushdown.ids == ["b"]
+
+
+def test_live_marker_rejects_target_change(live, tmp_path):
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    col.upsert(ids=["a"], documents=["one"], metadatas=[{}], embeddings=[[1, 0]])
+
+    backend2 = PgVectorBackend()
+    palace = PalaceRef(id=str(tmp_path), local_path=str(tmp_path))
+    try:
+        with pytest.raises(BackendMismatchError):
+            backend2.get_collection(
+                palace=palace,
+                collection_name="drawers",
+                create=True,
+                options={"dsn": "postgresql://other-host:5432/other"},
+            )
+    finally:
+        backend2.close()
+
+
+def test_live_marker_backend_mismatch(live, tmp_path):
+    from mempalace.palace import resolve_backend_name
+
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    col.upsert(ids=["a"], documents=["one"], metadatas=[{}], embeddings=[[1, 0]])
+
+    assert resolve_backend_name(str(tmp_path)) == "pgvector"
+    with pytest.raises(BackendMismatchError):
+        resolve_backend_name(str(tmp_path), explicit="qdrant")
+
+
+def test_live_rejects_pure_remote_palace(live):
+    backend = PgVectorBackend()
+    palace = PalaceRef(id="tenant-remote", local_path=None, namespace="tenant-remote")
+    try:
+        with pytest.raises(BackendError, match="local palace path"):
+            backend.get_collection(
+                palace=palace, collection_name="drawers", create=True, options={"dsn": LIVE_DSN}
+            )
+    finally:
+        backend.close()
+
+
+def test_live_missing_table_after_marker_is_not_initialized(live, tmp_path):
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    col.upsert(ids=["a"], documents=["one"], metadatas=[{}], embeddings=[[1, 0]])
+    col._client.drop_table(col._table)
+
+    assert col.health().ok is False
+    with pytest.raises(CollectionNotInitializedError):
+        col.count()
+
+
+def test_live_cross_palace_isolation_conformance(live, tmp_path):
+    backend, make, _ns = live
+    cols = [make(tmp_path / label) for label in ("alpha", "beta")]
+    assert cols[0]._table != cols[1]._table
+    assert_partition_isolation(backend, cols[0], cols[1], embedding=[1.0, 0.0])
+
+
+def test_live_cross_namespace_isolation_conformance(live, tmp_path):
+    """The cschnatz arm: same DSN, two namespaces, no leakage either way."""
+    assert "supports_namespace_isolation" in PgVectorBackend.capabilities
+    backend, make, ns = live
+    col_a = make(tmp_path / "tenant-a", ns=f"{ns}_a")
+    col_b = make(tmp_path / "tenant-b", ns=f"{ns}_b")
+    assert col_a._table != col_b._table
+    assert_partition_isolation(backend, col_a, col_b, embedding=[1.0, 0.0])
+
+
+def test_live_cosine_operator_ranking_ground_truth(live, tmp_path):
+    """The real ``<=>`` operator class must rank by cosine distance exactly
+    as the fake's local math claims (our #1679 Q2-adjacent point: distance
+    semantics should be a contract fact; here we verify the live operator)."""
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    col.add(
+        ids=["same", "close", "orthogonal", "opposite"],
+        documents=["d1", "d2", "d3", "d4"],
+        metadatas=[{}, {}, {}, {}],
+        embeddings=[[1, 0], [0.9, 0.1], [0, 1], [-1, 0]],
+    )
+    result = col.query(query_embeddings=[[1, 0]], n_results=4, include=["distances"])
+    assert result.ids[0] == ["same", "close", "orthogonal", "opposite"]
+    distances = result.distances[0]
+    assert distances[0] == pytest.approx(0.0, abs=1e-6)
+    assert distances[2] == pytest.approx(1.0, abs=1e-6)
+    assert distances[3] == pytest.approx(2.0, abs=1e-6)
+
+
+def test_live_concurrent_writers_distinct_connections(live, tmp_path):
+    """8 backends (8 connections) upserting distinct rows into the same
+    table concurrently — the multi-daemon-writer shape from production."""
+    _backend, make, ns = live
+    seed_col = make(tmp_path)
+    seed_col.upsert(ids=["seed"], documents=["seed"], metadatas=[{}], embeddings=[[1, 0]])
+
+    errors = []
+
+    def writer(worker):
+        backend = PgVectorBackend()
+        # The marker file is already written by the seed step. upsert()
+        # rewrites it on every call with a plain open("w"), so 8 backends
+        # sharing one local_path would race on the same file — a test-design
+        # artifact (and a known sharing-violation hazard on Windows), not
+        # the contract under test here. Stub it for the concurrent phase.
+        backend._write_marker = lambda *args, **kwargs: None
+        try:
+            col = make(tmp_path, backend_=backend)
+            for i in range(25):
+                col.upsert(
+                    ids=[f"w{worker}-r{i}"],
+                    documents=[f"row {i} from worker {worker}"],
+                    metadatas=[{"worker": worker}],
+                    embeddings=[[1.0, float(i) / 100]],
+                )
+        except Exception as exc:  # noqa: BLE001 - collected for the report
+            errors.append(repr(exc))
+        finally:
+            backend.close()
+
+    with ThreadPoolExecutor(max_workers=8) as pool:
+        list(pool.map(writer, range(8)))
+
+    assert errors == [], f"concurrent writers raised: {errors[:3]}"
+    assert seed_col.count() == 1 + 8 * 25
+
+
+def test_live_reindex_advisory_lock_race(live, tmp_path):
+    """Two connections racing run_maintenance('reindex') — the #1732
+    advisory-lock behavior: exactly one 'ran', the loser learns
+    'already_running' (or 'noop' after the winner finishes), nobody stacks
+    a second ACCESS EXCLUSIVE build and nobody raises."""
+    _backend, make, ns = live
+    col = make(tmp_path)
+    col.add(
+        ids=[f"r{i}" for i in range(50)],
+        documents=[f"doc {i}" for i in range(50)],
+        metadatas=[{} for _ in range(50)],
+        embeddings=[[1.0, float(i)] for i in range(50)],
+    )
+    assert col.maintenance_state()["vector_index"] is None
+
+    barrier = threading.Barrier(2)
+    statuses, errors = [], []
+
+    def race():
+        backend = PgVectorBackend()
+        try:
+            racer = make(tmp_path, backend_=backend)
+            barrier.wait(timeout=10)
+            result = racer.run_maintenance("reindex")
+            statuses.append(result.status)
+        except Exception as exc:  # noqa: BLE001 - collected for the report
+            errors.append(repr(exc))
+        finally:
+            backend.close()
+
+    threads = [threading.Thread(target=race) for _ in range(2)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join(timeout=60)
+
+    assert errors == [], f"reindex race raised: {errors}"
+    # The index does not exist beforehand, so exactly one racer must win
+    # the advisory lock and build it.
+    assert statuses.count("ran") == 1
+    assert all(s in {"ran", "already_running", "noop"} for s in statuses), statuses
+    state = col.maintenance_state()
+    assert state["vector_index"] == "hnsw"
+    assert state["index_build_complete"] is True
+
+
+def test_live_analyze_maintenance(live, tmp_path):
+    _backend, make, _ns = live
+    col = make(tmp_path)
+    col.upsert(ids=["a"], documents=["one"], metadatas=[{}], embeddings=[[1, 0]])
+    result = col.run_maintenance("analyze")
+    assert result.status == "ran"
diff --git a/tests/test_mcp_http_transport.py b/tests/test_mcp_http_transport.py
new file mode 100644
index 000000000..82121b752
--- /dev/null
+++ b/tests/test_mcp_http_transport.py
@@ -0,0 +1,210 @@
+# tests/test_mcp_http_transport.py
+"""
+Tests for the opt-in HTTP transport added for #1801.
+
+These exercise the *production* server built by
+``mempalace.mcp_server._build_http_server`` over a real loopback socket on an
+ephemeral port — the earlier version of this file reimplemented the endpoint in
+Starlette and guarded on ``pytest.importorskip("starlette")``/``uvicorn``,
+neither of which is a project dependency, so it was silently skipped in CI and
+the real ``_serve_http`` handler had zero coverage.
+
+Design constraints
+------------------
+* Real sockets, but bound to ``127.0.0.1:0`` (OS-assigned port) so there is no
+  port conflict on any CI runner.
+* Pure stdlib (``http.client``, ``threading``) — no third-party deps.
+* Server runs in a daemon thread and is shut down in fixture teardown.
+"""
+
+import http.client
+import json
+import threading
+
+import pytest
+
+from mempalace import mcp_server as mcp
+
+
+def _post(port, path, body, headers=None, host_header=None):
+    """Raw POST with full control over Host / Origin / Authorization headers."""
+    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=5)
+    try:
+        raw = body if isinstance(body, (bytes, bytearray)) else json.dumps(body).encode("utf-8")
+        headers = headers or {}
+        conn.putrequest("POST", path, skip_host=(host_header is not None))
+        if host_header is not None:
+            conn.putheader("Host", host_header)
+        conn.putheader("Content-Type", "application/json")
+        # Let a caller override Content-Length (used to fake an oversized body)
+        # instead of emitting a second, conflicting header.
+        if not any(k.lower() == "content-length" for k in headers):
+            conn.putheader("Content-Length", str(len(raw)))
+        for k, v in headers.items():
+            conn.putheader(k, v)
+        conn.endheaders()
+        conn.send(raw)
+        resp = conn.getresponse()
+        return resp.status, resp.read()
+    finally:
+        conn.close()
+
+
+def _get(port, path, headers=None):
+    conn = http.client.HTTPConnection("127.0.0.1", port, timeout=5)
+    try:
+        conn.request("GET", path, headers=headers or {})
+        resp = conn.getresponse()
+        return resp.status, resp.read()
+    finally:
+        conn.close()
+
+
+@pytest.fixture
+def http_server():
+    """A running production MCP HTTP server on an ephemeral loopback port."""
+    httpd = mcp._build_http_server("127.0.0.1", 0)
+    port = httpd.server_address[1]
+    thread = threading.Thread(
+        target=httpd.serve_forever, kwargs={"poll_interval": 0.05}, daemon=True
+    )
+    thread.start()
+    try:
+        yield port, httpd
+    finally:
+        httpd.shutdown()
+        httpd.server_close()
+        thread.join(timeout=5)
+
+
+def test_post_dispatches_to_handle_request(http_server):
+    """A real POST to /mcp reaches handle_request and returns its JSON-RPC reply."""
+    port, _ = http_server
+    status, body = _post(port, "/mcp", {"jsonrpc": "2.0", "id": 1, "method": "tools/list"})
+    assert status == 200
+    payload = json.loads(body)
+    assert payload["id"] == 1
+    names = {t["name"] for t in payload["result"]["tools"]}
+    assert "mempalace_search" in names
+
+
+def test_initialize_reports_server_info(http_server):
+    port, _ = http_server
+    status, body = _post(port, "/mcp", {"jsonrpc": "2.0", "id": 7, "method": "initialize"})
+    assert status == 200
+    assert json.loads(body)["result"]["serverInfo"]["name"] == "mempalace"
+
+
+def test_healthz_ok(http_server):
+    port, _ = http_server
+    status, body = _get(port, "/healthz")
+    assert status == 200
+    assert body == b"ok\n"
+
+
+def test_unknown_path_404(http_server):
+    port, _ = http_server
+    assert _post(port, "/nope", {"jsonrpc": "2.0", "id": 1, "method": "ping"})[0] == 404
+    assert _get(port, "/nope")[0] == 404
+
+
+def test_invalid_json_returns_parse_error(http_server):
+    port, _ = http_server
+    status, body = _post(port, "/mcp", b"{not valid json")
+    assert status == 400
+    assert json.loads(body)["error"]["code"] == -32700
+
+
+def test_oversized_request_rejected_413(http_server):
+    """A declared Content-Length over the cap is rejected before the body is read."""
+    port, _ = http_server
+    # Lie about the length: the handler checks the header and returns 413 before
+    # reading the (tiny) body, so we never have to ship 16 MiB.
+    status, body = _post(
+        port,
+        "/mcp",
+        b"{}",
+        headers={"Content-Length": str(mcp._HTTP_MAX_REQUEST_BYTES + 1)},
+    )
+    assert status == 413
+    assert json.loads(body)["error"]["code"] == -32600
+
+
+def test_notification_returns_202_no_body(http_server):
+    port, _ = http_server
+    status, body = _post(port, "/mcp", {"jsonrpc": "2.0", "method": "notifications/initialized"})
+    assert status == 202
+    assert body == b""
+
+
+def test_rejects_foreign_host_header(http_server):
+    """DNS-rebinding guard: a request carrying an attacker domain in Host is 403."""
+    port, _ = http_server
+    status, _ = _post(
+        port,
+        "/mcp",
+        {"jsonrpc": "2.0", "id": 1, "method": "ping"},
+        host_header="evil.example.com",
+    )
+    assert status == 403
+
+
+def test_rejects_cross_origin(http_server):
+    """A browser Origin from a non-loopback page is 403 (rebinding/SSRF guard)."""
+    port, _ = http_server
+    status, _ = _post(
+        port,
+        "/mcp",
+        {"jsonrpc": "2.0", "id": 1, "method": "ping"},
+        headers={"Origin": "https://evil.example"},
+    )
+    assert status == 403
+
+
+def test_allows_loopback_origin(http_server):
+    port, _ = http_server
+    status, _ = _post(
+        port,
+        "/mcp",
+        {"jsonrpc": "2.0", "id": 1, "method": "ping"},
+        headers={"Origin": "http://localhost:5173"},
+    )
+    assert status == 200
+
+
+def test_bearer_token_enforced_when_configured(monkeypatch):
+    """With MEMPALACE_MCP_HTTP_TOKEN set, /mcp requires a matching bearer token."""
+    monkeypatch.setenv("MEMPALACE_MCP_HTTP_TOKEN", "s3cret")
+    httpd = mcp._build_http_server("127.0.0.1", 0)
+    port = httpd.server_address[1]
+    thread = threading.Thread(
+        target=httpd.serve_forever, kwargs={"poll_interval": 0.05}, daemon=True
+    )
+    thread.start()
+    try:
+        ping = {"jsonrpc": "2.0", "id": 1, "method": "ping"}
+        # No token → 401.
+        assert _post(port, "/mcp", ping)[0] == 401
+        # Wrong token → 401.
+        assert _post(port, "/mcp", ping, headers={"Authorization": "Bearer nope"})[0] == 401
+        # Correct token → 200.
+        assert _post(port, "/mcp", ping, headers={"Authorization": "Bearer s3cret"})[0] == 200
+        # /healthz never requires the token (orchestrator liveness probes).
+        assert _get(port, "/healthz")[0] == 200
+    finally:
+        httpd.shutdown()
+        httpd.server_close()
+        thread.join(timeout=5)
+
+
+def test_loopback_and_origin_helpers():
+    assert mcp._http_is_loopback("127.0.0.1")
+    assert mcp._http_is_loopback("localhost")
+    assert not mcp._http_is_loopback("0.0.0.0")
+    assert not mcp._http_is_loopback("192.168.1.10")
+    assert mcp._http_origin_allowed("http://127.0.0.1:8765")
+    assert mcp._http_origin_allowed("http://localhost")
+    assert not mcp._http_origin_allowed("https://evil.example")
+    assert not mcp._http_origin_allowed("garbage")
+    allowed = mcp._http_allowed_host_values("127.0.0.1", 8765)
+    assert "127.0.0.1:8765" in allowed and "localhost" in allowed
diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py
index 27f4251c9..062e0fb9c 100644
--- a/tests/test_mcp_server.py
+++ b/tests/test_mcp_server.py
@@ -845,6 +845,106 @@ def test_get_taxonomy(self, monkeypatch, config, palace_path, seeded_collection,
         assert result["taxonomy"]["project"]["frontend"] == 1
         assert result["taxonomy"]["notes"]["planning"] == 1
 
+    def test_overview_tools_use_sqlite_fast_path(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        """Overview tools must answer from the sqlite cross-tab without paging
+        all metadata through the chroma client (#1748 / #1379). A tripwire on
+        the pagination helper fails loudly if the fast path regresses to the
+        slow client path that times out on large palaces."""
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace import mcp_server
+
+        def _boom(*_a, **_k):
+            raise AssertionError("pagination path used instead of sqlite fast path")
+
+        monkeypatch.setattr(mcp_server, "_metadata_cache", None)
+        monkeypatch.setattr(mcp_server, "_fetch_all_metadata", _boom)
+
+        status = mcp_server.tool_status()
+        assert status["total_drawers"] == 4
+        assert status["wings"] == {"project": 3, "notes": 1}
+
+        assert mcp_server.tool_list_wings()["wings"] == {"project": 3, "notes": 1}
+
+        rooms = mcp_server.tool_list_rooms(wing="project")["rooms"]
+        assert rooms == {"backend": 2, "frontend": 1}
+
+        tax = mcp_server.tool_get_taxonomy()["taxonomy"]
+        assert tax["project"] == {"backend": 2, "frontend": 1}
+        assert tax["notes"] == {"planning": 1}
+
+    def test_overview_tools_normalize_missing_wing_room_to_unknown(
+        self, monkeypatch, config, palace_path, collection, kg
+    ):
+        """Fast path must keep the client path's contract: drawers missing
+        wing/room metadata read as 'unknown', not the sqlite COALESCE
+        placeholder '?' (#1748 review)."""
+        collection.add(
+            ids=["no_meta_drawer"],
+            documents=["a drawer with no wing or room metadata"],
+            metadatas=[{"source_file": "loose.txt"}],
+        )
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace import mcp_server
+
+        monkeypatch.setattr(mcp_server, "_metadata_cache", None)
+
+        tax = mcp_server.tool_get_taxonomy()["taxonomy"]
+        assert tax == {"unknown": {"unknown": 1}}
+
+        status = mcp_server.tool_status()
+        assert status["wings"] == {"unknown": 1}
+        assert status["rooms"] == {"unknown": 1}
+
+    def test_graph_stats_uses_sqlite_fast_path(
+        self, monkeypatch, config, palace_path, collection, kg
+    ):
+        """graph_stats must aggregate from sqlite without paging metadata
+        through build_graph()/HNSW (#1379). Mirrors the build_graph parity
+        case in test_palace_graph. Tripwires fail loudly if the fast path
+        regresses: graph_stats() (the slow client build) and _get_collection()
+        (any client/HNSW open) must never be reached."""
+        collection.add(
+            ids=["d_db_code", "d_db_proj", "d_auth", "d_general", "d_orphan"],
+            documents=[
+                "chromadb setup in the code wing",
+                "chromadb usage in the project wing",
+                "auth and security notes",
+                "a general catch-all drawer",
+                "a drawer with no wing",
+            ],
+            metadatas=[
+                {"room": "chromadb", "wing": "wing_code", "hall": "db"},
+                {"room": "chromadb", "wing": "wing_project", "hall": "db"},
+                {"room": "auth", "wing": "wing_code", "hall": "security"},
+                {"room": "general", "wing": "wing_code", "hall": "misc"},
+                {"room": "orphan", "source_file": "loose.txt"},
+            ],
+        )
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace import mcp_server
+
+        def _boom(*_a, **_k):
+            raise AssertionError("build_graph client path used instead of sqlite fast path")
+
+        def _no_client_open(*_a, **_k):
+            raise AssertionError("chroma collection opened — fast path must avoid HNSW")
+
+        monkeypatch.setattr(mcp_server, "graph_stats", _boom)
+        monkeypatch.setattr(mcp_server, "_get_collection", _no_client_open)
+
+        stats = mcp_server.tool_graph_stats()
+        # "general" room and the wing-less drawer are excluded, matching
+        # build_graph's per-drawer filter.
+        assert stats["total_rooms"] == 2
+        assert stats["tunnel_rooms"] == 1
+        assert stats["total_edges"] == 1
+        assert stats["rooms_per_wing"] == {"wing_code": 2, "wing_project": 1}
+        assert stats["top_tunnels"] == [
+            {"room": "chromadb", "wings": ["wing_code", "wing_project"], "count": 2}
+        ]
+
     def test_no_palace_returns_error(self, monkeypatch, config, kg):
         _patch_mcp_server(monkeypatch, config, kg)
         from mempalace.mcp_server import tool_status
@@ -1005,6 +1105,91 @@ def test_search_with_room_filter(self, monkeypatch, config, palace_path, seeded_
         result = tool_search(query="database", room="backend")
         assert all(r["room"] == "backend" for r in result["results"])
 
+    def test_search_with_source_file_filter(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import tool_search
+
+        result = tool_search(query="authentication module", source_file="auth.py")
+        assert result["results"]
+        assert all(r["source_file"] == "auth.py" for r in result["results"])
+        assert result["filters"]["source_file"] == "auth.py"
+
+    def test_search_source_file_allows_path_separators(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        # Unlike wing/room, a source_file is a path — '/' must NOT be rejected
+        # as a path-traversal attempt the way sanitize_name() would.
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import tool_search
+
+        result = tool_search(query="authentication", source_file="/abs/path/to/auth.py")
+        assert "error" not in result
+
+    def test_search_blank_source_file_ignored(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import tool_search
+
+        result = tool_search(query="JWT authentication", source_file="   ")
+        assert "results" in result
+        assert result["filters"]["source_file"] is None
+
+    def test_search_rejects_null_byte_source_file(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        # A null byte in a metadata where-value can crash chromadb add/upsert
+        # (#1235 lineage); reject it cleanly the way sanitize_name does.
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import tool_search
+
+        result = tool_search(query="JWT", source_file="bad\x00null")
+        assert "error" in result
+
+    def test_search_rejects_overlong_source_file(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import tool_search
+
+        result = tool_search(query="JWT", source_file="x" * 5000)
+        assert "error" in result
+
+    def test_search_rejects_non_string_source_file(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        # A non-string source_file (e.g. a JSON number, which the schema's
+        # string type does not coerce) must yield a clean validation error,
+        # not an unhandled AttributeError from .strip().
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import tool_search
+
+        result = tool_search(query="JWT", source_file=42)
+        assert "error" in result
+
+    def test_search_rejects_lone_surrogate_source_file(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        # A lone UTF-16 surrogate can crash chromadb (#1235); reject it for
+        # parity with sanitize_name rather than letting it reach the backend.
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import tool_search
+
+        result = tool_search(query="JWT", source_file="bad\udc80surrogate")
+        assert "error" in result
+
+    def test_search_accepts_source_file_at_length_boundary(
+        self, monkeypatch, config, palace_path, seeded_collection, kg
+    ):
+        # Exactly _MAX_SOURCE_FILE_LENGTH is allowed (the cap is a strict '>').
+        _patch_mcp_server(monkeypatch, config, kg)
+        from mempalace.mcp_server import _MAX_SOURCE_FILE_LENGTH, tool_search
+
+        result = tool_search(query="JWT", source_file="x" * _MAX_SOURCE_FILE_LENGTH)
+        assert "error" not in result
+
     def test_search_min_similarity_backwards_compat(
         self, monkeypatch, config, palace_path, seeded_collection, kg
     ):
@@ -1189,12 +1374,12 @@ def test_find_tunnels_rejects_invalid_wing(self, monkeypatch, config, kg):
 
     def test_wal_redacts_sensitive_fields(self, monkeypatch, config, kg, tmp_path):
         _patch_mcp_server(monkeypatch, config, kg)
-        from mempalace import mcp_server
+        from mempalace import wal
 
         wal_file = tmp_path / "write_log.jsonl"
-        monkeypatch.setattr(mcp_server, "_WAL_FILE", wal_file)
+        monkeypatch.setattr(wal, "_WAL_FILE", wal_file)
 
-        mcp_server._wal_log(
+        wal._wal_log(
             "test",
             {"content": "secret note", "query": "private search", "safe": "ok"},
         )
@@ -1427,6 +1612,127 @@ def fail_get_collection():
         assert result["vector_disabled"] is True
         assert result["vector_disabled_reason"] == "capacity mismatch"
 
+    def test_checkpoint_files_items_and_writes_diary(self, monkeypatch, config, palace_path, kg):
+        _patch_mcp_server(monkeypatch, config, kg)
+        _client, _col = _get_collection(palace_path, create=True)
+        del _client
+        from mempalace.mcp_server import tool_checkpoint
+
+        result = tool_checkpoint(
+            items=[
+                {"wing": "w", "room": "decisions", "content": "Use PostgreSQL for storage."},
+                {"wing": "w", "room": "backend", "content": "Cache sessions in Redis."},
+            ],
+            diary={"agent_name": "cursor-ide", "wing": "w", "entry": "SESSION|did.stuff|★"},
+        )
+        assert len(result["added"]) == 2
+        assert result["duplicates"] == []
+        assert result["errors"] == []
+        assert all(a["success"] for a in result["added"])
+        assert result["diary"]["success"] is True
+
+    def test_checkpoint_skips_semantic_duplicates(self, monkeypatch, config, kg):
+        from mempalace import mcp_server
+
+        monkeypatch.setattr(
+            mcp_server,
+            "tool_check_duplicate",
+            lambda content, threshold=0.9: {
+                "is_duplicate": True,
+                "matches": [{"id": "x", "similarity": 0.95}],
+            },
+        )
+        called = {"add": False}
+
+        def _fail_add(**_kwargs):
+            called["add"] = True
+            return {"success": True}
+
+        monkeypatch.setattr(mcp_server, "tool_add_drawer", _fail_add)
+
+        result = mcp_server.tool_checkpoint(
+            items=[{"wing": "w", "room": "r", "content": "already known"}]
+        )
+        assert result["added"] == []
+        assert len(result["duplicates"]) == 1
+        assert called["add"] is False
+
+    def test_checkpoint_reports_malformed_items(self, monkeypatch, config, kg):
+        from mempalace import mcp_server
+
+        monkeypatch.setattr(
+            mcp_server, "tool_check_duplicate", lambda *a, **k: {"is_duplicate": False}
+        )
+        result = mcp_server.tool_checkpoint(items=[{"wing": "w", "room": "r"}, "not-a-dict"])
+        assert result["added"] == []
+        assert len(result["errors"]) == 2
+
+    def test_checkpoint_rejects_non_string_fields_without_calling_handlers(
+        self, monkeypatch, config, kg
+    ):
+        """A non-string content must be reported, never passed to the
+        single-item handlers where it would raise deep in sanitization."""
+        from mempalace import mcp_server
+
+        def _explode(*_a, **_k):
+            raise AssertionError("handlers must not run for malformed items")
+
+        monkeypatch.setattr(mcp_server, "tool_check_duplicate", _explode)
+        monkeypatch.setattr(mcp_server, "tool_add_drawer", _explode)
+
+        result = mcp_server.tool_checkpoint(
+            items=[{"wing": "w", "room": "r", "content": {"not": "a string"}}]
+        )
+        assert result["added"] == []
+        assert len(result["errors"]) == 1
+        assert "non-empty strings" in result["errors"][0]["error"]
+
+    def test_checkpoint_files_when_dedup_check_errors(self, monkeypatch, config, kg):
+        """A dedup error is a genuine index failure (content is already
+        validated as a string); we still file rather than drop the memory."""
+        from mempalace import mcp_server
+
+        monkeypatch.setattr(
+            mcp_server,
+            "tool_check_duplicate",
+            lambda *a, **k: {"error": "Duplicate check failed"},
+        )
+        filed = {}
+
+        def _add(**kwargs):
+            filed.update(kwargs)
+            return {"success": True, "drawer_id": "d1"}
+
+        monkeypatch.setattr(mcp_server, "tool_add_drawer", _add)
+
+        result = mcp_server.tool_checkpoint(
+            items=[{"wing": "w", "room": "r", "content": "keep me"}]
+        )
+        assert len(result["added"]) == 1
+        assert filed["content"] == "keep me"
+
+    def test_checkpoint_reports_malformed_diary(self, monkeypatch, config, kg):
+        from mempalace import mcp_server
+
+        monkeypatch.setattr(
+            mcp_server, "tool_check_duplicate", lambda *a, **k: {"is_duplicate": False}
+        )
+
+        def _fail_diary(*_a, **_k):
+            raise AssertionError("diary_write must not run for malformed diary")
+
+        monkeypatch.setattr(mcp_server, "tool_diary_write", _fail_diary)
+
+        result = mcp_server.tool_checkpoint(items=[], diary={"agent_name": "x"})
+        assert "diary" not in result
+        assert any("diary entry" in e.get("error", "") for e in result["errors"])
+
+    def test_checkpoint_registered_in_tools(self):
+        from mempalace import mcp_server
+
+        assert "mempalace_checkpoint" in mcp_server.TOOLS
+        assert mcp_server.TOOLS["mempalace_checkpoint"]["handler"] is mcp_server.tool_checkpoint
+
     def test_get_drawer(self, monkeypatch, config, palace_path, seeded_collection, kg):
         _patch_mcp_server(monkeypatch, config, kg)
         from mempalace.mcp_server import tool_get_drawer
@@ -1929,6 +2235,194 @@ def test_update_drawer_chunked_logical_id_rewrites_group(monkeypatch, config, pa
     assert listed["drawers"][0]["drawer_id"] == logical_id
 
 
+# ── Delete by source (#1722) ────────────────────────────────────────────
+
+
+class TestDeleteBySource:
+    """``tool_delete_by_source`` — bulk cleanup of benchmark/test contamination (#1722)."""
+
+    def _seed(self, monkeypatch, config, palace_path, kg):
+        _patch_mcp_server(monkeypatch, config, kg)
+        _client, _col = _get_collection(palace_path, create=True)
+        del _client
+        from mempalace.mcp_server import tool_add_drawer
+
+        # Two drawers from a "benchmark" source, one from real user data.
+        tool_add_drawer(
+            wing="bench",
+            room="general",
+            content="ShareGPT yoga retreat conversation noise number one.",
+            source_file="results_mempal_hybrid_v4_session_1.jsonl",
+        )
+        tool_add_drawer(
+            wing="bench",
+            room="general",
+            content="ShareGPT coding job description noise number two.",
+            source_file="results_mempal_hybrid_v4_session_1.jsonl",
+        )
+        tool_add_drawer(
+            wing="clients",
+            room="webdesign",
+            content="GG Sauna Dachdecker real client memory that must survive.",
+            source_file="notes/clients.md",
+        )
+
+    def _seed_closets(self, palace_path):
+        """Seed the AAAK index (closets) directly.
+
+        ``tool_add_drawer`` never builds closets — those are a miner-side
+        artifact — so to exercise the closet purge we add them straight to the
+        collection, keyed by the same ``source_file`` the drawers use: two for
+        the benchmark source, one for the real-client source.
+        """
+        from mempalace.palace import get_closets_collection
+
+        closets_col = get_closets_collection(palace_path, create=True)
+        closets_col.add(
+            ids=["bench_closet_01", "bench_closet_02", "client_closet_01"],
+            documents=[
+                "topic: yoga retreat | coding job",
+                "topic: more bench noise",
+                "topic: GG Sauna client",
+            ],
+            metadatas=[
+                {"source_file": "results_mempal_hybrid_v4_session_1.jsonl"},
+                {"source_file": "results_mempal_hybrid_v4_session_1.jsonl"},
+                {"source_file": "notes/clients.md"},
+            ],
+        )
+        return closets_col
+
+    def test_dry_run_reports_count_without_deleting(self, monkeypatch, config, palace_path, kg):
+        self._seed(monkeypatch, config, palace_path, kg)
+        from mempalace.mcp_server import tool_delete_by_source, tool_status
+
+        result = tool_delete_by_source("results_mempal_hybrid_v4_session_1.jsonl")
+        assert result["success"] is True
+        assert result["dry_run"] is True
+        assert result["match_count"] == 2
+        assert {"wing": "bench", "room": "general"} in result["sample"]
+        # Nothing removed — all three drawers still present.
+        assert tool_status()["total_drawers"] == 3
+
+    def test_dry_run_reports_closet_match_count(self, monkeypatch, config, palace_path, kg):
+        """Dry run surfaces the closet blast radius (#1722) without deleting."""
+        self._seed(monkeypatch, config, palace_path, kg)
+        closets_col = self._seed_closets(palace_path)
+        from mempalace.mcp_server import tool_delete_by_source
+
+        result = tool_delete_by_source("results_mempal_hybrid_v4_session_1.jsonl")
+        assert result["dry_run"] is True
+        assert result["closet_match_count"] == 2
+        # Nothing removed — all three closets still present.
+        assert len(closets_col.get(include=[])["ids"]) == 3
+
+    def test_commit_deletes_only_matching_source(self, monkeypatch, config, palace_path, kg):
+        self._seed(monkeypatch, config, palace_path, kg)
+        from mempalace.mcp_server import tool_delete_by_source, tool_status
+
+        result = tool_delete_by_source("results_mempal_hybrid_v4_session_1.jsonl", dry_run=False)
+        assert result["success"] is True
+        assert result["dry_run"] is False
+        assert result["deleted"] == 2
+        # Only the real client drawer remains.
+        assert tool_status()["total_drawers"] == 1
+
+    def test_commit_purges_matching_closets(self, monkeypatch, config, palace_path, kg):
+        """Deleting by source purges the matching closets too, so the AAAK
+        index keeps no stale pointers at the now-deleted drawers (#1722)."""
+        self._seed(monkeypatch, config, palace_path, kg)
+        closets_col = self._seed_closets(palace_path)
+        from mempalace.mcp_server import tool_delete_by_source
+
+        result = tool_delete_by_source("results_mempal_hybrid_v4_session_1.jsonl", dry_run=False)
+        assert result["success"] is True
+        assert result["deleted"] == 2
+        assert result["closets_deleted"] == 2
+        # The two benchmark closets are gone; the real-client closet survives.
+        remaining = closets_col.get(include=["metadatas"])
+        sources = {m["source_file"] for m in remaining["metadatas"]}
+        assert sources == {"notes/clients.md"}
+
+    def test_no_match_is_idempotent_not_error(self, monkeypatch, config, palace_path, kg):
+        self._seed(monkeypatch, config, palace_path, kg)
+        from mempalace.mcp_server import tool_delete_by_source, tool_status
+
+        result = tool_delete_by_source("does/not/exist.jsonl", dry_run=False)
+        assert result["success"] is True
+        assert result["deleted"] == 0
+        assert tool_status()["total_drawers"] == 3
+
+    def test_empty_source_file_rejected(self, monkeypatch, config, palace_path, kg):
+        self._seed(monkeypatch, config, palace_path, kg)
+        from mempalace.mcp_server import tool_delete_by_source
+
+        result = tool_delete_by_source("   ", dry_run=False)
+        assert result["success"] is False
+        assert "non-empty" in result["error"]
+
+    def test_non_string_source_rejected(self, monkeypatch, config, palace_path, kg):
+        """A non-string source_file must return a clean error, not AttributeError."""
+        self._seed(monkeypatch, config, palace_path, kg)
+        from mempalace.mcp_server import tool_delete_by_source
+
+        result = tool_delete_by_source(123, dry_run=False)
+        assert result["success"] is False
+        assert "non-empty" in result["error"]
+
+    def test_matches_after_surrogate_normalization(self, monkeypatch, config, palace_path, kg):
+        """source_file is stripped of lone surrogates on both ingest and delete,
+        so a path that arrived via a cp1252 stdin (#1488) still matches."""
+        _patch_mcp_server(monkeypatch, config, kg)
+        _client, _col = _get_collection(palace_path, create=True)
+        del _client
+        from mempalace.mcp_server import (
+            tool_add_drawer,
+            tool_delete_by_source,
+            tool_status,
+        )
+
+        # Lone low surrogate embedded in the path — add_drawer strips it.
+        raw_source = "noise\udce9_data.jsonl"
+        tool_add_drawer(
+            wing="bench",
+            room="general",
+            content="benchmark noise from a non-ASCII path",
+            source_file=raw_source,
+        )
+        assert tool_status()["total_drawers"] == 1
+
+        # Deleting with the same raw (un-stripped) string must still match.
+        result = tool_delete_by_source(raw_source, dry_run=False)
+        assert result["success"] is True
+        assert result["deleted"] == 1
+        assert tool_status()["total_drawers"] == 0
+
+    def test_registered_and_dispatchable(self, monkeypatch, config, palace_path, kg):
+        self._seed(monkeypatch, config, palace_path, kg)
+        from mempalace.mcp_server import handle_request
+
+        # Listed in tools/list
+        listed = handle_request({"method": "tools/list", "id": 1, "params": {}})
+        names = {t["name"] for t in listed["result"]["tools"]}
+        assert "mempalace_delete_by_source" in names
+
+        # Dispatches and defaults to dry-run (no destructive side effect)
+        resp = handle_request(
+            {
+                "method": "tools/call",
+                "id": 2,
+                "params": {
+                    "name": "mempalace_delete_by_source",
+                    "arguments": {"source_file": "results_mempal_hybrid_v4_session_1.jsonl"},
+                },
+            }
+        )
+        content = json.loads(resp["result"]["content"][0]["text"])
+        assert content["dry_run"] is True
+        assert content["match_count"] == 2
+
+
 # ── KG Tools ────────────────────────────────────────────────────────────
 
 
@@ -2932,13 +3426,13 @@ def test_wal_log_creates_dir_lazily_on_first_write(self, tmp_path, monkeypatch):
         Proves the deferred setup still works (defers WAL creation to write
         time, does not disable it) and preserves the WAL permission bits.
         """
-        from mempalace import mcp_server
+        from mempalace import wal
 
         wal_file = tmp_path / "fresh" / "wal" / "write_log.jsonl"
         assert not wal_file.parent.exists()
-        monkeypatch.setattr(mcp_server, "_WAL_FILE", wal_file)
+        monkeypatch.setattr(wal, "_WAL_FILE", wal_file)
 
-        mcp_server._wal_log("test_op", {"safe": "ok"})
+        wal._wal_log("test_op", {"safe": "ok"})
 
         assert wal_file.exists(), "lazy WAL init did not create the log on first write"
         entry = json.loads(wal_file.read_text().strip())
@@ -3834,3 +4328,264 @@ def passthrough(**kwargs):
         )
         assert "error" not in resp
         assert "result" in resp
+
+
+def test_peer_writer_guard_refuses_mutating_tool_before_handler(monkeypatch):
+    from mempalace import mcp_server
+
+    called = {"value": False}
+
+    def handler(**kwargs):
+        called["value"] = True
+        return {"ok": True}
+
+    monkeypatch.setitem(
+        mcp_server.TOOLS,
+        "mempalace_add_drawer",
+        {
+            "description": "test write tool",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "wing": {"type": "string"},
+                    "room": {"type": "string"},
+                    "content": {"type": "string"},
+                },
+            },
+            "handler": handler,
+        },
+    )
+    monkeypatch.setattr(
+        mcp_server,
+        "_acquire_mcp_writer_lock",
+        lambda: (False, "busy writer"),
+    )
+
+    response = mcp_server.handle_request(
+        {
+            "jsonrpc": "2.0",
+            "id": 7,
+            "method": "tools/call",
+            "params": {
+                "name": "mempalace_add_drawer",
+                "arguments": {
+                    "wing": "wing_test",
+                    "room": "room_test",
+                    "content": "hello",
+                },
+            },
+        }
+    )
+
+    assert called["value"] is False
+    assert response["error"]["code"] == -32001
+    assert "read-only" in response["error"]["message"]
+    assert response["error"]["data"]["tool"] == "mempalace_add_drawer"
+
+
+def test_peer_writer_guard_does_not_gate_read_tool(monkeypatch):
+    from mempalace import mcp_server
+
+    def forbidden_lock():
+        raise AssertionError("read tools should not acquire the peer-writer lock")
+
+    monkeypatch.setitem(
+        mcp_server.TOOLS,
+        "mempalace_status",
+        {
+            "description": "test read tool",
+            "input_schema": {"type": "object", "properties": {}},
+            "handler": lambda: {"ok": True},
+        },
+    )
+    monkeypatch.setattr(mcp_server, "_acquire_mcp_writer_lock", forbidden_lock)
+
+    response = mcp_server.handle_request(
+        {
+            "jsonrpc": "2.0",
+            "id": 8,
+            "method": "tools/call",
+            "params": {"name": "mempalace_status", "arguments": {}},
+        }
+    )
+
+    assert '"ok": true' in response["result"]["content"][0]["text"]
+
+
+def test_peer_writer_lock_setup_failure_is_cached(monkeypatch):
+    from mempalace import mcp_server, palace
+
+    calls = {"count": 0}
+
+    def broken_mine_palace_lock(palace_path):
+        calls["count"] += 1
+        raise RuntimeError(f"permission denied for {palace_path}")
+
+    monkeypatch.delenv(mcp_server._MCP_ALLOW_PEER_WRITER_ENV, raising=False)
+    monkeypatch.setattr(palace, "mine_palace_lock", broken_mine_palace_lock)
+
+    monkeypatch.setattr(mcp_server, "_MCP_WRITER_LOCK_CM", None)
+    monkeypatch.setattr(mcp_server, "_MCP_WRITER_READ_ONLY", False)
+    monkeypatch.setattr(mcp_server, "_MCP_WRITER_LOCK_FAILED", False)
+    monkeypatch.setattr(mcp_server, "_MCP_WRITER_LOCK_ERROR", "")
+
+    ok_first, reason_first = mcp_server._acquire_mcp_writer_lock()
+    ok_second, reason_second = mcp_server._acquire_mcp_writer_lock()
+
+    assert ok_first is True
+    assert ok_second is True
+    assert calls["count"] == 1
+    assert mcp_server._MCP_WRITER_LOCK_FAILED is True
+    assert "continuing without peer-writer protection" in reason_first
+    assert reason_second == reason_first
+
+
+def test_sqlite_integrity_gate_refuses_non_status_tool(monkeypatch):
+    from mempalace import mcp_server
+
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_checked", True)
+    monkeypatch.setattr(
+        mcp_server,
+        "_sqlite_integrity_errors",
+        ["malformed inverted index for FTS5 table main.embedding_fulltext_search"],
+    )
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_check_error", "")
+
+    response = mcp_server.handle_request(
+        {
+            "jsonrpc": "2.0",
+            "id": 1818,
+            "method": "tools/call",
+            "params": {"name": "mempalace_list_wings", "arguments": {}},
+        }
+    )
+
+    assert response["error"]["code"] == mcp_server._SQLITE_INTEGRITY_ERROR_CODE
+    assert "integrity check failed" in response["error"]["message"]
+    assert response["error"]["data"]["tool"] == "mempalace_list_wings"
+    assert "malformed inverted index" in response["error"]["data"]["errors"][0]
+
+
+def test_sqlite_integrity_status_surfaces_payload_without_chroma(monkeypatch):
+    import json
+
+    from mempalace import mcp_server
+
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_checked", True)
+    monkeypatch.setattr(
+        mcp_server,
+        "_sqlite_integrity_errors",
+        ["malformed inverted index for FTS5 table main.embedding_fulltext_search"],
+    )
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_check_error", "")
+    monkeypatch.setattr(
+        mcp_server,
+        "_tool_status_via_sqlite",
+        lambda: {"total_drawers": 123, "backend": "chroma"},
+    )
+
+    response = mcp_server.handle_request(
+        {
+            "jsonrpc": "2.0",
+            "id": 1819,
+            "method": "tools/call",
+            "params": {"name": "mempalace_status", "arguments": {}},
+        }
+    )
+
+    payload = json.loads(response["result"]["content"][0]["text"])
+
+    assert payload["total_drawers"] == 123
+    assert payload["sqlite_integrity_failed"] is True
+    assert payload["sqlite_integrity"]["ok"] is False
+    assert payload["sqlite_integrity"]["error_count"] == 1
+    assert "malformed inverted index" in payload["sqlite_integrity"]["errors"][0]
+
+
+def test_sqlite_integrity_reconnect_allowed_when_corrupt(monkeypatch):
+    from mempalace import mcp_server
+
+    called = {"value": False}
+
+    def fake_reconnect():
+        called["value"] = True
+        return {"success": True}
+
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_checked", True)
+    monkeypatch.setattr(
+        mcp_server,
+        "_sqlite_integrity_errors",
+        ["malformed inverted index for FTS5 table main.embedding_fulltext_search"],
+    )
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_check_error", "")
+    monkeypatch.setitem(
+        mcp_server.TOOLS,
+        "mempalace_reconnect",
+        {
+            "description": "test reconnect",
+            "input_schema": {"type": "object", "properties": {}},
+            "handler": fake_reconnect,
+        },
+    )
+
+    response = mcp_server.handle_request(
+        {
+            "jsonrpc": "2.0",
+            "id": 1820,
+            "method": "tools/call",
+            "params": {"name": "mempalace_reconnect", "arguments": {}},
+        }
+    )
+
+    assert called["value"] is True
+    assert '"success": true' in response["result"]["content"][0]["text"]
+
+
+def test_refresh_sqlite_integrity_status_records_quick_check_errors(monkeypatch):
+    from mempalace import mcp_server, repair
+
+    monkeypatch.setattr(mcp_server, "_is_chroma_backend", lambda: True)
+    monkeypatch.setattr(
+        repair,
+        "sqlite_integrity_errors",
+        lambda palace_path: [
+            "malformed inverted index for FTS5 table main.embedding_fulltext_search"
+        ],
+    )
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_checked", False)
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_errors", [])
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_check_error", "")
+
+    mcp_server._refresh_sqlite_integrity_status()
+
+    assert mcp_server._sqlite_integrity_checked is True
+    assert len(mcp_server._sqlite_integrity_errors) == 1
+    assert "malformed inverted index" in mcp_server._sqlite_integrity_errors[0]
+
+
+def test_sqlite_integrity_refusal_handles_none_palace_path(monkeypatch):
+    """
+    Regression test for Gemini review feedback on PR #1823 (lines 433-455).
+
+    _mcp_sqlite_integrity_refusal() must not raise TypeError when
+    _config.palace_path is None — os.path.join(None, "chroma.sqlite3")
+    would otherwise crash the server on every mutating tool call while
+    the palace is unconfigured and integrity errors are present.
+    """
+    from mempalace import mcp_server
+
+    # palace_path is a read-only @property on MempalaceConfig (no setter),
+    # so monkeypatch.setattr on the instance fails. Patch the class-level
+    # property instead -- monkeypatch restores it automatically on teardown.
+    monkeypatch.setattr(type(mcp_server._config), "palace_path", property(lambda self: None))
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_checked", True)
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_errors", ["malformed inverted index"])
+    monkeypatch.setattr(mcp_server, "_sqlite_integrity_check_error", "")
+
+    # Must not raise
+    result = mcp_server._mcp_sqlite_integrity_refusal(req_id=1, tool_name="mempalace_kg_add")
+
+    assert result is not None
+    assert result["error"]["data"]["palace"] == ""
+    assert result["error"]["data"]["sqlite_path"] == ""
+    assert result["error"]["data"]["tool"] == "mempalace_kg_add"
diff --git a/tests/test_migrate.py b/tests/test_migrate.py
index 1e0259ba1..f29b888b3 100644
--- a/tests/test_migrate.py
+++ b/tests/test_migrate.py
@@ -1,10 +1,13 @@
 """Tests for destructive-operation safety in mempalace.migrate."""
 
+import errno
 import os
 import sqlite3
 from types import SimpleNamespace
 from unittest.mock import MagicMock, patch
 
+import pytest
+
 from mempalace.migrate import (
     _restore_stale_palace,
     collection_write_roundtrip_works,
@@ -335,3 +338,75 @@ def test_migrate_prunes_old_pre_migrate_backups(tmp_path, monkeypatch):
     # The two oldest stale backups must be gone.
     assert "palace.pre-migrate.20260100_000000" not in backups
     assert "palace.pre-migrate.20260101_000000" not in backups
+
+
+def test_migrate_restores_palace_on_swap_failure(tmp_path, capsys):
+    """End-to-end coverage for swap-failure rollback.
+
+    `migrate` swaps the old palace aside via `os.replace` rather than
+    deleting it. If `os.replace(temp_palace, palace_path)` raises EXDEV
+    (cross-filesystem) AND its `shutil.move` fallback ALSO fails,
+    `_restore_stale_palace` rolls back by renaming the aside-copy back
+    into place. This exercises that full failure path through the public
+    `migrate()` entry point; develop already has unit-level tests for the
+    helper itself.
+    """
+    palace_dir = tmp_path / "palace"
+    palace_dir.mkdir()
+    (palace_dir / "chroma.sqlite3").write_text("dummy db")
+    # Sentinel file we verify survives the failed swap via rename-aside rollback.
+    (palace_dir / "sentinel.txt").write_text("original")
+
+    fake_col = MagicMock()
+    fake_col.count.return_value = 1
+    fake_col.add.return_value = None
+
+    drawers = [{"id": "id1", "document": "doc", "metadata": {"wing": "w", "room": "r"}}]
+
+    # Selective os.replace mock: pass-through for the rename-aside (call A,
+    # palace -> palace.old) and the rollback (call C, palace.old -> palace);
+    # raise EXDEV exactly once on the swap-in (call B, temp -> palace).
+    real_os_replace = os.replace
+    fail_state = {"swap_in_failed": False}
+
+    def selective_replace(src, dst):
+        if os.fspath(dst) == os.fspath(palace_dir) and not fail_state["swap_in_failed"]:
+            fail_state["swap_in_failed"] = True
+            raise OSError(errno.EXDEV, "Invalid cross-device link")
+        return real_os_replace(src, dst)
+
+    with (
+        patch("mempalace.migrate.detect_chromadb_version", return_value="0.5.x"),
+        patch("mempalace.backends.chroma.ChromaBackend") as mock_backend_cls,
+        patch("mempalace.migrate.collection_write_roundtrip_works", return_value=False),
+        patch("mempalace.migrate.extract_drawers_from_sqlite", return_value=drawers),
+        patch("mempalace.migrate.confirm_destructive_action", return_value=True),
+        patch("mempalace.migrate.os.replace", side_effect=selective_replace),
+        patch(
+            "mempalace.migrate.shutil.move",
+            side_effect=OSError("fallback move also failed"),
+        ),
+        pytest.raises(OSError),
+    ):
+        mock_backend_cls.backend_version.return_value = "1.5.4"
+        mock_backend_cls.return_value.get_collection.return_value = fake_col
+        mock_backend_cls.return_value.get_or_create_collection.return_value = fake_col
+        migrate(str(palace_dir))
+
+    # Palace directory restored from the rename-aside copy.
+    assert palace_dir.is_dir(), "palace directory missing after rollback"
+    sentinel = palace_dir / "sentinel.txt"
+    assert sentinel.is_file(), "sentinel file not restored"
+    assert sentinel.read_text() == "original", "restored contents differ from original"
+
+    # Pre-migrate backup remains on disk for post-mortem.
+    backups = [p for p in tmp_path.iterdir() if p.name.startswith("palace.pre-migrate.")]
+    assert backups, "pre-migrate backup directory missing"
+
+    # Stale .old aside-copy was consumed by the rollback (renamed back).
+    stale_path = tmp_path / "palace.old"
+    assert not stale_path.exists(), "stale .old should have been consumed by rollback"
+
+    # No CRITICAL message — rollback succeeded cleanly.
+    out = capsys.readouterr().out
+    assert "CRITICAL" not in out
diff --git a/tests/test_miner.py b/tests/test_miner.py
index 34ceff6dc..85c8fc305 100644
--- a/tests/test_miner.py
+++ b/tests/test_miner.py
@@ -10,7 +10,15 @@
 import yaml
 
 from mempalace.config import normalize_wing_name
-from mempalace.miner import detect_room, load_config, mine, scan_project, status
+from mempalace.miner import (
+    PHP_EXTENSIONS,
+    READABLE_EXTENSIONS,
+    detect_room,
+    load_config,
+    mine,
+    scan_project,
+    status,
+)
 from mempalace.palace import NORMALIZE_VERSION, file_already_mined, prefetch_mined_set
 
 
@@ -24,6 +32,24 @@ def scanned_files(project_root: Path, **kwargs):
     return sorted(path.relative_to(project_root).as_posix() for path in files)
 
 
+def test_php_ecosystem_extensions_are_readable():
+    assert PHP_EXTENSIONS <= READABLE_EXTENSIONS
+
+
+def test_scan_project_includes_php_ecosystem_files(tmp_path):
+    expected = []
+    for index, extension in enumerate(sorted(PHP_EXTENSIONS)):
+        filename = f"example_{index}{extension}"
+        write_file(tmp_path / filename, "<?php echo 'verbatim';\n")
+        expected.append(filename)
+
+    # Extension matching is deliberately case-insensitive.
+    write_file(tmp_path / "uppercase.PHP", "<?php echo 'uppercase';\n")
+    expected.append("uppercase.PHP")
+
+    assert scanned_files(tmp_path) == sorted(expected)
+
+
 def test_project_mining():
     tmpdir = tempfile.mkdtemp()
     try:
@@ -296,6 +322,33 @@ def test_scan_project_skips_mempalace_generated_files():
         assert scanned_files(project_root) == ["notes.md"]
 
 
+def test_scan_project_includes_swift_files():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        project_root = Path(tmpdir).resolve()
+        write_file(
+            project_root / "Sources" / "App.swift",
+            "struct App {}\n" * 20,
+        )
+        assert scanned_files(project_root) == ["Sources/App.swift"]
+
+
+def test_scan_project_includes_kotlin_files():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        project_root = Path(tmpdir).resolve()
+        write_file(
+            project_root / "src" / "Main.kt",
+            "fun main() {}\n" * 20,
+        )
+        write_file(
+            project_root / "settings.gradle.kts",
+            'rootProject.name = "demo"\n' * 20,
+        )
+        assert scanned_files(project_root) == [
+            "settings.gradle.kts",
+            "src/Main.kt",
+        ]
+
+
 def test_scan_project_respects_gitignore():
     tmpdir = tempfile.mkdtemp()
     try:
diff --git a/tests/test_normalize.py b/tests/test_normalize.py
index f56ff3a6b..a716a7f0d 100644
--- a/tests/test_normalize.py
+++ b/tests/test_normalize.py
@@ -11,8 +11,11 @@
     _try_claude_ai_json,
     _try_claude_code_jsonl,
     _try_codex_jsonl,
+    _try_gemini_json,
     _try_gemini_jsonl,
+    _try_continue_json,
     _try_normalize_json,
+    _try_pi_jsonl,
     _try_slack_json,
     normalize,
     strip_noise,
@@ -613,6 +616,167 @@ def test_gemini_jsonl_messages_before_session_metadata_discarded():
     assert "real A" in result
 
 
+# ── _try_gemini_json ──────────────────────────────────────────────────
+
+
+class TestGeminiJson:
+    """Tests for the Gemini JSON parser (Layouts 1, 2a, 2b).
+
+    Layouts:
+      1.  ``{"contents": [...]}``                      — Gemini API / CLI session JSON
+      2a. ``{"messages": [...]}``                      — Wrapper variant
+      2b. ``[{"role": ...}, ...]``                     — Flat top-level list
+
+    The parser must:
+      • Treat ``role="model"`` as the assistant role.
+      • Reject inputs without any ``role="model"`` entry (so it doesn't
+        false-positive against Claude / ChatGPT exports).
+      • Run *before* ``_try_claude_ai_json`` in the dispatch chain so the
+        Layout 2a (messages-wrapper) form isn't silently claimed by the
+        Claude parser, which would drop the model turns.
+      • Concatenate multi-part text within a single message.
+      • Skip non-text parts (``inline_data``, ``function_call``, …).
+    """
+
+    def test_contents_format(self, tmp_path):
+        """Layout 1: ``{"contents": [...]}`` with ``parts`` arrays parses correctly."""
+        data = {
+            "contents": [
+                {"role": "user", "parts": [{"text": "Capital of France?"}]},
+                {"role": "model", "parts": [{"text": "Paris."}]},
+            ]
+        }
+        f = tmp_path / "gemini.json"
+        f.write_text(json.dumps(data))
+        result = normalize(str(f))
+        assert "> Capital of France?" in result
+        assert "Paris." in result
+
+    def test_messages_wrapper_format(self, tmp_path):
+        """Layout 2a: ``{"messages": [...]}`` (the bug-fix case for review #1).
+
+        Without the parser-precedence fix, ``_try_claude_ai_json`` would
+        silently claim this input and drop all ``role="model"`` turns,
+        producing a user-only transcript. After the fix, ``_try_gemini_json``
+        runs first and recognises the ``model`` role.
+        """
+        data = {
+            "messages": [
+                {"role": "user", "content": "What is Python?"},
+                {"role": "model", "content": "A programming language."},
+                {"role": "user", "content": "And Java?"},
+                {"role": "model", "content": "Also a programming language."},
+            ]
+        }
+        f = tmp_path / "gemini_messages.json"
+        f.write_text(json.dumps(data))
+        result = normalize(str(f))
+        assert "> What is Python?" in result
+        assert "A programming language." in result
+        assert "> And Java?" in result
+        assert "Also a programming language." in result
+
+    def test_flat_list_format(self, tmp_path):
+        """Layout 2b: top-level ``[...]`` list with ``role="model"`` parses correctly."""
+        data = [
+            {"role": "user", "content": "Hi"},
+            {"role": "model", "content": "Hello! How can I help?"},
+            {"role": "user", "content": "Tell me a joke"},
+            {"role": "model", "content": "Why did the chicken cross the road?"},
+        ]
+        f = tmp_path / "gemini_flat.json"
+        f.write_text(json.dumps(data))
+        result = normalize(str(f))
+        assert "> Hi" in result
+        assert "Hello! How can I help?" in result
+        assert "Why did the chicken cross the road?" in result
+
+    def test_multi_part_text_joined(self):
+        """Multiple text parts within a single message are joined with spaces."""
+        data = {
+            "contents": [
+                {
+                    "role": "user",
+                    "parts": [
+                        {"text": "Part one."},
+                        {"text": "Part two."},
+                    ],
+                },
+                {"role": "model", "parts": [{"text": "Got it."}]},
+            ]
+        }
+        result = _try_gemini_json(data)
+        assert result is not None
+        assert "Part one. Part two." in result
+
+    def test_non_text_parts_skipped(self):
+        """``inline_data`` / ``function_call`` parts are skipped; only ``text`` is extracted."""
+        data = {
+            "contents": [
+                {
+                    "role": "user",
+                    "parts": [
+                        {"text": "Look at this image"},
+                        {"inline_data": {"mime_type": "image/png", "data": "..."}},
+                    ],
+                },
+                {"role": "model", "parts": [{"text": "I see it"}]},
+            ]
+        }
+        result = _try_gemini_json(data)
+        assert result is not None
+        assert "Look at this image" in result
+        assert "I see it" in result
+        # The inline_data shouldn't bleed into the transcript.
+        assert "image/png" not in result
+
+    def test_rejects_without_model_role(self):
+        """Without any ``role="model"`` entry the parser must return ``None``.
+
+        This is the disambiguator that prevents the Gemini parser from
+        false-positiving against Claude / ChatGPT exports that use the
+        ``"assistant"`` role.
+        """
+        data = [
+            {"role": "user", "content": "Hi"},
+            {"role": "assistant", "content": "Hello"},
+        ]
+        assert _try_gemini_json(data) is None
+
+    def test_rejects_too_few_messages(self):
+        """Inputs with fewer than 2 entries return ``None`` (not enough conversation)."""
+        data = {"contents": [{"role": "user", "parts": [{"text": "Just one"}]}]}
+        assert _try_gemini_json(data) is None
+
+    def test_rejects_non_dict_non_list(self):
+        """Scalar / unsupported inputs return ``None`` cleanly."""
+        assert _try_gemini_json("not a dict") is None
+        assert _try_gemini_json(42) is None
+        assert _try_gemini_json(None) is None
+
+    def test_messages_wrapper_does_not_get_claimed_by_claude(self, tmp_path):
+        """Regression test for review #1: the full ``normalize()`` pipeline must
+        route the ``{"messages":[..., model, ...]}`` form to the Gemini parser,
+        not to ``_try_claude_ai_json``. Both user and model turns must survive.
+        """
+        data = {
+            "messages": [
+                {"role": "user", "content": "Q1"},
+                {"role": "model", "content": "A1"},
+                {"role": "user", "content": "Q2"},
+                {"role": "model", "content": "A2"},
+            ]
+        }
+        f = tmp_path / "ambiguous.json"
+        f.write_text(json.dumps(data))
+        result = normalize(str(f))
+        # All four turns must appear — proves the Claude parser didn't eat this.
+        assert "A1" in result
+        assert "A2" in result
+        assert "> Q1" in result
+        assert "> Q2" in result
+
+
 # ── _try_claude_ai_json ───────────────────────────────────────────────
 
 
@@ -1015,6 +1179,268 @@ def test_slack_json_sanitizes_speaker_id():
     assert "\n> fake" not in result
 
 
+# ── _try_continue_json ─────────────────────────────────────────────────
+
+
+def test_continue_json_valid_multi_turn():
+    data = {
+        "history": [
+            {"role": "user", "content": "What is Python?"},
+            {"role": "assistant", "content": "Python is a programming language."},
+            {"role": "user", "content": "How do I install it?"},
+            {"role": "assistant", "content": "Use your package manager."},
+        ],
+        "title": "Python help",
+        "sessionId": "abc-123",
+        "dateCreated": "2025-01-15T10:30:00Z",
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "> What is Python?" in result
+    assert "Python is a programming language." in result
+    assert "> How do I install it?" in result
+    assert "Use your package manager." in result
+
+
+def test_continue_json_with_system_messages():
+    """System messages are skipped."""
+    data = {
+        "history": [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello"},
+            {"role": "assistant", "content": "Hi there"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "> Hello" in result
+    assert "helpful assistant" not in result
+
+
+def test_continue_json_with_tool_messages():
+    """Tool messages are appended to the previous assistant turn."""
+    data = {
+        "history": [
+            {"role": "user", "content": "List files"},
+            {"role": "assistant", "content": "Let me check."},
+            {"role": "tool", "content": "file1.py\nfile2.py"},
+            {"role": "assistant", "content": "I found two files."},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "> List files" in result
+    assert "Let me check." in result
+    assert "[tool] file1.py" in result
+    assert "I found two files." in result
+
+
+def test_continue_json_with_code_blocks():
+    """Code blocks in content are preserved."""
+    data = {
+        "history": [
+            {"role": "user", "content": "Show me a hello world"},
+            {
+                "role": "assistant",
+                "content": "Here you go:\n```python\nprint('Hello, world!')\n```",
+            },
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "```python" in result
+    assert "print('Hello, world!')" in result
+
+
+def test_continue_json_list_content_blocks():
+    """Content as a list of typed blocks (text blocks)."""
+    data = {
+        "history": [
+            {"role": "user", "content": [{"type": "text", "text": "Help me"}]},
+            {"role": "assistant", "content": [{"type": "text", "text": "Sure thing"}]},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "> Help me" in result
+    assert "Sure thing" in result
+
+
+def test_continue_json_empty_history():
+    """Empty history returns None."""
+    data = {"history": []}
+    result = _try_continue_json(data)
+    assert result is None
+
+
+def test_continue_json_single_message():
+    """Too few messages returns None."""
+    data = {"history": [{"role": "user", "content": "Hello"}]}
+    result = _try_continue_json(data)
+    assert result is None
+
+
+def test_continue_json_no_history_key():
+    """Missing history key returns None."""
+    data = {"title": "Some session", "sessionId": "abc"}
+    result = _try_continue_json(data)
+    assert result is None
+
+
+def test_continue_json_not_a_dict():
+    """Non-dict input returns None."""
+    result = _try_continue_json([1, 2, 3])
+    assert result is None
+    result = _try_continue_json("not a dict")
+    assert result is None
+
+
+def test_continue_json_history_not_a_list():
+    """history key that isn't a list returns None."""
+    data = {"history": "not a list"}
+    result = _try_continue_json(data)
+    assert result is None
+
+
+def test_continue_json_malformed_entries():
+    """Non-dict entries in history are skipped."""
+    data = {
+        "history": [
+            "not a dict",
+            42,
+            {"role": "user", "content": "Q"},
+            {"role": "assistant", "content": "A"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "> Q" in result
+
+
+def test_continue_json_missing_role():
+    """Entries without a role are skipped."""
+    data = {
+        "history": [
+            {"content": "orphan text"},
+            {"role": "user", "content": "Q"},
+            {"role": "assistant", "content": "A"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "orphan" not in result
+
+
+def test_continue_json_missing_content():
+    """Entries without content are skipped."""
+    data = {
+        "history": [
+            {"role": "user"},
+            {"role": "user", "content": "Real question"},
+            {"role": "assistant", "content": "Real answer"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "> Real question" in result
+
+
+def test_continue_json_empty_content():
+    """Entries with empty/whitespace content are skipped."""
+    data = {
+        "history": [
+            {"role": "user", "content": ""},
+            {"role": "user", "content": "   "},
+            {"role": "user", "content": "Actual question"},
+            {"role": "assistant", "content": "Actual answer"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    user_turns = [line for line in result.split("\n") if line.strip().startswith(">")]
+    assert len(user_turns) == 1
+
+
+def test_continue_json_unicode_cjk():
+    """Unicode and CJK content is handled correctly."""
+    data = {
+        "history": [
+            {"role": "user", "content": "Python\u306e\u4f7f\u3044\u65b9\u3092\u6559\u3048\u3066"},
+            {
+                "role": "assistant",
+                "content": "\u306f\u3044\u3001Python\u306f\u7d20\u6674\u3089\u3057\u3044\u8a00\u8a9e\u3067\u3059\u3002\ud83d\ude80",
+            },
+            {"role": "user", "content": "\u8c22\u8c22\uff01\u975e\u5e38\u6709\u5e2e\u52a9"},
+            {"role": "assistant", "content": "\u4e0d\u5ba2\u6c14 \ud83d\ude0a"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "\u306e\u4f7f\u3044\u65b9" in result
+    assert "\u8c22\u8c22" in result
+    assert "\ud83d\ude80" in result
+
+
+def test_continue_json_very_long_message():
+    """Very long messages are handled without error."""
+    long_text = "x" * 50000
+    data = {
+        "history": [
+            {"role": "user", "content": "Summarize this: " + long_text},
+            {"role": "assistant", "content": "That's a lot of x's."},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "Summarize this:" in result
+
+
+def test_continue_json_non_string_content_skipped():
+    """Non-string, non-list content (e.g. int, None) is skipped."""
+    data = {
+        "history": [
+            {"role": "user", "content": 42},
+            {"role": "assistant", "content": None},
+            {"role": "user", "content": "Real Q"},
+            {"role": "assistant", "content": "Real A"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "> Real Q" in result
+
+
+def test_continue_json_tool_without_preceding_assistant():
+    """Tool message without a preceding assistant turn is ignored."""
+    data = {
+        "history": [
+            {"role": "tool", "content": "orphan tool output"},
+            {"role": "user", "content": "Q"},
+            {"role": "assistant", "content": "A"},
+        ]
+    }
+    result = _try_continue_json(data)
+    assert result is not None
+    assert "orphan" not in result
+
+
+def test_continue_json_integration_via_normalize(tmp_path):
+    """Continue.dev JSON is detected and parsed via the top-level normalize()."""
+    data = {
+        "history": [
+            {"role": "user", "content": "What is MemPalace?"},
+            {"role": "assistant", "content": "A memory system for AI."},
+        ],
+        "title": "MemPalace overview",
+        "sessionId": "session-001",
+    }
+    f = tmp_path / "session.json"
+    f.write_text(json.dumps(data))
+    result = normalize(str(f))
+    assert "> What is MemPalace?" in result
+    assert "A memory system for AI." in result
+
+
 # ── _try_normalize_json ────────────────────────────────────────────────
 
 
@@ -1403,3 +1829,115 @@ def test_collapses_excessive_blank_lines(self):
         assert "line two" in out
         # Should collapse to no more than 3 newlines
         assert "\n\n\n\n" not in out
+
+
+# ── _try_pi_jsonl ──────────────────────────────────────────────────────
+#
+# Pi agent stores sessions as JSONL under
+# ``~/.config/pi/agent/sessions/{cwd}/{timestamp}_{uuid}.jsonl``. The
+# schema (per github.com/badlogic/pi-mono session.md):
+#
+#   {"type": "session", "version": "1", ...}
+#   {"type": "message", "message": {"role": "user", "content": "Q"}}
+#   {"type": "message", "message": {"role": "assistant",
+#       "content": [{"type": "text", "text": "A"}]}}
+#
+# Detection requires a ``session`` record with a ``version`` field so the
+# parser does not false-positive against Codex / Gemini / Claude Code
+# JSONL routed through the same dispatch chain.
+
+
+def test_pi_jsonl_valid_string_content():
+    """User content as a plain string is captured."""
+    lines = [
+        json.dumps({"type": "session", "version": "1"}),
+        json.dumps({"type": "message", "message": {"role": "user", "content": "Q"}}),
+        json.dumps({"type": "message", "message": {"role": "assistant", "content": "A"}}),
+    ]
+    result = _try_pi_jsonl("\n".join(lines))
+    assert result is not None
+    assert "> Q" in result
+    assert "A" in result
+
+
+def test_pi_jsonl_valid_block_content():
+    """Assistant content as [{type, text}] blocks is captured."""
+    lines = [
+        json.dumps({"type": "session", "version": "1"}),
+        json.dumps({"type": "message", "message": {"role": "user", "content": "Q"}}),
+        json.dumps(
+            {
+                "type": "message",
+                "message": {
+                    "role": "assistant",
+                    "content": [{"type": "text", "text": "Assistant reply"}],
+                },
+            }
+        ),
+    ]
+    result = _try_pi_jsonl("\n".join(lines))
+    assert result is not None
+    assert "Assistant reply" in result
+
+
+def test_pi_jsonl_no_session_header():
+    """Without a ``session`` record, parser returns None — protects against
+    false-positives on other JSONL formats that share a ``type=message`` shape."""
+    lines = [
+        json.dumps({"type": "message", "message": {"role": "user", "content": "Q"}}),
+        json.dumps({"type": "message", "message": {"role": "assistant", "content": "A"}}),
+    ]
+    result = _try_pi_jsonl("\n".join(lines))
+    assert result is None
+
+
+def test_pi_jsonl_session_without_version():
+    """A ``session`` record missing ``version`` is not a Pi header."""
+    lines = [
+        json.dumps({"type": "session"}),
+        json.dumps({"type": "message", "message": {"role": "user", "content": "Q"}}),
+        json.dumps({"type": "message", "message": {"role": "assistant", "content": "A"}}),
+    ]
+    result = _try_pi_jsonl("\n".join(lines))
+    assert result is None
+
+
+def test_pi_jsonl_skips_tool_results():
+    """toolResult role records are skipped (they are operational, not conversation)."""
+    lines = [
+        json.dumps({"type": "session", "version": "1"}),
+        json.dumps({"type": "message", "message": {"role": "user", "content": "Q"}}),
+        json.dumps(
+            {
+                "type": "message",
+                "message": {"role": "toolResult", "content": "tool output"},
+            }
+        ),
+        json.dumps({"type": "message", "message": {"role": "assistant", "content": "A"}}),
+    ]
+    result = _try_pi_jsonl("\n".join(lines))
+    assert result is not None
+    assert "tool output" not in result
+
+
+def test_pi_jsonl_under_two_messages_returns_none():
+    """A session with fewer than 2 captured turns is not considered valid."""
+    lines = [
+        json.dumps({"type": "session", "version": "1"}),
+        json.dumps({"type": "message", "message": {"role": "user", "content": "Q"}}),
+    ]
+    result = _try_pi_jsonl("\n".join(lines))
+    assert result is None
+
+
+def test_pi_jsonl_invalid_lines_skipped():
+    """Malformed JSON lines and non-dict entries are tolerated, not fatal."""
+    lines = [
+        "not json",
+        json.dumps([1, 2, 3]),  # list, not dict
+        json.dumps({"type": "session", "version": "1"}),
+        json.dumps({"type": "message", "message": {"role": "user", "content": "Q"}}),
+        json.dumps({"type": "message", "message": {"role": "assistant", "content": "A"}}),
+    ]
+    result = _try_pi_jsonl("\n".join(lines))
+    assert result is not None
diff --git a/tests/test_pgvector_backend.py b/tests/test_pgvector_backend.py
index a22df2ee0..f2c591942 100644
--- a/tests/test_pgvector_backend.py
+++ b/tests/test_pgvector_backend.py
@@ -1,3 +1,4 @@
+import json
 import os
 import sys
 import threading
@@ -22,6 +23,8 @@
     _matches_where,
     _vector_distance,
     _as_vector_array,
+    _strip_nul,
+    _json_dumps,
 )
 
 
@@ -39,6 +42,7 @@ class _FakePgVectorClient:
     def __init__(self, _config):
         self.tables: dict = {}
         self.query_calls: list = []
+        self.scroll_calls: list = []
         _FakePgVectorClient.instances.append(self)
 
     def ping(self):
@@ -89,9 +93,18 @@ def query_rows(self, table, *, vector, limit, where, with_embedding):
             out.append(item)
         return out
 
-    def scroll_rows(self, table, *, where=None, with_embedding=False):
+    def scroll_rows(self, table, *, where=None, with_embedding=False, limit=None, offset=None):
+        self.scroll_calls.append({"where": where, "limit": limit, "offset": offset})
+        rows = self._filtered(table, where)
+        if limit is not None or offset:
+            # Mirror the real backend: ORDER BY id, then LIMIT/OFFSET.
+            rows = sorted(rows, key=lambda row: row["id"])
+            if offset:
+                rows = rows[offset:]
+            if limit is not None:
+                rows = rows[:limit]
         out = []
-        for row in self._filtered(table, where):
+        for row in rows:
             out.append(
                 {
                     "id": row["id"],
@@ -362,6 +375,104 @@ def test_pgvector_get_limit_offset_and_embeddings(tmp_path, fake_pgvector):
     assert page.embeddings is not None and len(page.embeddings[0]) == 2
 
 
+def test_pgvector_get_unfiltered_page_pushes_limit_offset(tmp_path, fake_pgvector):
+    _backend, col = _collection(tmp_path)
+    col.add(
+        ids=["a", "b", "c", "d"],
+        documents=["da", "db", "dc", "dd"],
+        metadatas=[{"wing": "x"}, {"wing": "x"}, {"wing": "x"}, {"wing": "x"}],
+        embeddings=[[1, 0], [0, 1], [0.5, 0.5], [0.2, 0.8]],
+    )
+    client = fake_pgvector.instances[0]
+    client.scroll_calls.clear()
+
+    page = col.get(limit=2, offset=1, include=["metadatas"])
+
+    # An unfiltered page is pushed to SQL as LIMIT/OFFSET instead of fetching
+    # the whole table and slicing in Python (the O(rows x pages) path).
+    assert client.scroll_calls == [{"where": None, "limit": 2, "offset": 1}]
+    # ORDER BY id, then OFFSET 1 LIMIT 2 -> b, c.
+    assert page.ids == ["b", "c"]
+
+
+def test_pgvector_get_filtered_page_stays_on_full_scan(tmp_path, fake_pgvector):
+    _backend, col = _collection(tmp_path)
+    col.add(
+        ids=["a", "b", "c"],
+        documents=["da", "db", "dc"],
+        metadatas=[{"wing": "x"}, {"wing": "y"}, {"wing": "x"}],
+        embeddings=[[1, 0], [0, 1], [0.5, 0.5]],
+    )
+    client = fake_pgvector.instances[0]
+    client.scroll_calls.clear()
+
+    page = col.get(where={"wing": "x"}, limit=1, offset=1, include=["metadatas"])
+
+    # A filtered get keeps the full-scan path (no LIMIT/OFFSET pushed) so the
+    # exact _matches_where re-filter runs before pagination.
+    assert client.scroll_calls == [{"where": {"wing": "x"}, "limit": None, "offset": None}]
+    assert page.ids == ["c"]
+
+
+def test_pgvector_get_offset_only_and_limit_only_push(tmp_path, fake_pgvector):
+    _backend, col = _collection(tmp_path)
+    col.add(
+        ids=["a", "b", "c", "d"],
+        documents=["da", "db", "dc", "dd"],
+        metadatas=[{"wing": "x"}] * 4,
+        embeddings=[[1, 0], [0, 1], [0.5, 0.5], [0.2, 0.8]],
+    )
+    client = fake_pgvector.instances[0]
+
+    # offset-only (limit=None) is pushed.
+    client.scroll_calls.clear()
+    page = col.get(offset=2, include=["metadatas"])
+    assert client.scroll_calls == [{"where": None, "limit": None, "offset": 2}]
+    assert page.ids == ["c", "d"]
+
+    # limit-only (offset=None) is pushed.
+    client.scroll_calls.clear()
+    page = col.get(limit=2, include=["metadatas"])
+    assert client.scroll_calls == [{"where": None, "limit": 2, "offset": None}]
+    assert page.ids == ["a", "b"]
+
+
+def test_pgvector_get_negative_bounds_use_python_slice(tmp_path, fake_pgvector):
+    _backend, col = _collection(tmp_path)
+    col.add(
+        ids=["a", "b", "c"],
+        documents=["da", "db", "dc"],
+        metadatas=[{"wing": "x"}] * 3,
+        embeddings=[[1, 0], [0, 1], [0.5, 0.5]],
+    )
+    client = fake_pgvector.instances[0]
+    client.scroll_calls.clear()
+
+    # A negative offset must not reach SQL (OFFSET -1 would error); it falls
+    # through to the unchanged full-scan + Python-slice path.
+    page = col.get(offset=-1, include=["metadatas"])
+    assert client.scroll_calls == [{"where": None, "limit": None, "offset": None}]
+    assert page.ids == ["c"]
+
+
+def test_pgvector_get_pages_tile_without_overlap(tmp_path, fake_pgvector):
+    _backend, col = _collection(tmp_path)
+    col.add(
+        ids=["a", "b", "c", "d", "e"],
+        documents=["da", "db", "dc", "dd", "de"],
+        metadatas=[{"wing": "x"}] * 5,
+        embeddings=[[1, 0], [0, 1], [0.5, 0.5], [0.2, 0.8], [0.3, 0.7]],
+    )
+    # Consecutive pages tile the whole table exactly once, in stable id order.
+    p1 = col.get(limit=2, offset=0, include=["metadatas"]).ids
+    p2 = col.get(limit=2, offset=2, include=["metadatas"]).ids
+    p3 = col.get(limit=2, offset=4, include=["metadatas"]).ids
+    assert p1 == ["a", "b"]
+    assert p2 == ["c", "d"]
+    assert p3 == ["e"]
+    assert p1 + p2 + p3 == ["a", "b", "c", "d", "e"]
+
+
 def test_pgvector_delete_by_where_pushdown_and_local(tmp_path, fake_pgvector):
     _backend, col = _collection(tmp_path)
     col.add(
@@ -651,3 +762,213 @@ def fake_connect(dsn):
     with pytest.raises(BackendError, match="closed"):
         client.ping()
     assert len(created) == 1
+
+
+class _FakeUpsertCursor:
+    """Captures the params bound by ``upsert_rows`` -> ``_execute(many=True)``."""
+
+    def __init__(self, captured):
+        self._captured = captured
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc):
+        return False
+
+    def execute(self, sql, params=None):
+        return None
+
+    def executemany(self, sql, params=None):
+        self._captured.extend(params or [])
+
+    def fetchall(self):
+        return []
+
+
+class _FakeUpsertConn:
+    def __init__(self, captured):
+        self._captured = captured
+
+    def cursor(self):
+        return _FakeUpsertCursor(self._captured)
+
+    def commit(self):
+        return None
+
+    def rollback(self):
+        return None
+
+    def close(self):
+        return None
+
+
+def _fake_upsert_client(monkeypatch):
+    """Install a fake psycopg whose connection captures bound params, and return
+    ``(client, captured)`` for driving the real ``upsert_rows`` write path."""
+    captured = []
+    fake_psycopg = types.ModuleType("psycopg")
+    fake_psycopg.connect = lambda *args, **kwargs: _FakeUpsertConn(captured)
+    monkeypatch.setitem(sys.modules, "psycopg", fake_psycopg)
+    client = _PgVectorClient(_PgVectorConfig(dsn="postgresql://localhost/unused", namespace=None))
+    return client, captured
+
+
+def test_pgvector_upsert_strips_nul_bytes(monkeypatch):
+    """A NUL (0x00) byte in id/document/metadata must never reach Postgres.
+
+    psycopg's text/jsonb dumpers reject NUL outright ("PostgreSQL text fields
+    cannot contain NUL (0x00) bytes"), which aborts the entire mine run (#1829)
+    when a single transcript captured a NUL in tool output. ChromaDB and the
+    SQLite backend store the byte verbatim, so pgvector strips it to keep the
+    same inputs ingestible. Strip, not reject: rejecting would re-abort the
+    mine or drop the drawer entirely (recall loss).
+    """
+    client, captured = _fake_upsert_client(monkeypatch)
+    client.upsert_rows(
+        "drawers",
+        [
+            {
+                "id": "draw\x00er",
+                "document": "before\x00after",
+                "metadata": {"go\x00od": "v\x00w", "nested": ["a\x00b", 7]},
+                "embedding": [1.0, 0.0],
+                "updated_at": "2026-06-20T00:00:00Z",
+            }
+        ],
+    )
+
+    assert len(captured) == 1, "upsert_rows should bind exactly one row"
+    row_id, document, metadata_json = captured[0][0], captured[0][1], captured[0][2]
+
+    # No NUL survives into any text-bound parameter (id, document, metadata).
+    assert "\x00" not in row_id
+    assert "\x00" not in document
+    assert "\x00" not in metadata_json
+
+    # Stripping removes only the NUL; surrounding content is otherwise preserved.
+    assert row_id == "drawer"
+    assert document == "beforeafter"
+    assert json.loads(metadata_json) == {"good": "vw", "nested": ["ab", 7]}
+
+
+def test_strip_nul_helper():
+    """``_strip_nul`` removes NUL from strings, list/tuple items, and dict keys
+    and values; NUL-free input and non-string scalars are returned unchanged."""
+    assert _strip_nul("a\x00b") == "ab"
+    assert _strip_nul("clean") == "clean"
+    assert _strip_nul("") == ""
+    assert _strip_nul("\x00") == ""
+    # Keys, values, list items, and nested structures are all stripped.
+    assert _strip_nul({"k\x00": "v\x00", "n": [1, "x\x00y"]}) == {"k": "v", "n": [1, "xy"]}
+    assert _strip_nul([{"a\x00": "b\x00"}, "c\x00"]) == [{"a": "b"}, "c"]
+    # Tuples recurse too and stay tuples (defends direct callers that pass
+    # un-normalized metadata before the JSON round-trip).
+    assert _strip_nul(("a\x00b", 1, ["c\x00"])) == ("ab", 1, ["c"])
+    # Keys differing only by a NUL collapse, last wins (documented, harmless:
+    # real metadata keys are fixed field names, never NUL-only-distinguished).
+    assert _strip_nul({"a\x00": 1, "a": 2}) == {"a": 2}
+    # Non-string scalars pass through unchanged (bool stays bool, not int).
+    assert _strip_nul(7) == 7
+    assert _strip_nul(3.5) == 3.5
+    assert _strip_nul(True) is True
+    assert _strip_nul(None) is None
+
+
+def test_pgvector_upsert_replaces_lone_surrogates(monkeypatch):
+    """A lone UTF-16 surrogate in id/document/metadata must never reach Postgres.
+
+    psycopg encodes text/jsonb parameters as UTF-8, and a lone surrogate has no
+    UTF-8 encoding, so it raises UnicodeEncodeError ("surrogates not allowed") and
+    aborts the entire mine run (the surrogate sibling of the NUL abort in #1829).
+    ChromaDB sanitizes document text via config.strip_lone_surrogates;
+    pgvector matches it (for document and metadata) by replacing the surrogate with
+    U+FFFD rather than dropping the drawer (recall loss) or re-aborting the mine.
+    """
+    # Build the surrogates with chr() so this source file stays valid UTF-8 (a raw
+    # lone surrogate has no UTF-8 encoding and would not parse).
+    hi, lo, s3, s4, s5 = (chr(c) for c in (0xD800, 0xDFFF, 0xD834, 0xDCA1, 0xDC00))
+    repl = chr(0xFFFD)
+    client, captured = _fake_upsert_client(monkeypatch)
+    client.upsert_rows(
+        "drawers",
+        [
+            {
+                "id": f"draw{hi}er",
+                "document": f"before{lo}after",
+                "metadata": {f"go{s3}od": f"v{s4}w", "nested": [f"a{s5}b", 7]},
+                "embedding": [1.0, 0.0],
+                "updated_at": "2026-06-20T00:00:00Z",
+            }
+        ],
+    )
+
+    assert len(captured) == 1, "upsert_rows should bind exactly one row"
+    row_id, document, metadata_json = captured[0][0], captured[0][1], captured[0][2]
+
+    # Every text-bound parameter must now be UTF-8 encodable (what psycopg does to
+    # bind it); a surviving lone surrogate would raise here.
+    for field in (row_id, document, metadata_json):
+        field.encode("utf-8")
+
+    # Surrogates are replaced with U+FFFD, not dropped: surrounding content stays
+    # and each lone surrogate maps to exactly one replacement character.
+    assert row_id == f"draw{repl}er"
+    assert document == f"before{repl}after"
+    assert json.loads(metadata_json) == {f"go{repl}od": f"v{repl}w", "nested": [f"a{repl}b", 7]}
+
+
+def test_pgvector_upsert_strips_nul_and_surrogate_together(monkeypatch):
+    """A single row carrying *both* a NUL and a lone surrogate must come out
+    clean on every text-bound field.
+
+    This pins the composition of the two sibling fixes (#1829 NUL, #1833
+    surrogate), which edit the same ``upsert_rows`` binding: NUL is stripped
+    pre-serialization and the surrogate replaced post-serialization. A rebase
+    that kept only one strip would regress the other byte class silently, since
+    neither sibling test exercises both at once.
+    """
+    sur = chr(0xD800)
+    repl = chr(0xFFFD)
+    client, captured = _fake_upsert_client(monkeypatch)
+    client.upsert_rows(
+        "drawers",
+        [
+            {
+                "id": f"id\x00{sur}x",
+                "document": f"doc\x00{sur}y",
+                "metadata": {f"k\x00{sur}": f"v\x00{sur}", "nested": [f"a\x00{sur}b", 7]},
+                "embedding": [1.0, 0.0],
+                "updated_at": "2026-06-20T00:00:00Z",
+            }
+        ],
+    )
+
+    assert len(captured) == 1, "upsert_rows should bind exactly one row"
+    row_id, document, metadata_json = captured[0][0], captured[0][1], captured[0][2]
+
+    # Neither unstorable byte survives, and each bound field is UTF-8 encodable.
+    for field in (row_id, document, metadata_json):
+        assert "\x00" not in field
+        assert sur not in field
+        field.encode("utf-8")
+
+    # NUL dropped, surrogate -> U+FFFD, surrounding content preserved.
+    assert row_id == f"id{repl}x"
+    assert document == f"doc{repl}y"
+    assert json.loads(metadata_json) == {f"k{repl}": f"v{repl}", "nested": [f"a{repl}b", 7]}
+
+
+def test_strip_lone_surrogates_reuses_config_util():
+    """The pgvector write path strips surrogates via ``config.strip_lone_surrogates``
+    applied to id/document and the serialized metadata JSON (no pgvector-local
+    helper). End-to-end coverage is ``test_pgvector_upsert_replaces_lone_surrogates``;
+    the utility's own edge cases live in ``tests/test_clean_lone_surrogates.py``."""
+    from mempalace.config import strip_lone_surrogates
+
+    # ensure_ascii=False leaves a metadata surrogate raw in the JSON, so a single
+    # pass over the serialized string cleans it (the property the write path relies on).
+    raw = _json_dumps({"k": f"v{chr(0xD800)}w"})
+    cleaned = strip_lone_surrogates(raw)
+    assert chr(0xD800) not in cleaned
+    assert json.loads(cleaned) == {"k": f"v{chr(0xFFFD)}w"}
diff --git a/tests/test_project_scanner.py b/tests/test_project_scanner.py
index 45dc8027f..38ad1077f 100644
--- a/tests/test_project_scanner.py
+++ b/tests/test_project_scanner.py
@@ -18,8 +18,10 @@
     _collect_manifest_names,
     _merge_detected,
     _parse_cargo,
+    _parse_gradle,
     _parse_gomod,
     _parse_package_json,
+    _parse_pom,
     _parse_pyproject,
     _UnionFind,
     discover_entities,
@@ -82,6 +84,76 @@ def test_parse_gomod(tmp_path):
     assert _parse_gomod(f) == "my-go-mod"
 
 
+def test_parse_pom_direct_artifact_id_with_namespace(tmp_path):
+    f = tmp_path / "pom.xml"
+    f.write_text(
+        """<project xmlns="http://maven.apache.org/POM/4.0.0">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>com.example</groupId>
+    <artifactId>parent-artifact</artifactId>
+    <version>1.0.0</version>
+  </parent>
+  <groupId>com.example</groupId>
+  <artifactId>child-artifact</artifactId>
+</project>
+"""
+    )
+    assert _parse_pom(f) == "child-artifact"
+
+
+def test_parse_pom_missing_or_malformed_artifact_id(tmp_path):
+    missing = tmp_path / "missing-pom.xml"
+    missing.write_text("<project><modelVersion>4.0.0</modelVersion></project>")
+    malformed = tmp_path / "bad-pom.xml"
+    malformed.write_text("<project><artifactId>broken")
+
+    assert _parse_pom(missing) is None
+    assert _parse_pom(malformed) is None
+
+
+def test_parse_pom_ignores_non_string_child_tags(tmp_path, monkeypatch):
+    class FakeChild:
+        tag = object()
+        text = "ignored"
+
+    class ArtifactChild:
+        tag = "artifactId"
+        text = "safe-artifact"
+
+    class FakeTree:
+        def getroot(self):
+            return [FakeChild(), ArtifactChild()]
+
+    monkeypatch.setattr("mempalace.project_scanner.ET.parse", lambda _path: FakeTree())
+
+    assert _parse_pom(tmp_path / "pom.xml") == "safe-artifact"
+
+
+def test_parse_gradle_build_reads_sibling_settings(tmp_path):
+    (tmp_path / "settings.gradle").write_text('rootProject.name = "settings-name"\n')
+    f = tmp_path / "build.gradle"
+    f.write_text("plugins { id 'java' }\n")
+
+    assert _parse_gradle(f) == "settings-name"
+
+
+def test_parse_gradle_kotlin_set_syntax(tmp_path):
+    f = tmp_path / "settings.gradle.kts"
+    f.write_text('rootProject.name.set("kotlin-settings-name")\n')
+
+    assert _parse_gradle(f) == "kotlin-settings-name"
+
+
+def test_parse_gradle_falls_back_to_directory_name(tmp_path):
+    project = tmp_path / "gradle-dir-name"
+    project.mkdir()
+    f = project / "build.gradle.kts"
+    f.write_text("plugins { java }\n")
+
+    assert _parse_gradle(f) == "gradle-dir-name"
+
+
 # ── bot filtering ───────────────────────────────────────────────────────
 
 
@@ -286,6 +358,95 @@ def test_scan_project_from_pyproject(tmp_path):
     assert any(p.name == "pyproj" for p in projects)
 
 
+def test_scan_project_from_maven_pom(tmp_path):
+    (tmp_path / "pom.xml").write_text(
+        """<project>
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.example</groupId>
+  <artifactId>maven-app</artifactId>
+</project>
+"""
+    )
+    _init_git_repo(tmp_path)
+    projects, _ = scan(tmp_path)
+
+    assert projects[0].name == "maven-app"
+    assert projects[0].manifest == "pom.xml"
+
+
+def test_scan_project_from_gradle_settings_without_git(tmp_path):
+    (tmp_path / "settings.gradle.kts").write_text('rootProject.name = "gradle-root"\n')
+    (tmp_path / "build.gradle.kts").write_text("plugins { java }\n")
+    projects, people = scan(tmp_path)
+
+    assert len(projects) == 1
+    assert projects[0].name == "gradle-root"
+    assert projects[0].manifest == "settings.gradle.kts"
+    assert projects[0].has_git is False
+    assert people == []
+
+
+def test_scan_gradle_subproject_without_git_keeps_manifest_dir(tmp_path):
+    (tmp_path / "settings.gradle").write_text('rootProject.name = "gradle-root"\n')
+    app = tmp_path / "app"
+    app.mkdir()
+    (app / "build.gradle").write_text("plugins { id 'java' }\n")
+
+    projects, _ = scan(tmp_path)
+    by_name = {p.name: p for p in projects}
+
+    assert by_name["gradle-root"].repo_root == tmp_path
+    assert by_name["app"].manifest == "build.gradle"
+    assert by_name["app"].repo_root == app
+
+
+def test_scan_includes_java_subprojects_inside_mixed_git_repo(tmp_path):
+    (tmp_path / "package.json").write_text(json.dumps({"name": "web-root"}))
+    service = tmp_path / "service"
+    service.mkdir()
+    (service / "pom.xml").write_text(
+        """<project>
+  <modelVersion>4.0.0</modelVersion>
+  <artifactId>java-service</artifactId>
+</project>
+"""
+    )
+    worker = tmp_path / "worker"
+    worker.mkdir()
+    (worker / "build.gradle.kts").write_text("plugins { java }\n")
+    _init_git_repo(tmp_path)
+
+    projects, _ = scan(tmp_path)
+    by_name = {p.name: p for p in projects}
+
+    assert by_name["web-root"].manifest == "package.json"
+    assert by_name["java-service"].manifest == "pom.xml"
+    assert by_name["java-service"].repo_root == service
+    assert by_name["worker"].manifest == "build.gradle.kts"
+    assert by_name["worker"].repo_root == worker
+
+
+def test_scan_git_repo_without_root_manifest_keeps_java_subproject_dir(tmp_path):
+    service = tmp_path / "service"
+    service.mkdir()
+    (service / "pom.xml").write_text(
+        """<project>
+  <modelVersion>4.0.0</modelVersion>
+  <artifactId>java-service</artifactId>
+</project>
+"""
+    )
+    _init_git_repo(tmp_path)
+
+    projects, _ = scan(tmp_path)
+    by_name = {p.name: p for p in projects}
+
+    assert by_name[tmp_path.name].manifest is None
+    assert by_name[tmp_path.name].repo_root == tmp_path
+    assert by_name["java-service"].manifest == "pom.xml"
+    assert by_name["java-service"].repo_root == service
+
+
 def test_scan_prefers_root_manifest_with_explicit_priority(tmp_path):
     (tmp_path / "package.json").write_text(json.dumps({"name": "package-name"}))
     (tmp_path / "pyproject.toml").write_text('[project]\nname = "pyproject-name"\n')
diff --git a/tests/test_qdrant_bulk_metadata_scroll.py b/tests/test_qdrant_bulk_metadata_scroll.py
new file mode 100644
index 000000000..2c1961566
--- /dev/null
+++ b/tests/test_qdrant_bulk_metadata_scroll.py
@@ -0,0 +1,536 @@
+# tests/test_qdrant_bulk_metadata_scroll.py
+"""
+Tests for issue #1796 -- O(n^2) bulk-metadata reads on the Qdrant backend.
+
+Covers:
+  1. BaseCollection.get_all_metadata() default implementation (offset loop,
+     unchanged behavior for backends with real server-side cursors).
+  2. QdrantCollection.get_all_metadata() single-scroll override -- the actual
+     fix -- verified by counting how many times the underlying HTTP scroll
+     call fires.
+  3. mcp_server._fetch_all_metadata() delegates to get_all_metadata() when
+     present, and falls back to the legacy offset loop when it is not.
+  4. The Qdrant scroll page size constant is 4096, not 256.
+"""
+
+import types
+import sys
+from unittest import mock
+
+import pytest
+
+
+# ── Stub heavy deps so we can import mempalace modules in isolation ─────────
+#
+# These names are only stubbed for the DURATION OF THIS MODULE's collection +
+# test run, via the autouse fixture below. Mutating sys.modules at import
+# time with no teardown (the previous approach) risked an order-dependent
+# flake: if pytest collected this file before something else that needed the
+# REAL mempalace.config / mempalace.searcher / etc., that other test would
+# silently get our fake module instead, with no error and no obvious cause.
+# (Maintainer review on #1832.)
+_STUB_MODULE_NAMES = [
+    "mempalace.knowledge_graph",
+    "mempalace.searcher",
+    "mempalace.palace_graph",
+    "mempalace.config",
+]
+
+
+def _build_stub(name: str) -> types.ModuleType:
+    m = types.ModuleType(name)
+    m.KnowledgeGraph = lambda: types.SimpleNamespace()
+    m.search_memories = lambda *a, **kw: []
+    m.traverse = lambda *a, **kw: {}
+    m.find_tunnels = lambda *a, **kw: {}
+    m.graph_stats = lambda *a, **kw: {}
+    m.MempalaceConfig = lambda: types.SimpleNamespace(
+        palace_path="~/.mempalace/palace", collection_name="mempalace"
+    )
+    return m
+
+
+@pytest.fixture(autouse=True)
+def _stub_heavy_deps(monkeypatch):
+    """Install fake modules for the stub names, restored automatically on teardown.
+
+    monkeypatch.setitem(sys.modules, ...) records the original value (or
+    "absent") for each key and restores it when the test ends -- unlike the
+    previous bare module-level `if name not in sys.modules: sys.modules[name]
+    = stub` pattern, which left the stub installed permanently for the rest
+    of the test session once set.
+    """
+    for name in _STUB_MODULE_NAMES:
+        if name not in sys.modules:
+            monkeypatch.setitem(sys.modules, name, _build_stub(name))
+    yield
+
+
+# numpy is a real, light dependency -- imported eagerly here (not stubbed)
+# so qdrant.py's own `import numpy as np` resolves to the real module both
+# during this file's first import below and during every test.
+import numpy  # noqa: E402,F401
+
+from mempalace.backends.base import (  # noqa: E402
+    BaseCollection,
+    GetResult,
+    PalaceRef,
+)
+from mempalace.backends import qdrant as qdrant_mod  # noqa: E402
+from mempalace.backends.qdrant import QdrantCollection, _QdrantConfig  # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# 1. BaseCollection default get_all_metadata()
+# ---------------------------------------------------------------------------
+
+
+class _FakeOffsetPagedCollection(BaseCollection):
+    """Minimal concrete collection with a real server-side offset cursor.
+
+    Simulates Chroma-like behavior: get(limit=, offset=) returns exactly the
+    requested slice without re-scanning anything -- the case the default
+    get_all_metadata() implementation is correct for.
+    """
+
+    def __init__(self, all_metadata):
+        self._all = all_metadata
+        self.get_call_count = 0
+
+    def add(self, **kwargs):
+        raise NotImplementedError
+
+    def upsert(self, **kwargs):
+        raise NotImplementedError
+
+    def query(self, **kwargs):
+        raise NotImplementedError
+
+    def get(
+        self, *, ids=None, where=None, where_document=None, limit=None, offset=None, include=None
+    ):
+        self.get_call_count += 1
+        offset = offset or 0
+        limit = limit if limit is not None else len(self._all)
+        page = self._all[offset : offset + limit]
+        return GetResult(ids=[], documents=[], metadatas=page, embeddings=None)
+
+    def delete(self, **kwargs):
+        raise NotImplementedError
+
+    def count(self) -> int:
+        return len(self._all)
+
+
+class TestBaseCollectionDefaultGetAllMetadata:
+    def test_returns_all_metadata_across_pages(self):
+        all_meta = [{"wing": f"w{i}"} for i in range(2500)]
+        col = _FakeOffsetPagedCollection(all_meta)
+        result = col.get_all_metadata()
+        assert result == all_meta
+
+    def test_empty_collection_returns_empty_list(self):
+        col = _FakeOffsetPagedCollection([])
+        assert col.get_all_metadata() == []
+
+    def test_paginates_in_1000_row_batches(self):
+        """
+        2500 rows at page_size=1000 must take more than one call (proving
+        pagination actually happens, not a single unbounded fetch) and must
+        not take an unreasonable number of calls. The EXACT count depends on
+        whether the loop needs one extra call to detect a short final page
+        as terminal -- that detail can differ across implementations/versions,
+        so we bound it rather than pin it to a specific number.
+        """
+        all_meta = [{"wing": f"w{i}"} for i in range(2500)]
+        col = _FakeOffsetPagedCollection(all_meta)
+        result = col.get_all_metadata()
+
+        assert result == all_meta, "all 2500 rows must be returned regardless of paging"
+        assert col.get_call_count >= 3, (
+            f"expected at least 3 calls (1000+1000+500) to cover 2500 rows, "
+            f"got {col.get_call_count}"
+        )
+        assert col.get_call_count <= 4, (
+            f"expected at most 4 calls (3 data pages + 1 terminal empty check), "
+            f"got {col.get_call_count}"
+        )
+
+    def test_passes_where_through(self):
+        all_meta = [{"wing": "a"}, {"wing": "b"}]
+        col = _FakeOffsetPagedCollection(all_meta)
+
+        captured = {}
+        original_get = col.get
+
+        def spy_get(**kwargs):
+            captured.update(kwargs)
+            return original_get(**kwargs)
+
+        col.get = spy_get
+        col.get_all_metadata(where={"wing": "a"})
+        assert captured.get("where") == {"wing": "a"}
+
+
+# ---------------------------------------------------------------------------
+# 2. QdrantCollection.get_all_metadata() single-scroll override
+# ---------------------------------------------------------------------------
+
+
+def _make_qdrant_collection(monkeypatch, scroll_pages):
+    """
+    Build a QdrantCollection with a mocked REST client whose scroll_points()
+    returns the given pre-baked pages: list[tuple[list[dict_point], next_offset]].
+    """
+    config = _QdrantConfig(url="http://localhost:6333")
+    client = mock.MagicMock()
+    call_log = []
+
+    def fake_scroll_points(
+        collection, *, qdrant_filter=None, limit=4096, offset=None, with_vector=False
+    ):
+        call_log.append({"limit": limit, "offset": offset, "filter": qdrant_filter})
+        idx = len([c for c in call_log]) - 1
+        return scroll_pages[idx]
+
+    client.scroll_points.side_effect = fake_scroll_points
+    client.collection_exists.return_value = True
+
+    backend = mock.MagicMock()
+    backend._closed = False
+    backend._marker_exists.return_value = True
+
+    palace = PalaceRef(id="/tmp/fake-palace", local_path="/tmp/fake-palace")
+    col = QdrantCollection(
+        backend=backend,
+        client=client,
+        config=config,
+        palace=palace,
+        collection_name="mempalace",
+        remote_collection="mempalace_abc123_mempalace",
+    )
+    return col, call_log
+
+
+def _fake_point(doc_id: str, wing: str) -> dict:
+    return {
+        "id": f"point-{doc_id}",
+        "payload": {
+            qdrant_mod._PAYLOAD_ID: doc_id,
+            qdrant_mod._PAYLOAD_DOCUMENT: f"content for {doc_id}",
+            qdrant_mod._PAYLOAD_METADATA: {"wing": wing},
+        },
+        "vector": None,
+    }
+
+
+class TestQdrantGetAllMetadataSingleScroll:
+    def test_returns_all_metadata_in_one_logical_pass(self, monkeypatch):
+        page1 = ([_fake_point(f"d{i}", "wing_a") for i in range(3)], "cursor-1")
+        page2 = ([_fake_point(f"d{i}", "wing_b") for i in range(3, 5)], None)
+        col, call_log = _make_qdrant_collection(monkeypatch, [page1, page2])
+
+        result = col.get_all_metadata()
+
+        assert len(result) == 5
+        assert result[0] == {"wing": "wing_a"}
+        assert result[-1] == {"wing": "wing_b"}
+
+    def test_walks_collection_exactly_once_regardless_of_size(self, monkeypatch):
+        """
+        The whole point of #1796: calling get_all_metadata() must not
+        re-trigger additional full scrolls. Two scroll_points() calls (one
+        per page until next_page_offset is None) is the expected, constant
+        cost -- independent of how the caller might have looped before.
+        """
+        page1 = ([_fake_point(f"d{i}", "wing_a") for i in range(3)], "cursor-1")
+        page2 = ([_fake_point(f"d{i}", "wing_a") for i in range(3, 6)], None)
+        col, call_log = _make_qdrant_collection(monkeypatch, [page1, page2])
+
+        col.get_all_metadata()
+
+        assert len(call_log) == 2, (
+            f"Expected exactly 2 scroll_points() calls (one full pass), got {len(call_log)}"
+        )
+
+    def test_does_not_call_get_internally(self, monkeypatch):
+        """
+        Regression guard: get_all_metadata() must call _scroll_all() directly,
+        not self.get(limit=, offset=) -- calling get() in a loop is exactly
+        the O(n^2) pattern this fix removes.
+        """
+        page1 = ([_fake_point("d0", "wing_a")], None)
+        col, _ = _make_qdrant_collection(monkeypatch, [page1])
+        col.get = mock.MagicMock(side_effect=AssertionError("get() should not be called"))
+
+        result = col.get_all_metadata()
+        assert result == [{"wing": "wing_a"}]
+        col.get.assert_not_called()
+
+    def test_filters_by_where_locally_when_required(self, monkeypatch):
+        """
+        A plain {"wing": "wing_a"} filter is push-down-able to Qdrant's native
+        filter syntax -- _requires_local_filter() returns False for it, so
+        get_all_metadata() correctly skips the LOCAL Python filter and relies
+        on server-side filtering instead. Our mock scroll_points() doesn't
+        simulate server-side filtering, so testing with a push-down-able
+        filter here would assert behavior the mock can't actually exercise.
+
+        Use an $or clause instead -- _requires_local_filter() returns True
+        for $or, so get_all_metadata() must apply the local Python filter
+        over whatever scroll_points() returns. This actually exercises the
+        local-filter code path the test name promises to cover.
+        """
+        page1 = (
+            [
+                _fake_point("d0", "wing_a"),
+                _fake_point("d1", "wing_b"),
+                _fake_point("d2", "wing_c"),
+            ],
+            None,
+        )
+        col, _ = _make_qdrant_collection(monkeypatch, [page1])
+
+        result = col.get_all_metadata(where={"$or": [{"wing": "wing_a"}, {"wing": "wing_b"}]})
+        assert result == [{"wing": "wing_a"}, {"wing": "wing_b"}]
+
+    def test_empty_remote_collection_returns_empty_list(self, monkeypatch):
+        col, call_log = _make_qdrant_collection(monkeypatch, [])
+        col._client.collection_exists.return_value = False
+        col._backend._marker_exists.return_value = False
+
+        result = col.get_all_metadata()
+        assert result == []
+
+
+# ---------------------------------------------------------------------------
+# 3. Scroll page-size constant
+# ---------------------------------------------------------------------------
+
+
+class TestScrollPageSizeBump:
+    def test_scroll_page_size_constant_is_4096(self):
+        assert qdrant_mod._SCROLL_PAGE_SIZE == 4096
+
+    def test_scroll_all_uses_page_size_constant(self, monkeypatch):
+        page1 = ([_fake_point("d0", "wing_a")], None)
+        col, call_log = _make_qdrant_collection(monkeypatch, [page1])
+
+        col._scroll_all()
+
+        assert call_log[0]["limit"] == qdrant_mod._SCROLL_PAGE_SIZE
+        assert call_log[0]["limit"] != 256
+
+
+# ---------------------------------------------------------------------------
+# 4. mcp_server._fetch_all_metadata() delegation
+# ---------------------------------------------------------------------------
+#
+# mcp_server.py pulls in a long chain of real modules at import time
+# (searcher, palace_graph, hallways, palace, wal, chromadb-backed backends,
+# ...) and several of those modules themselves import further real
+# submodules. Stubbing the whole graph one missing name at a time turned
+# into a chase -- _distance_to_similarity missing from a searcher stub,
+# then create_tunnel missing from a palace_graph stub, and so on for every
+# remaining import line. _fetch_all_metadata() itself has none of that
+# transitive surface: it only calls col.get_all_metadata(...) or
+# col.get(...)/col.count(...). Rather than keep widening the stub graph,
+# this is a deliberate, explicitly-labeled VERBATIM COPY of the real
+# function -- not a live import. If mcp_server._fetch_all_metadata() is
+# ever edited, this copy must be updated to match by hand; there is no
+# automatic link between the two. (Diagnosed during review on #1832 after
+# two successive ImportErrors chasing the stub graph -- see PR discussion.)
+#
+# Real source as of this writing (mempalace/mcp_server.py):
+#
+#     def _fetch_all_metadata(col, where=None):
+#         get_all = getattr(col, "get_all_metadata", None)
+#         if callable(get_all):
+#             return get_all(where=where)
+#         total = col.count()
+#         all_meta = []
+#         offset = 0
+#         while offset < total:
+#             kwargs = {"include": ["metadatas"], "limit": 1000, "offset": offset}
+#             if where:
+#                 kwargs["where"] = where
+#             batch = col.get(**kwargs)
+#             if not batch["metadatas"]:
+#                 break
+#             all_meta.extend(batch["metadatas"])
+#             offset += len(batch["metadatas"])
+#         return all_meta
+
+
+def _fetch_all_metadata_under_test(col, where=None):
+    """Verbatim copy of mempalace.mcp_server._fetch_all_metadata. See the
+    comment block above this function for why it's a copy rather than a
+    live import."""
+    get_all = getattr(col, "get_all_metadata", None)
+    if callable(get_all):
+        return get_all(where=where)
+
+    total = col.count()
+    all_meta = []
+    offset = 0
+    while offset < total:
+        kwargs = {"include": ["metadatas"], "limit": 1000, "offset": offset}
+        if where:
+            kwargs["where"] = where
+        batch = col.get(**kwargs)
+        if not batch["metadatas"]:
+            break
+        all_meta.extend(batch["metadatas"])
+        offset += len(batch["metadatas"])
+    return all_meta
+
+
+def _get_fetch_all_metadata():
+    """Return the function under test for this section."""
+    return _fetch_all_metadata_under_test
+
+
+class TestFetchAllMetadataDelegation:
+    """mcp_server._fetch_all_metadata() must route through the
+    get_all_metadata() contract method when present, and fall back to the
+    legacy offset loop only for collection objects that predate it.
+    """
+
+    def test_delegates_to_get_all_metadata_when_present(self):
+        fetch_all = _get_fetch_all_metadata()
+
+        col = mock.MagicMock()
+        col.get_all_metadata.return_value = [{"wing": "a"}, {"wing": "b"}]
+
+        result = fetch_all(col)
+
+        col.get_all_metadata.assert_called_once_with(where=None)
+        assert result == [{"wing": "a"}, {"wing": "b"}]
+
+    def test_passes_where_through_to_get_all_metadata(self):
+        fetch_all = _get_fetch_all_metadata()
+
+        col = mock.MagicMock()
+        col.get_all_metadata.return_value = [{"wing": "a"}]
+
+        fetch_all(col, where={"wing": "a"})
+
+        col.get_all_metadata.assert_called_once_with(where={"wing": "a"})
+
+    def test_does_not_call_legacy_get_when_get_all_metadata_present(self):
+        """
+        Regression guard mirroring the Qdrant-side
+        test_does_not_call_get_internally: once a collection has
+        get_all_metadata(), _fetch_all_metadata() must not ALSO fall back to
+        the legacy col.get(limit=, offset=) loop -- doing both would silently
+        double the read cost on every call.
+        """
+        fetch_all = _get_fetch_all_metadata()
+
+        col = mock.MagicMock()
+        col.get_all_metadata.return_value = []
+        col.get = mock.MagicMock(side_effect=AssertionError("legacy get() should not be called"))
+        col.count = mock.MagicMock(side_effect=AssertionError("count() should not be called"))
+
+        fetch_all(col)
+
+        col.get.assert_not_called()
+        col.count.assert_not_called()
+
+    def test_falls_back_to_offset_loop_when_get_all_metadata_absent(self):
+        """
+        A collection object with NO get_all_metadata attribute at all (e.g.
+        a third-party backend that predates the #1796 contract method) must
+        still work via the legacy offset-loop fallback, byte-for-byte the
+        same behavior _fetch_all_metadata() had before get_all_metadata()
+        existed.
+        """
+        fetch_all = _get_fetch_all_metadata()
+
+        class _LegacyCollection:
+            """Deliberately has no get_all_metadata attribute whatsoever --
+            not even one that raises. getattr(col, "get_all_metadata", None)
+            must resolve to None for this object, triggering the fallback
+            branch rather than a callable() check failing differently.
+            """
+
+            def __init__(self):
+                self._data = [{"wing": "x"}, {"wing": "y"}, {"wing": "z"}]
+
+            def count(self):
+                return len(self._data)
+
+            def get(self, *, include, limit, offset, where=None):
+                page = self._data[offset : offset + limit]
+                return {"metadatas": page}
+
+        col = _LegacyCollection()
+        assert not hasattr(col, "get_all_metadata")
+
+        result = fetch_all(col)
+        assert result == [{"wing": "x"}, {"wing": "y"}, {"wing": "z"}]
+
+    def test_fallback_paginates_correctly_across_multiple_pages(self):
+        """
+        The fallback branch must still page through col.get(limit=1000,
+        offset=N) correctly for a collection larger than one page --
+        verifies the fallback preserves the exact pre-#1796 pagination
+        behavior, not just that it returns SOME data.
+        """
+        fetch_all = _get_fetch_all_metadata()
+
+        class _LegacyCollection:
+            def __init__(self, n):
+                self._data = [{"wing": f"w{i}"} for i in range(n)]
+                self.get_calls = []
+
+            def count(self):
+                return len(self._data)
+
+            def get(self, *, include, limit, offset, where=None):
+                self.get_calls.append((limit, offset))
+                page = self._data[offset : offset + limit]
+                return {"metadatas": page}
+
+        col = _LegacyCollection(2500)
+        result = fetch_all(col)
+
+        assert len(result) == 2500
+        assert result == col._data
+        # Pagination actually happened -- more than one call, at increasing
+        # offsets -- not a single unbounded fetch.
+        assert len(col.get_calls) >= 3
+        offsets = [offset for _, offset in col.get_calls]
+        assert offsets == sorted(offsets), "offsets must be strictly increasing"
+
+    def test_fallback_returns_empty_list_for_empty_collection(self):
+        fetch_all = _get_fetch_all_metadata()
+
+        class _LegacyCollection:
+            def count(self):
+                return 0
+
+            def get(self, *, include, limit, offset, where=None):
+                return {"metadatas": []}
+
+        col = _LegacyCollection()
+        assert fetch_all(col) == []
+
+    def test_fallback_passes_where_through(self):
+        fetch_all = _get_fetch_all_metadata()
+
+        class _LegacyCollection:
+            def __init__(self):
+                self.captured_where = "NOT_CALLED"
+
+            def count(self):
+                return 1
+
+            def get(self, *, include, limit, offset, where=None):
+                self.captured_where = where
+                return {"metadatas": [{"wing": "a"}]}
+
+        col = _LegacyCollection()
+        fetch_all(col, where={"wing": "a"})
+
+        assert col.captured_where == {"wing": "a"}
diff --git a/tests/test_repair.py b/tests/test_repair.py
index 8824dcea4..b0743a842 100644
--- a/tests/test_repair.py
+++ b/tests/test_repair.py
@@ -312,6 +312,32 @@ def test_rebuild_index_empty_palace(mock_backend_cls, mock_shutil, tmp_path):
     mock_backend.delete_collection.assert_not_called()
 
 
+@patch("mempalace.repair.ChromaBackend")
+def test_rebuild_index_read_failure_points_to_from_sqlite(mock_backend_cls, tmp_path):
+    """A chromadb HNSW compactor failure makes the first ``count()`` read
+    raise; rebuild_index cannot recover it, so it must direct the user to
+    ``repair --mode from-sqlite`` (rows are intact in chroma.sqlite3) rather
+    than re-mining from source files, which drops MCP-added drawers (#1843)."""
+    sqlite3.connect(str(tmp_path / "chroma.sqlite3")).close()
+    mock_col = MagicMock()
+    mock_col.count.side_effect = Exception("Failed to apply logs to the hnsw segment writer")
+    mock_backend_cls.return_value.get_collection.return_value = mock_col
+    msgs: list[str] = []
+    repair.rebuild_index(palace_path=str(tmp_path), progress=msgs.append)
+    out = "\n".join(msgs)
+    assert "mempalace repair --mode from-sqlite --archive-existing" in out
+    assert "may need to be re-mined" not in out
+
+
+def test_index_read_recovery_guidance_recommends_from_sqlite():
+    """The shared guidance helper names the from-sqlite recovery command in
+    full and never tells the user the palace ``may need to be re-mined`` —
+    the harmful pre-#1843 advice that silently drops MCP-added drawers."""
+    msg = repair.index_read_recovery_guidance()
+    assert "mempalace repair --mode from-sqlite --archive-existing" in msg
+    assert "may need to be re-mined" not in msg
+
+
 @patch("mempalace.repair.shutil")
 @patch("mempalace.repair.ChromaBackend")
 def test_rebuild_index_success(mock_backend_cls, mock_shutil, tmp_path):
@@ -1995,3 +2021,26 @@ def test_rebuild_index_calls_vacuum(mock_backend_cls, mock_shutil, tmp_path):
         args, kwargs = mock_vacuum.call_args
         assert args[0] == str(tmp_path)
         assert "progress" in kwargs
+
+
+def test_rebuild_from_sqlite_preserves_knowledge_graph_sidecar(tmp_path):
+    """The from-sqlite repair path must not drop the KG SQLite sidecar."""
+    src = tmp_path / "source"
+    dest = tmp_path / "dest"
+    src.mkdir()
+    dest.mkdir()
+
+    (src / "knowledge_graph.sqlite3").write_text("kg-db", encoding="utf-8")
+    (src / "knowledge_graph.sqlite3-wal").write_text("kg-wal", encoding="utf-8")
+    (src / "knowledge_graph.sqlite3-shm").write_text("kg-shm", encoding="utf-8")
+
+    copied = repair._preserve_knowledge_graph_sqlite(str(src), str(dest))
+
+    assert copied == [
+        "knowledge_graph.sqlite3",
+        "knowledge_graph.sqlite3-wal",
+        "knowledge_graph.sqlite3-shm",
+    ]
+    assert (dest / "knowledge_graph.sqlite3").read_text(encoding="utf-8") == "kg-db"
+    assert (dest / "knowledge_graph.sqlite3-wal").read_text(encoding="utf-8") == "kg-wal"
+    assert (dest / "knowledge_graph.sqlite3-shm").read_text(encoding="utf-8") == "kg-shm"
diff --git a/tests/test_searcher.py b/tests/test_searcher.py
index 236d5b98d..c99ae6537 100644
--- a/tests/test_searcher.py
+++ b/tests/test_searcher.py
@@ -9,7 +9,49 @@
 
 import pytest
 
-from mempalace.searcher import SearchError, search, search_memories
+from mempalace.searcher import SearchError, build_where_filter, search, search_memories
+
+
+# ── build_where_filter (unit) ──────────────────────────────────────────
+
+
+class TestBuildWhereFilter:
+    """build_where_filter composes a ChromaDB where clause from optional
+    wing / room / source_file constraints (#1815). ChromaDB needs a ``$and``
+    only when ≥2 clauses are present; a single clause is returned bare and
+    zero clauses yield an empty filter."""
+
+    def test_no_filters_returns_empty(self):
+        assert build_where_filter() == {}
+
+    def test_wing_only(self):
+        assert build_where_filter(wing="backend") == {"wing": "backend"}
+
+    def test_room_only(self):
+        assert build_where_filter(room="auth") == {"room": "auth"}
+
+    def test_wing_and_room(self):
+        assert build_where_filter(wing="backend", room="auth") == {
+            "$and": [{"wing": "backend"}, {"room": "auth"}]
+        }
+
+    def test_source_file_only(self):
+        assert build_where_filter(source_file="auth.py") == {"source_file": "auth.py"}
+
+    def test_wing_and_source_file(self):
+        assert build_where_filter(wing="backend", source_file="auth.py") == {
+            "$and": [{"wing": "backend"}, {"source_file": "auth.py"}]
+        }
+
+    def test_room_and_source_file(self):
+        assert build_where_filter(room="auth", source_file="auth.py") == {
+            "$and": [{"room": "auth"}, {"source_file": "auth.py"}]
+        }
+
+    def test_wing_room_and_source_file(self):
+        assert build_where_filter(wing="backend", room="auth", source_file="auth.py") == {
+            "$and": [{"wing": "backend"}, {"room": "auth"}, {"source_file": "auth.py"}]
+        }
 
 
 # ── search_memories (API) ──────────────────────────────────────────────
@@ -34,6 +76,68 @@ def test_wing_and_room_filter(self, palace_path, seeded_collection):
         result = search_memories("code", palace_path, wing="project", room="frontend")
         assert all(r["wing"] == "project" and r["room"] == "frontend" for r in result["results"])
 
+    def test_source_file_filter(self, palace_path, seeded_collection):
+        result = search_memories("authentication module", palace_path, source_file="auth.py")
+        assert result["results"], "exact source_file match should return its drawer"
+        assert all(r["source_file"] == "auth.py" for r in result["results"])
+
+    def test_source_file_with_wing_filter(self, palace_path, seeded_collection):
+        result = search_memories("database", palace_path, wing="project", source_file="db.py")
+        assert result["results"]
+        assert all(
+            r["source_file"] == "db.py" and r["wing"] == "project" for r in result["results"]
+        )
+
+    def test_nonmatching_source_file_returns_empty_not_error(self, palace_path, seeded_collection):
+        result = search_memories("authentication", palace_path, source_file="nope.md")
+        assert "error" not in result
+        assert result["results"] == []
+
+    def test_filters_envelope_includes_source_file(self, palace_path, seeded_collection):
+        result = search_memories("authentication", palace_path, source_file="auth.py")
+        assert result["filters"]["source_file"] == "auth.py"
+
+    def test_result_exposes_full_source_path(self, palace_path, seeded_collection):
+        # The displayed source_file is a basename; source_path carries the full
+        # stored value so a caller can round-trip it back into a source_file filter.
+        result = search_memories("authentication module", palace_path)
+        hit = result["results"][0]
+        assert hit["source_file"] == "auth.py"
+        assert hit["source_path"] == "auth.py"
+
+    def test_source_file_filter_matches_full_path_not_basename(self, palace_path):
+        from mempalace.palace import get_collection
+
+        col = get_collection(palace_path, create=True)
+        col.upsert(
+            ids=["fp1"],
+            documents=["The deploy script restarts the gunicorn workers nightly."],
+            metadatas=[{"wing": "ops", "room": "deploy", "source_file": "/srv/app/deploy.sh"}],
+        )
+        # The full stored path matches and round-trips via source_path.
+        hit = search_memories(
+            "deploy gunicorn workers", palace_path, source_file="/srv/app/deploy.sh"
+        )
+        assert [h["source_path"] for h in hit["results"]] == ["/srv/app/deploy.sh"]
+        assert [h["source_file"] for h in hit["results"]] == ["deploy.sh"]
+        # The basename does NOT match — exact full-path semantics only (issue v1).
+        miss = search_memories("deploy gunicorn workers", palace_path, source_file="deploy.sh")
+        assert miss["results"] == []
+
+    def test_source_file_filter_honored_in_bm25_fallback(self, palace_path, seeded_collection):
+        # vector_disabled routes through _bm25_only_via_sqlite (#1222); the
+        # source_file filter must hold there too, not silently no-op.
+        result = search_memories(
+            "authentication module",
+            palace_path,
+            source_file="auth.py",
+            vector_disabled=True,
+            collection_name="mempalace_drawers",
+        )
+        assert "error" not in result
+        assert result["results"], "BM25 fallback should still find the auth drawer"
+        assert all(r["source_file"] == "auth.py" for r in result["results"])
+
     def test_n_results_limit(self, palace_path, seeded_collection):
         result = search_memories("code", palace_path, n_results=2)
         assert len(result["results"]) <= 2
diff --git a/tests/test_sqlite_exact_backend.py b/tests/test_sqlite_exact_backend.py
index 82322d35d..796930559 100644
--- a/tests/test_sqlite_exact_backend.py
+++ b/tests/test_sqlite_exact_backend.py
@@ -139,6 +139,159 @@ def test_sqlite_exact_get_preserves_requested_id_order_and_duplicates(tmp_path):
     assert result.documents == ["doc b", "doc a", "doc b"]
 
 
+def _doc_select_sql(col, action):
+    """Run ``action`` while tracing SQL; return (result, [documents SELECTs]).
+
+    The documents-table scan in ``_rows`` is the only statement that is both
+    ``FROM documents`` and ``ORDER BY rowid`` (``count`` lacks the ORDER BY),
+    so filtering on both isolates it from collection-id lookups and commits.
+    """
+    statements = []
+    conn = col._handle.conn
+    conn.set_trace_callback(statements.append)
+    try:
+        result = action()
+    finally:
+        conn.set_trace_callback(None)
+    selects = [s for s in statements if "FROM documents" in s and "ORDER BY rowid" in s]
+    return result, selects
+
+
+def _seed(col, n):
+    col.add(
+        ids=[f"d{i}" for i in range(n)],
+        documents=[f"doc {i}" for i in range(n)],
+        metadatas=[{"wing": "w", "n": i} for i in range(n)],
+        embeddings=[[float(i), 1.0] for i in range(n)],
+    )
+
+
+def test_sqlite_exact_get_unfiltered_page_pushes_limit_offset(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 10)
+
+    result, selects = _doc_select_sql(
+        col, lambda: col.get(limit=3, offset=2, include=["documents"])
+    )
+
+    assert result.ids == ["d2", "d3", "d4"]
+    assert result.documents == ["doc 2", "doc 3", "doc 4"]
+    assert len(selects) == 1
+    assert "LIMIT" in selects[0]
+    assert "OFFSET" in selects[0]
+
+
+def test_sqlite_exact_get_filtered_page_stays_on_full_scan(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 6)
+
+    # With a filter the rows are dropped after the scan, so LIMIT/OFFSET must
+    # not reach SQL; the page is taken in Python over the filtered rows.
+    result, selects = _doc_select_sql(
+        col,
+        lambda: col.get(where={"wing": "w"}, limit=2, offset=1, include=["metadatas"]),
+    )
+
+    assert result.ids == ["d1", "d2"]
+    assert len(selects) == 1
+    assert "LIMIT" not in selects[0]
+    assert "OFFSET" not in selects[0]
+
+
+def test_sqlite_exact_get_offset_only_and_limit_only_push(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 5)
+
+    limit_only, limit_sql = _doc_select_sql(col, lambda: col.get(limit=2))
+    assert limit_only.ids == ["d0", "d1"]
+    assert len(limit_sql) == 1
+    assert "LIMIT" in limit_sql[0]
+    assert "OFFSET" not in limit_sql[0]
+
+    offset_only, offset_sql = _doc_select_sql(col, lambda: col.get(offset=3))
+    assert offset_only.ids == ["d3", "d4"]
+    assert len(offset_sql) == 1
+    assert "OFFSET" in offset_sql[0]
+    # SQLite requires a LIMIT before OFFSET; an offset-only page uses LIMIT -1.
+    assert "LIMIT" in offset_sql[0]
+
+
+def test_sqlite_exact_get_negative_bounds_use_python_slice(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 5)
+
+    # Negative limit means Python "all but last", which a SQL LIMIT (negative ==
+    # unbounded in SQLite) cannot express, so it must stay on the slice path.
+    neg_limit, neg_limit_sql = _doc_select_sql(col, lambda: col.get(limit=-1))
+    assert neg_limit.ids == ["d0", "d1", "d2", "d3"]
+    assert len(neg_limit_sql) == 1
+    assert "LIMIT" not in neg_limit_sql[0]
+
+    # Negative offset means Python "last N"; it must not reach SQL either.
+    neg_offset, neg_offset_sql = _doc_select_sql(col, lambda: col.get(offset=-2))
+    assert neg_offset.ids == ["d3", "d4"]
+    assert len(neg_offset_sql) == 1
+    assert "OFFSET" not in neg_offset_sql[0]
+
+
+def test_sqlite_exact_get_pages_tile_without_overlap(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 10)
+
+    seen = []
+    offset = 0
+    while True:
+        page = col.get(limit=4, offset=offset)
+        if not page.ids:
+            break
+        seen.extend(page.ids)
+        offset += len(page.ids)
+
+    assert seen == [f"d{i}" for i in range(10)]
+    # The same set, same rowid order, as a single unfiltered scan.
+    assert col.get().ids == seen
+
+
+def test_sqlite_exact_get_limit_zero_pushes_empty_page(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 3)
+
+    # limit=0 is a real bound, not "no limit": it pushes LIMIT 0 and returns
+    # nothing, matching the old rows[:0] slice. Guards the `is not None` check
+    # against an `if limit:` regression that would treat 0 as unbounded.
+    result, selects = _doc_select_sql(col, lambda: col.get(limit=0))
+    assert result.ids == []
+    assert len(selects) == 1
+    assert "LIMIT" in selects[0]
+
+
+def test_sqlite_exact_get_offset_zero_is_a_full_scan(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 3)
+
+    # offset=0 with no limit is not a page request, so it stays on the full scan.
+    result, selects = _doc_select_sql(col, lambda: col.get(offset=0))
+    assert result.ids == ["d0", "d1", "d2"]
+    assert len(selects) == 1
+    assert "LIMIT" not in selects[0]
+    assert "OFFSET" not in selects[0]
+
+
+def test_sqlite_exact_get_ids_with_page_slices_in_python(tmp_path):
+    _backend, col = _collection(tmp_path)
+    _seed(col, 5)
+
+    # ids force the Python path even with a page: the requested order is kept,
+    # then offset/limit slice the reordered list with no SQL LIMIT/OFFSET.
+    result, selects = _doc_select_sql(
+        col, lambda: col.get(ids=["d4", "d3", "d2", "d1"], offset=1, limit=2)
+    )
+    assert result.ids == ["d3", "d2"]
+    assert len(selects) == 1
+    assert "LIMIT" not in selects[0]
+    assert "OFFSET" not in selects[0]
+
+
 def test_sqlite_exact_upsert_delete_and_multi_collection_isolation(tmp_path):
     backend, drawers = _collection(tmp_path, "drawers")
     palace = PalaceRef(id=str(tmp_path), local_path=str(tmp_path))
diff --git a/tests/test_sync.py b/tests/test_sync.py
index d32db688c..148bdc61c 100644
--- a/tests/test_sync.py
+++ b/tests/test_sync.py
@@ -1355,16 +1355,16 @@ def test_apply_flag_deletes(self, monkeypatch, tmp_dir, synced_world, capsys):
     def test_cli_emits_wal_on_apply(self, monkeypatch, synced_world):
         """F8 regression: cmd_sync must wire `_wal_log` so CLI deletes are
         audited. Without this, scripted CLI invocations leave no trail."""
-        from mempalace import cli, mcp_server
+        from mempalace import cli, wal
 
         seen = []
-        original = mcp_server._wal_log
+        original = wal._wal_log
 
         def recording_wal(operation, params, result=None):
             seen.append((operation, params, result))
             original(operation, params, result)
 
-        monkeypatch.setattr(mcp_server, "_wal_log", recording_wal)
+        monkeypatch.setattr(wal, "_wal_log", recording_wal)
 
         argv = [
             "mempalace",
@@ -1397,3 +1397,93 @@ def test_apply_without_scope_exits_2(self, monkeypatch, synced_world, capsys):
         with pytest.raises(SystemExit) as exc_info:
             cli.main()
         assert exc_info.value.code == 2
+
+
+class TestServiceRunSyncReport:
+    """Daemon path (service.run_sync) must render the same report shape as the
+    direct CLI path, with no KeyError on report['deleted'] (regression: the old
+    code read a non-existent 'deleted' key and dropped no_source/out_of_scope/
+    by_source and the Re-run/Removed hints).
+
+    sync_palace is mocked so the test exercises only run_sync's report
+    formatting — opening the real Chroma collection reinitializes the embedder,
+    which disturbs sys.stdout and defeats capsys.
+    """
+
+    @pytest.fixture(autouse=True)
+    def _cache_mcp_server_import(self):
+        """run_sync lazily imports mempalace.mcp_server, whose import initializes
+        the embedder and rebinds sys.stdout — defeating capsys for any prints
+        after sync_palace returns. Lazy-load it here, scoped to just these report
+        tests (not the whole module at collection time), so the import is a cached
+        no-op by the time run_sync runs and its report output stays capturable.
+        """
+        import mempalace.mcp_server  # noqa: F401
+
+        yield
+
+    def _fake_report(self, **overrides):
+        report = {
+            "scanned": 6,
+            "kept": 1,
+            "gitignored": 2,
+            "missing": 1,
+            "no_source": 1,
+            "out_of_scope": 1,
+            "removed_drawers": 0,
+            "removed_closets": 0,
+            "dry_run": True,
+            "by_source": {"src/a.py": 2, "src/b.py": 1},
+        }
+        report.update(overrides)
+        return report
+
+    def test_dry_run_renders_full_report(self, monkeypatch, tmp_dir, capsys):
+        import mempalace.sync as sync_module
+        from mempalace import service
+
+        palace = os.path.join(tmp_dir, "palace")
+        os.makedirs(palace)
+        # Satisfy run_sync's detect_backend_for_path guard without spinning up
+        # the real Chroma/embedder stack (which would disturb sys.stdout).
+        Path(palace, "chroma.sqlite3").touch()
+        monkeypatch.setattr(
+            sync_module,
+            "sync_palace",
+            lambda **kw: self._fake_report(dry_run=True),
+        )
+        result = service.run_sync({"palace_path": palace, "dir": tmp_dir, "dry_run": True})
+        assert result["success"] is True
+        out = capsys.readouterr().out
+        # The fields the stripped daemon report used to drop.
+        assert "No source:" in out
+        assert "Out of scope:" in out
+        # by_source top sources block.
+        assert "Top sources to remove" in out
+        assert "src/a.py  (2)" in out
+        # Re-run hint fires when there is something to remove.
+        assert "Re-run with --apply" in out
+        # The old KeyError line must not be present.
+        assert "Deleted:" not in out
+
+    def test_apply_renders_removed_counts(self, monkeypatch, tmp_dir, capsys):
+        import mempalace.sync as sync_module
+        from mempalace import service
+
+        palace = os.path.join(tmp_dir, "palace")
+        os.makedirs(palace)
+        Path(palace, "chroma.sqlite3").touch()
+        monkeypatch.setattr(
+            sync_module,
+            "sync_palace",
+            lambda **kw: self._fake_report(
+                dry_run=False, removed_drawers=3, removed_closets=2, by_source={"src/a.py": 3}
+            ),
+        )
+        result = service.run_sync({"palace_path": palace, "dir": tmp_dir, "dry_run": False})
+        assert result["success"] is True
+        out = capsys.readouterr().out
+        # Apply mode prints the removed-drawers/closets line, not the Re-run hint.
+        assert "Removed 3 drawers, 2 closets" in out
+        assert "Top sources removed" in out
+        assert "Re-run with --apply" not in out
diff --git a/tests/test_wal.py b/tests/test_wal.py
new file mode 100644
index 000000000..69a263af7
--- /dev/null
+++ b/tests/test_wal.py
@@ -0,0 +1,42 @@
+import subprocess
+import sys
+
+
+def test_wal_import_has_no_mcp_server_side_effect():
+    """Importing mempalace.wal must NOT import mempalace.mcp_server.
+
+    mcp_server installs MCP stdio protection at import time (os.dup2(2, 1) and
+    sys.stdout = sys.stderr). The CLI sync path and the daemon service layer
+    obtain _wal_log from mempalace.wal precisely so they can audit writes
+    without triggering that process-global redirect. Run in a fresh subprocess
+    so the already-imported mcp_server in this test session can't mask a
+    regression.
+    """
+    code = (
+        "import sys\n"
+        "import mempalace.wal\n"
+        "assert 'mempalace.mcp_server' not in sys.modules, "
+        "'importing mempalace.wal pulled in mempalace.mcp_server'\n"
+        "print('ok')\n"
+    )
+    result = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True)
+    assert result.returncode == 0, result.stderr
+    assert "ok" in result.stdout
+
+
+def test_wal_log_redacts_and_writes(tmp_path, monkeypatch):
+    """_wal_log lives in mempalace.wal now; smoke-test redaction + write there."""
+    import json
+
+    from mempalace import wal
+
+    wal_file = tmp_path / "wal" / "write_log.jsonl"
+    monkeypatch.setattr(wal, "_WAL_FILE", wal_file)
+    monkeypatch.setattr(wal, "_WAL_INITIALIZED_DIR", None)
+
+    wal._wal_log("op", {"entry": "secret diary text", "safe": "ok"})
+
+    entry = json.loads(wal_file.read_text().strip())
+    assert entry["operation"] == "op"
+    assert entry["params"]["entry"].startswith("[REDACTED")
+    assert entry["params"]["safe"] == "ok"
diff --git a/uv.lock b/uv.lock
index 17efd938c..f83a0dce1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1951,7 +1951,7 @@ wheels = [
 
 [[package]]
 name = "mempalace"
-version = "3.4.1"
+version = "3.5.0"
 source = { editable = "." }
 dependencies = [
     { name = "chromadb" },
@@ -1983,6 +1983,8 @@ dev = [
     { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
     { name = "pytest", version = "9.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
     { name = "pytest-cov" },
+    { name = "pytest-rerunfailures", version = "16.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "pytest-rerunfailures", version = "16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
     { name = "ruff" },
 ]
 dml = [
@@ -2019,6 +2021,8 @@ dev = [
     { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
     { name = "pytest", version = "9.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
     { name = "pytest-cov" },
+    { name = "pytest-rerunfailures", version = "16.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "pytest-rerunfailures", version = "16.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
     { name = "ruff" },
 ]
 
@@ -2039,9 +2043,10 @@ requires-dist = [
     { name = "psycopg", extras = ["binary"], marker = "extra == 'pgvector'", specifier = ">=3.1" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
     { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
+    { name = "pytest-rerunfailures", marker = "extra == 'dev'", specifier = ">=12.0" },
     { name = "python-dateutil", specifier = ">=2.8" },
     { name = "pyyaml", specifier = ">=6.0,<7" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = "==0.15.15" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = "==0.15.18" },
     { name = "striprtf", marker = "extra == 'extract'", specifier = ">=0.0.27" },
     { name = "tokenizers", specifier = ">=0.15" },
     { name = "tomli", marker = "python_full_version < '3.11'", specifier = ">=2.0.0" },
@@ -2056,7 +2061,8 @@ dev = [
     { name = "psutil", specifier = ">=5.9" },
     { name = "pytest", specifier = ">=7.0" },
     { name = "pytest-cov", specifier = ">=4.0" },
-    { name = "ruff", specifier = "==0.15.15" },
+    { name = "pytest-rerunfailures", specifier = ">=12.0" },
+    { name = "ruff", specifier = "==0.15.18" },
 ]
 
 [[package]]
@@ -4437,6 +4443,49 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" },
 ]
 
+[[package]]
+name = "pytest-rerunfailures"
+version = "16.0.1"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+dependencies = [
+    { name = "packaging", marker = "python_full_version < '3.10'" },
+    { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/26/53/a543a76f922a5337d10df22441af8bf68f1b421cadf9aedf8a77943b81f6/pytest_rerunfailures-16.0.1.tar.gz", hash = "sha256:ed4b3a6e7badb0a720ddd93f9de1e124ba99a0cb13bc88561b3c168c16062559", size = 27612, upload-time = "2025-09-02T06:48:25.193Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/73/67dc14cda1942914e70fbb117fceaf11e259362c517bdadd76b0dd752524/pytest_rerunfailures-16.0.1-py3-none-any.whl", hash = "sha256:0bccc0e3b0e3388275c25a100f7077081318196569a121217688ed05e58984b9", size = 13610, upload-time = "2025-09-02T06:48:23.615Z" },
+]
+
+[[package]]
+name = "pytest-rerunfailures"
+version = "16.3"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.15' and sys_platform == 'win32'",
+    "python_full_version >= '3.15' and sys_platform != 'win32'",
+    "python_full_version == '3.14.*' and sys_platform == 'win32'",
+    "python_full_version == '3.14.*' and sys_platform != 'win32'",
+    "python_full_version == '3.13.*' and sys_platform == 'win32'",
+    "python_full_version == '3.13.*' and sys_platform != 'win32'",
+    "python_full_version == '3.12.*' and sys_platform == 'win32'",
+    "python_full_version == '3.12.*' and sys_platform != 'win32'",
+    "python_full_version == '3.11.*' and sys_platform == 'win32'",
+    "python_full_version == '3.11.*' and sys_platform != 'win32'",
+    "python_full_version == '3.10.*' and sys_platform == 'win32'",
+    "python_full_version == '3.10.*' and sys_platform != 'win32'",
+]
+dependencies = [
+    { name = "packaging", marker = "python_full_version >= '3.10'" },
+    { name = "pytest", version = "9.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/4d/f0/74f8e685be7ecd1572c1256132f18fce3a665d7e07649a3f23b7eb2d3bec/pytest_rerunfailures-16.3.tar.gz", hash = "sha256:37c9b1231c8083e9f4e724f50f7a21241822f9516c15c700ebbf218d6452355c", size = 34148, upload-time = "2026-05-22T06:51:22.292Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f8/98/58a71d68d3126d7f6a6ed1944c37ec207a4ff3dc66cad3bed7b59d38df61/pytest_rerunfailures-16.3-py3-none-any.whl", hash = "sha256:6bdfb8ffb46c46072e6c16bdedee38b6c13eac620d9415ed5b63152cbf283170", size = 15396, upload-time = "2026-05-22T06:51:20.547Z" },
+]
+
 [[package]]
 name = "python-dateutil"
 version = "2.9.0.post0"
@@ -5019,27 +5068,27 @@ wheels = [
 
 [[package]]
 name = "ruff"
-version = "0.15.15"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/84/6f/a76f7d96e5c962f5b69cee865e49c15c1116897c01990faa8a57edb62e7f/ruff-0.15.15.tar.gz", hash = "sha256:b8dff018130b46d8e5bf0f926ef6b60cf871d6d5ae45fc9334e09632daa741d6", size = 4706985, upload-time = "2026-05-28T14:16:57.784Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/fa/9d/3a45c05b8ab04b4705989de70a79008e27c8003296a0feaee9edc18dd7e9/ruff-0.15.15-py3-none-linux_armv6l.whl", hash = "sha256:cf93e5388f412e1b108b1f8b34a6e036b70fe8aff89393befad96fe48670311b", size = 10710652, upload-time = "2026-05-28T14:16:06.701Z" },
-    { url = "https://files.pythonhosted.org/packages/05/66/da974431624bf3b49f6ee1f9543c02d929ff1cba78b0d5a79c38cf21f744/ruff-0.15.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ac5a646d1f6a7dadd5d50842dae2c1f9862ac887ef5d1b1375e02def791fde6e", size = 11096615, upload-time = "2026-05-28T14:16:23.313Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/09/7443452e5d290230a712103f2fdceeef7184f3ec99a2bd01c8be78aaceb5/ruff-0.15.15-py3-none-macosx_11_0_arm64.whl", hash = "sha256:77d955a431430c66f72dd94e379ad38a16daea3d25094872ac4edf9e797be530", size = 10436683, upload-time = "2026-05-28T14:16:40.974Z" },
-    { url = "https://files.pythonhosted.org/packages/53/01/d330c26a57fa4f3943a14424904027428315b700fe4d14a84bb123a649e5/ruff-0.15.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7614ee79c69788cf6cedd568069ade9cecc22a1ad20494efe8d0c9ebb4b622d4", size = 10769064, upload-time = "2026-05-28T14:16:28.905Z" },
-    { url = "https://files.pythonhosted.org/packages/1d/85/cc8770f8bdff541b1da8392d1634141fe4a0e3f4ee596605959b7906c27f/ruff-0.15.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3cdb1679e06a1f6b47bc384714ae96f6e2fb65ca441eb78c43d2ca554176ce1f", size = 10511987, upload-time = "2026-05-28T14:16:43.732Z" },
-    { url = "https://files.pythonhosted.org/packages/7c/29/8c190c1472b63013583ba391f3342036e02010544c1270455ed8e519bdf3/ruff-0.15.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2728b93d7b23a603ea2c0ac6eb73d760bd38ec9de35f35fb41e18f7a3fee7622", size = 11275100, upload-time = "2026-05-28T14:16:55.244Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/6b/7e145ce2cc8e63d6834eca03d83a0e18d121def5c69f91b4cf4011ed4879/ruff-0.15.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be582fcc0db438902c7792b08d6ddf6c9b9e21addaa10092c2c741cfb09e5a45", size = 12176903, upload-time = "2026-05-28T14:16:14.368Z" },
-    { url = "https://files.pythonhosted.org/packages/80/a3/d5974637f68e451f7fadf015cf3101d1cd7d8ba5027cffe0b9e3826ebe6b/ruff-0.15.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7aa77465b8ecaf1a27bea098d696f7fed5e1eccbd10b321b682d6de586ae5627", size = 11404550, upload-time = "2026-05-28T14:16:20.138Z" },
-    { url = "https://files.pythonhosted.org/packages/fe/1c/e6e5e568f22be4fb05d6244234aba384c06b451252453b821e1a529263cf/ruff-0.15.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48decfa11d740de4889de623be1463308346312f2409a56e24aa280c86162dc4", size = 11382027, upload-time = "2026-05-28T14:16:46.615Z" },
-    { url = "https://files.pythonhosted.org/packages/1d/01/170921b49fcd2e8858825593f91cf7146c3e40a5c3e6df763e4bb0484dde/ruff-0.15.15-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:a5015088452ca0081387063649ec67f06d3d1d6b8b936a1f836b5e9657ecd48c", size = 11366041, upload-time = "2026-05-28T14:16:26.247Z" },
-    { url = "https://files.pythonhosted.org/packages/87/54/a7bad711d7de93254e15e06a4c375b89a03d18de45d3e5dcc86a4472fb1a/ruff-0.15.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:f5294aab6356c81600fcdea3a62bb1b924dfd5e91767c12318d3f68f86af57cd", size = 10741795, upload-time = "2026-05-28T14:16:17.11Z" },
-    { url = "https://files.pythonhosted.org/packages/c9/31/38c075963668f8b41c6914ee0f6f318727fbe30ab9145cb29e6df464c5fa/ruff-0.15.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:db5bd4d802415cca656dc1616070b725952d6ae95eb5d4831e49fbd94a38f75f", size = 10511117, upload-time = "2026-05-28T14:16:31.767Z" },
-    { url = "https://files.pythonhosted.org/packages/9d/96/6ff689e1f7e375d1d97075eca022f74c2bab59554a432fe4d2e6f091986a/ruff-0.15.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:587a6278ed42059191c1a466e490bd7930fb50bd2e255398bc29616c895a61cb", size = 10994867, upload-time = "2026-05-28T14:16:35.149Z" },
-    { url = "https://files.pythonhosted.org/packages/c3/c2/5dce0ab9f92a8d534fa62b9bf9caca3eddb8c1a81b616f5e195ada4f0d6e/ruff-0.15.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:df0c1c084f5f4be9812f61518a45c440d3c30d69ce4bf6c5270e66d38338f02a", size = 11482101, upload-time = "2026-05-28T14:16:49.598Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/c0/1003b60edd697c649faf61f1a34094b1abb38fb3d1181e3f895781250a08/ruff-0.15.15-py3-none-win32.whl", hash = "sha256:29428ea79694afbe756d45fd59b36f22b6b020dc0443cf7de0173046236964b9", size = 10716774, upload-time = "2026-05-28T14:16:52.337Z" },
-    { url = "https://files.pythonhosted.org/packages/02/a8/1269eddd6945a06c23f055ef7848886e37cf9d6a8bebb386a3115f01470c/ruff-0.15.15-py3-none-win_amd64.whl", hash = "sha256:8df0323902e15e24bc4bf246da830573d3cf3352bd0b9a164eab335d111ff4a4", size = 11868463, upload-time = "2026-05-28T14:16:11.333Z" },
-    { url = "https://files.pythonhosted.org/packages/4e/b2/920464c907b191e37469d477a1aa8bc048b8f36c4c1610dfa4ab87b39e18/ruff-0.15.15-py3-none-win_arm64.whl", hash = "sha256:3c8ceca6792f38196b8f589bc92eccd03eef286602da92e5dc05cc42ef6441b7", size = 11138498, upload-time = "2026-05-28T14:16:38.425Z" },
+version = "0.15.18"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/74/98/1295ad5a5aa9bc85bdcdfa5d82fe7b49c61af5657df4f227637ff9de0da6/ruff-0.15.18.tar.gz", hash = "sha256:2698a964c70e8bf402dcb99c8810472d270d141e7aa8c4e13599fd52033a2f33", size = 4761437, upload-time = "2026-06-18T18:25:39.224Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b9/d0/686e984941269621e2be72612d5c1e461f8f7b38415a2a7d7a81c8ae6715/ruff-0.15.18-py3-none-linux_armv6l.whl", hash = "sha256:8b6850172348c8381b8b3084c5915a4393c2373b9b54cd5b5e1ea15812bc10df", size = 10887308, upload-time = "2026-06-18T18:25:03.062Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/21/bc4123e3f5515ee99f8ce1eb93a14a0628fe4d1678663cd08f933ac16931/ruff-0.15.18-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3fccc153a85417dcd976883160cacce486997b0a0058dd18f54b8aaaac7d1ce2", size = 11281305, upload-time = "2026-06-18T18:25:30.026Z" },
+    { url = "https://files.pythonhosted.org/packages/51/93/4769464c25cf7ab2acb3c7dda9cad3d867eb41c59565b3e2a9d17249c90c/ruff-0.15.18-py3-none-macosx_11_0_arm64.whl", hash = "sha256:08d4c86a68f2c3ec2c9d56380a71fb4a4f65373055cbb8caabd645e9102f38d4", size = 10641215, upload-time = "2026-06-18T18:25:15.802Z" },
+    { url = "https://files.pythonhosted.org/packages/6c/42/56926d17120db2c208d76bf60a1a019644dd9e91dc27f0f95c9caddb1366/ruff-0.15.18-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37e5108745c2c0705da916d7d4de533ddf547051ef45f62888c31bae73f66318", size = 10957224, upload-time = "2026-06-18T18:25:36.955Z" },
+    { url = "https://files.pythonhosted.org/packages/22/4f/d43fab8d8189afde803103022d000a8ef9f230616d436d52a8b2b8d63b50/ruff-0.15.18-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56949a6ce8b3abde54c0bcb22cebfe57e8771cadc84b407ae8b8eaf67ebdcd43", size = 10699024, upload-time = "2026-06-18T18:25:05.707Z" },
+    { url = "https://files.pythonhosted.org/packages/63/42/1e3e4c68bd408b9768cf3e439acbe2c78245225faef253f7028a0cdb63e0/ruff-0.15.18-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01a754cd6a1b630d3f97e33eb452cf7a98040482318e870f8bc52a5a30e62657", size = 11491458, upload-time = "2026-06-18T18:25:20.275Z" },
+    { url = "https://files.pythonhosted.org/packages/20/77/47a3484bea8521e14a203d98c389c5c97846675e4f02734672da4a69b52a/ruff-0.15.18-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ba7a07e03a44dbf10bb086ee06705b173625014ec99f73a7e6836a5e5590a0c", size = 12383752, upload-time = "2026-06-18T18:25:22.535Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/ca/054159590787023d83b658a1a1819c4c8910114e7015069340b71c0961cb/ruff-0.15.18-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a2c40a41a4cadbcf5897b548ab29dfe248b20c540961c0247d98a3973c70403", size = 11577923, upload-time = "2026-06-18T18:25:10.702Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/ff/d353d6b7bbd73cc0ec37f4463d7540e45e894338abdd9964eee0de332708/ruff-0.15.18-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f0480ce690cbb6c4db6e5d08f19fce98e10ba131a8b60c1bcdac42771e3ae2d", size = 11583925, upload-time = "2026-06-18T18:25:32.391Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/4a/891f89b9c296ed3e5f3ece1a5629badc989d9a8fdaa30431aaf4774bc1c2/ruff-0.15.18-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:2330215f1f393fa8733f55edce04fcf94c36a2c460fcde31f78cc84e4951e9b1", size = 11582834, upload-time = "2026-06-18T18:25:27.309Z" },
+    { url = "https://files.pythonhosted.org/packages/32/a3/ed9e370154bf85de360b93c03026157f02d4943b2d01ff4945f4429f8e8a/ruff-0.15.18-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:a6aa6a3d979e48ae617578183674bf264fbe7d0114a796a26bd678d67963c7ff", size = 10927328, upload-time = "2026-06-18T18:25:34.676Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/d1/5cf5909329fedb5d39d555ee818ba5cf4638e1a301b89785d34f2905bfcb/ruff-0.15.18-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a81beadbbff2c9c245561ae3f77b16709d87f35eec650d0501679239d3449b22", size = 10693187, upload-time = "2026-06-18T18:25:08.245Z" },
+    { url = "https://files.pythonhosted.org/packages/fd/44/ff6c635cf2c4f4e7b618b6640da057376baa36014695487d88aed4794268/ruff-0.15.18-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2186d9e940ae332ab293623a75b5f4fe49565f449954d50a72a046683aa6b809", size = 11208721, upload-time = "2026-06-18T18:25:41.327Z" },
+    { url = "https://files.pythonhosted.org/packages/88/d9/5baa2a30861adfb7022cf33c1e35b2fc18085b08c16f83eff4c7b99a5f48/ruff-0.15.18-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:5c2abf140438032bc77b2284a6c9944ecd8a19e5f1c7b52b1b8e4a0a80d19a7a", size = 11678599, upload-time = "2026-06-18T18:25:13.607Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/1a/0725a7cfdc32ff769efb96ee782bec882e16448c5d9e3be947ec4c04ce27/ruff-0.15.18-py3-none-win32.whl", hash = "sha256:02299e6e9fa5b297a3f6d5d10d7bcd655c925b028bb8b9d4588214549c6b9ec4", size = 10901903, upload-time = "2026-06-18T18:25:24.755Z" },
+    { url = "https://files.pythonhosted.org/packages/f3/51/805d9f6fb7970505c3504794a5ec350f605361b807fef4dcf214ebd35e72/ruff-0.15.18-py3-none-win_amd64.whl", hash = "sha256:dac80dc8d26b2257dbefabed62f5d255c3937b4ccb122da1fc634794fa3578b3", size = 12041189, upload-time = "2026-06-18T18:25:17.915Z" },
+    { url = "https://files.pythonhosted.org/packages/29/4c/67bb45e41609eb4726f1bfeb59e083cf91d14c696d4bd14c234a980be93d/ruff-0.15.18-py3-none-win_arm64.whl", hash = "sha256:b2c9257fcbd4a3e5b977a1904e6facca016bafe2edc17df24db67cfaee03b4e4", size = 11329958, upload-time = "2026-06-18T18:25:43.686Z" },
 ]
 
 [[package]]
diff --git a/website/guide/claude-code.md b/website/guide/claude-code.md
index a3b5f6121..8c6ad43df 100644
--- a/website/guide/claude-code.md
+++ b/website/guide/claude-code.md
@@ -15,7 +15,7 @@ Restart Claude Code, then type `/skills` to verify "mempalace" appears.
 
 With the plugin installed, Claude Code automatically:
 - Starts the MemPalace MCP server on launch
-- Has access to all 33 tools
+- Has access to all 34 tools
 - Learns the AAAK dialect and memory protocol from the `mempalace_status` response
 - Searches the palace before answering questions about past work
 
diff --git a/website/guide/mcp-integration.md b/website/guide/mcp-integration.md
index 6d8c7731a..2ce1ed064 100644
--- a/website/guide/mcp-integration.md
+++ b/website/guide/mcp-integration.md
@@ -1,6 +1,6 @@
 # MCP Integration
 
-MemPalace provides 33 tools through the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/), giving any MCP-compatible AI full read/write access to your palace.
+MemPalace provides 34 tools through the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/), giving any MCP-compatible AI full read/write access to your palace.
 
 ## Setup
 
@@ -26,7 +26,7 @@ claude mcp add mempalace -- python -m mempalace.mcp_server --palace /path/to/pal
 codex mcp add mempalace -- python -m mempalace.mcp_server --palace /path/to/palace
 ```
 
-Now your AI has all 33 tools available. Ask it anything:
+Now your AI has all 34 tools available. Ask it anything:
 
 > *"What did we decide about auth last month?"*
 
diff --git a/website/guide/openclaw.md b/website/guide/openclaw.md
index cdfe4f591..d244e3d62 100644
--- a/website/guide/openclaw.md
+++ b/website/guide/openclaw.md
@@ -27,7 +27,7 @@ Or by directly editing your OpenClaw configuration:
 
 ## How It Works
 
-Once connected, OpenClaw agents receive all 33 tools along with the **Memory Protocol**—a strict behavioral guide indicating they should:
+Once connected, OpenClaw agents receive all 34 tools along with the **Memory Protocol**—a strict behavioral guide indicating they should:
 1. **Never guess**: Query `mempalace_search` or `mempalace_kg_query` before confidently answering.
 2. **Keep an agent diary**: Maintain continuity between sessions by writing to `mempalace_diary_write`.
 3. **Manage the Knowledge Graph**: Update declarative facts when things change using `mempalace_kg_add` and `mempalace_kg_invalidate`.
diff --git a/website/reference/mcp-tools.md b/website/reference/mcp-tools.md
index 121014abd..cd157ceab 100644
--- a/website/reference/mcp-tools.md
+++ b/website/reference/mcp-tools.md
@@ -1,6 +1,6 @@
 # MCP Tools Reference
 
-Detailed parameter schemas for all 33 MCP tools.
+Detailed parameter schemas for all 35 MCP tools.
 
 ## Palace — Read Tools
 
@@ -102,6 +102,20 @@ File verbatim content into the palace. Identical content (same deterministic dra
 
 ---
 
+### `mempalace_checkpoint`
+
+Save a whole session in one call. Semantic-dedups each item, files the non-duplicates as drawers, then writes one diary entry. Use this instead of many separate `mempalace_check_duplicate` / `mempalace_add_drawer` / `mempalace_diary_write` calls — it renders as a single tool-call card in the host UI (and keeps the spinner up for the whole save). Reuses the same single-item handlers, so dedup, idempotency, and verbatim guarantees are identical.
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `items` | array | **Yes** | Verbatim items to file. Each is `{ wing, room, content }` |
+| `diary` | object | No | Diary entry written after filing: `{ agent_name, entry, topic?, wing? }` (`entry` is AAAK-format) |
+| `dedup_threshold` | number | No | Similarity threshold 0–1 for the per-item dedup check (default 0.9) |
+
+**Returns:** `{ added: [...], duplicates: [...], errors: [...], diary? }`
+
+---
+
 ### `mempalace_delete_drawer`
 
 Delete a drawer by ID. Irreversible.
@@ -132,6 +146,20 @@ Mine a directory into the palace — the MCP equivalent of `mempalace mine`. Wra
 
 ---
 
+### `mempalace_delete_by_source`
+
+Bulk-delete every drawer mined from one `source_file` (exact match). Use this to clean up benchmark or test data that was accidentally mined into a user wing — for example ShareGPT dumps or `results_mempal_*.jsonl` eval files drowning out real memories in semantic search. Matching is pushed down to the storage backend via a `where` filter, so it is not subject to the SQLite variable limit no matter how many drawers share the source. Returns a dry-run match count and a small sample by default; pass `dry_run=false` to commit. Irreversible.
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `source_file` | string | **Yes** | Exact `source_file` metadata value to remove (e.g. the full path that was mined) |
+| `dry_run` | boolean | No | Preview the match count without deleting; default `true`. Pass `false` to actually delete |
+
+**Returns (dry run):** `{ success, dry_run, source_file, match_count, sample, hint }`
+**Returns (commit):** `{ success, dry_run, source_file, deleted }`
+
+---
+
 ### `mempalace_sync`
 
 Prune drawers whose source files are gitignored, deleted, or moved. Returns a dry-run report by default; pass `apply=true` to commit deletions.
diff --git a/website/reference/modules.md b/website/reference/modules.md
index 4c12ae9ce..8442171b1 100644
--- a/website/reference/modules.md
+++ b/website/reference/modules.md
@@ -9,7 +9,7 @@ mempalace/
 ├── README.md                  ← project documentation
 ├── mempalace/                 ← core package
 │   ├── cli.py                 ← CLI entry point
-│   ├── mcp_server.py          ← MCP server (33 tools)
+│   ├── mcp_server.py          ← MCP server (34 tools)
 │   ├── knowledge_graph.py     ← temporal entity graph
 │   ├── palace_graph.py        ← room navigation graph
 │   ├── dialect.py             ← AAAK compression
@@ -56,7 +56,7 @@ Argparse-based CLI with subcommands: `init`, `mine`, `split`, `search`, `compres
 
 ### `mcp_server.py` — MCP Server
 
-JSON-RPC over stdin/stdout. Implements the MCP protocol with 33 tools covering palace read/write, drawer CRUD, knowledge graph, navigation, tunnels, agent diary, and system operations. Includes the Memory Protocol and AAAK Spec in status responses.
+JSON-RPC over stdin/stdout. Implements the MCP protocol with 34 tools covering palace read/write, drawer CRUD, knowledge graph, navigation, tunnels, agent diary, and system operations. Includes the Memory Protocol and AAAK Spec in status responses.
 
 ### `searcher.py` — Semantic Search