From 8d41f0bc22ac1c26b65713e30d9e88d08a007c2b Mon Sep 17 00:00:00 2001 From: Tiago Napoli Date: Mon, 27 Apr 2026 18:37:46 -0700 Subject: [PATCH 1/2] Add test proving VectorSet DEL after eviction leaks DiskANN index Documents a known limitation: when a VectorSet key is evicted below HeadAddress and then deleted via DEL, the InPlaceDeleter gate (the only path that redirects to TryDeleteVectorSet) never fires because the record is not in memory. Tsavorite writes a blind tombstone, leaving the DiskANN native index unreleased and the context permanently marked as in-use. The test creates a VectorSet, forces eviction via ShiftHeadAddress, then DELs the key and asserts that InUseContextCount remains unchanged (proving DropIndex was never called). Also adds VectorManager.InUseContextCount internal accessor for test introspection of the context metadata bitmap. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Vector/VectorManager.ContextMetadata.cs | 11 +++ test/Garnet.test/RespVectorSetTests.cs | 80 +++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/libs/server/Resp/Vector/VectorManager.ContextMetadata.cs b/libs/server/Resp/Vector/VectorManager.ContextMetadata.cs index 62f338ac4c6..4ce9428b9cf 100644 --- a/libs/server/Resp/Vector/VectorManager.ContextMetadata.cs +++ b/libs/server/Resp/Vector/VectorManager.ContextMetadata.cs @@ -57,6 +57,11 @@ private struct HashSlots [FieldOffset(32)] private HashSlots slots; + /// + /// Returns the number of contexts currently marked as in use. + /// + public readonly int InUseCount => BitOperations.PopCount(inUse); + public readonly bool IsInUse(ulong context) { Debug.Assert(context > 0, "Context 0 is reserved, should never queried"); @@ -330,6 +335,12 @@ public override readonly string ToString() private ContextMetadata contextMetadata; + /// + /// Gets the number of DiskANN contexts currently marked as in use. + /// A context stays in use until completes cleanup. + /// + internal int InUseContextCount { get { lock (this) { return contextMetadata.InUseCount; } } } + /// /// Get a new unique context for a vector set. /// diff --git a/test/Garnet.test/RespVectorSetTests.cs b/test/Garnet.test/RespVectorSetTests.cs index 5b4d72ead1d..0a943560ec6 100644 --- a/test/Garnet.test/RespVectorSetTests.cs +++ b/test/Garnet.test/RespVectorSetTests.cs @@ -2342,5 +2342,85 @@ private static GarnetServer CreateGarnetServer(bool tryRecover, bool enableVecto [UnsafeAccessor(UnsafeAccessorKind.Field, Name = "opts")] private static extern ref GarnetServerOptions GetOpts(GarnetServer server); + + /// + /// Documents a known limitation: DEL on an evicted VectorSet key does NOT + /// free the native DiskANN index or schedule element data cleanup. + /// + /// The only gate that redirects DEL to TryDeleteVectorSet is + /// InPlaceDeleter, which only fires when the record is in memory. + /// For evicted records, Tsavorite writes a blind tombstone — the VectorManager + /// never learns about the deletion, so the DiskANN context stays marked as + /// in-use and the native index is never freed. + /// + [Test] + public void VectorSetDeleteAfterEvictionLeaksContextTest() + { + // Restart with VectorSets enabled (default memory — VectorSets need + // adequate page sizes for DiskANN element data). + // We force eviction via ShiftHeadAddress instead of lowMemory. + server.Dispose(); + TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, + enableVectorSetPreview: true); + server.Start(); + + var vectorManager = server.Provider.StoreWrapper.DefaultDatabase.VectorManager; + var store = server.Provider.StoreWrapper.store; + + // Record baseline context count before creating any VectorSets + var baselineContexts = vectorManager.InUseContextCount; + + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig()); + var db = redis.GetDatabase(0); + + // Step 1: Create a VectorSet and add vectors (using XB8 format) + var vec1 = new byte[75]; + var vec2 = new byte[75]; + vec1[0] = 1; + vec2[0] = 75; + for (var i = 1; i < 75; i++) + { + vec1[i] = (byte)(vec1[i - 1] + 1); + vec2[i] = (byte)(vec2[i - 1] + 1); + } + + var addRes = (int)db.Execute("VADD", ["myvectors", "XB8", vec1, new byte[] { 0, 0, 0, 0 }, "XPREQ8"]); + ClassicAssert.AreEqual(1, addRes, "First vector should be inserted"); + + addRes = (int)db.Execute("VADD", ["myvectors", "XB8", vec2, new byte[] { 0, 0, 0, 1 }, "XPREQ8"]); + ClassicAssert.AreEqual(1, addRes, "Second vector should be inserted"); + + // Step 2: Verify context was allocated + ClassicAssert.AreEqual(baselineContexts + 1, vectorManager.InUseContextCount, + "One DiskANN context should be allocated after VADD"); + + // Step 3: Force eviction by shifting HeadAddress past all current records. + // First flush to read-only (required before moving HeadAddress), + // then shift HeadAddress to evict all pages to disk. + // Do NOT read the VectorSet key between eviction and DEL (reads can + // lazily recreate the native index, which would invalidate the test). + var headBefore = store.Log.HeadAddress; + store.Log.ShiftReadOnlyAddress(store.Log.TailAddress, wait: true); + store.Log.ShiftHeadAddress(store.Log.TailAddress, wait: true); + + // Write a filler key so TailAddress advances past the evicted region + db.StringSet("filler", "data"); + + ClassicAssert.IsTrue(store.Log.HeadAddress > headBefore, + "HeadAddress should have advanced (pages were evicted)"); + + // Step 4: Delete the evicted key — blind tombstone, no VectorSet cleanup + var delResult = db.KeyDelete("myvectors"); + // Known limitation: DEL on evicted keys returns false + ClassicAssert.IsFalse(delResult, + "DEL returns false for evicted keys (known limitation)"); + + // Step 5: The context is STILL in use — proves DropIndex was never called + // and the DiskANN native index was never freed. + ClassicAssert.AreEqual(baselineContexts + 1, vectorManager.InUseContextCount, + "DiskANN context should still be in use after DEL of evicted key (known limitation: " + + "InPlaceDeleter never fires for evicted records, so TryDeleteVectorSet is never called)"); + } } } \ No newline at end of file From 882a3a672da7a5b99ef0d2278e48286e73f2e343 Mon Sep 17 00:00:00 2001 From: Tiago Napoli Date: Tue, 28 Apr 2026 15:19:35 -0700 Subject: [PATCH 2/2] Fix VectorSet index keys not deleted after MIGRATE KEYS After transmitting VectorSet indexes in the KEYS migration path, mark the VectorSet keys in the sketch as transmitted (Item2 = true) so that the existing DeleteKeys() flow deletes them during the DELETING phase. Previously, TransmitKeys() skipped VectorSet keys (they are in the ignore set for out-of-band transmission), leaving Item2 = false, which caused DeleteKeys() to skip them as well. This resulted in VectorSet keys remaining on the source node after migration. Also adds a DEBUG EXISTS subcommand for raw store existence checks bypassing cluster routing/EPSM, and uses it to verify key deletion in the VectorSetMigrateByKeys test. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Server/Migration/MigrateSessionKeys.cs | 14 +++++++++++++ libs/server/Resp/AdminCommands.cs | 20 +++++++++++++++++++ .../VectorSets/ClusterVectorSetTests.cs | 7 ++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/libs/cluster/Server/Migration/MigrateSessionKeys.cs b/libs/cluster/Server/Migration/MigrateSessionKeys.cs index 0d0de71bd2b..43b84b0a561 100644 --- a/libs/cluster/Server/Migration/MigrateSessionKeys.cs +++ b/libs/cluster/Server/Migration/MigrateSessionKeys.cs @@ -104,6 +104,20 @@ private bool MigrateKeysFromMainStore() logger?.LogCritical("Final flush after Vector Set migration failed"); return false; } + + // Mark VectorSet keys for deletion so DeleteKeys() removes them during the DELETING phase + var sketchKeys = migrateTask.sketch.Keys; + for (var i = 0; i < sketchKeys.Count; i++) + { + if (sketchKeys[i].Item2) + continue; + + var spanByte = sketchKeys[i].Item1.SpanByte; + if (indexesToMigrate.ContainsKey(spanByte.ToByteArray())) + { + sketchKeys[i] = (sketchKeys[i].Item1, true); + } + } } // Final cleanup, which will also delete Vector Sets diff --git a/libs/server/Resp/AdminCommands.cs b/libs/server/Resp/AdminCommands.cs index fa134a1498f..3504fed95df 100644 --- a/libs/server/Resp/AdminCommands.cs +++ b/libs/server/Resp/AdminCommands.cs @@ -751,6 +751,23 @@ private bool NetworkDebug() return true; } + if (command.EqualsUpperCaseSpanIgnoringCase(CmdStrings.EXISTS)) + { + if (parseState.Count != 2) + { + return AbortWithWrongNumberOfArgumentsOrUnknownSubcommand(Encoding.ASCII.GetString(command), + nameof(RespCommand.DEBUG)); + } + + // Raw store existence check, bypassing cluster routing/EPSM + var key = parseState.GetArgSliceByRef(1); + var status = basicGarnetApi.EXISTS(key, StoreType.Main); + + while (!RespWriteUtils.TryWriteInt32(status == GarnetStatus.OK ? 1 : 0, ref dcurr, dend)) + SendAndReset(); + return true; + } + if (command.EqualsUpperCaseSpanIgnoringCase(CmdStrings.HELP)) { var help = new string[] @@ -759,6 +776,9 @@ private bool NetworkDebug() "ERROR ", "\tReturn a Redis protocol error with as message. Useful for clients", "\tunit tests to simulate Redis errors.", + "EXISTS ", + "\tCheck if exists in the raw store, bypassing cluster routing.", + "\tReturns 1 if key exists, 0 otherwise.", "LOG ", "\tWrite to the server log.", "PANIC", diff --git a/test/Garnet.test.cluster/VectorSets/ClusterVectorSetTests.cs b/test/Garnet.test.cluster/VectorSets/ClusterVectorSetTests.cs index badb7bc456b..9b5279d2f5e 100644 --- a/test/Garnet.test.cluster/VectorSets/ClusterVectorSetTests.cs +++ b/test/Garnet.test.cluster/VectorSets/ClusterVectorSetTests.cs @@ -1000,6 +1000,10 @@ public void VectorSetMigrateByKeys() var migrateKey = toMigrate[migrateSingleIx]; context.clusterTestUtils.MigrateKeys(context.clusterTestUtils.GetEndPoint(sourceNodeIndex), context.clusterTestUtils.GetEndPoint(targetNodeIndex), [migrateKey], NullLogger.Instance); + // Verify the key was deleted from the source store using DEBUG EXISTS (bypasses cluster routing/EPSM) + var rawExists = (int)context.clusterTestUtils.Execute(context.clusterTestUtils.GetEndPoint(sourceNodeIndex), "DEBUG", ["EXISTS", migrateKey]); + ClassicAssert.AreEqual(0, rawExists, $"Key {Encoding.ASCII.GetString(migrateKey)} should not exist in raw store on source after MIGRATE KEYS"); + toMigrate.RemoveAt(migrateSingleIx); } @@ -1051,7 +1055,7 @@ public void VectorSetMigrateByKeys() // Finish migration context.clusterTestUtils.WaitForMigrationCleanup(NullLogger.Instance); - // Validate vector sets coherent + // Validate vector sets coherent on target for (var i = 0; i < keys.Count; i++) { var _key = keys[i]; @@ -1063,6 +1067,7 @@ public void VectorSetMigrateByKeys() ClassicAssert.IsTrue(res[0].SequenceEqual(elem)); ClassicAssert.IsTrue(res[1].SequenceEqual(attrs)); } + } [Test]