From 8d5e7d468de1c5bca01549ffa555f2a38cb83df3 Mon Sep 17 00:00:00 2001 From: Tiago Napoli Date: Tue, 28 Apr 2026 21:36:47 -0700 Subject: [PATCH 1/3] Add failing test: unaccessed RI key lost after second checkpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After recovery, if an RI key is never accessed before a second checkpoint: - SnapshotAllTreesForCheckpoint skips it (not in liveIndexes) - PurgeOldCheckpointSnapshots deletes the old checkpoint snapshot - Next access fails with 'range index not found' — data is lost This test is expected to FAIL (no fix included). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test/Garnet.test/RespRangeIndexTests.cs | 55 +++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/test/Garnet.test/RespRangeIndexTests.cs b/test/Garnet.test/RespRangeIndexTests.cs index bbf78010546..e6e11b44da8 100644 --- a/test/Garnet.test/RespRangeIndexTests.cs +++ b/test/Garnet.test/RespRangeIndexTests.cs @@ -1954,6 +1954,61 @@ public void RIConcurrentOpsWithCheckpointTest() } } + /// <summary> + /// After checkpoint recovery, if an RI key is never accessed before a second + /// checkpoint, PurgeOldCheckpointSnapshots deletes the only snapshot file. + /// The tree was never restored (not in liveIndexes), so no new snapshot + /// was created. Accessing the key after the purge should still return data. 
+ /// </summary> + [Test] + public void RIUnaccessedKeyAfterRecoveryAndSecondCheckpointTest() + { + server.Dispose(); + TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true); + server.Start(); + + using (var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true))) + { + var db = redis.GetDatabase(0); + + // Create two RI keys with data + db.Execute("RI.CREATE", "idx1", "DISK", "CACHESIZE", "65536", "MINRECORD", "8"); + db.Execute("RI.SET", "idx1", "key-a", "val-a"); + + db.Execute("RI.CREATE", "idx2", "DISK", "CACHESIZE", "65536", "MINRECORD", "8"); + db.Execute("RI.SET", "idx2", "key-b", "val-b"); + + db.Execute("SAVE"); + } + + // Recover — both stubs get FlagRecovered=true, TreeHandle=0 + server.Dispose(); + server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true, tryRecover: true); + server.Start(); + + using (var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true))) + { + var db = redis.GetDatabase(0); + + // Access idx1 only — this restores it and registers in liveIndexes + var val = db.Execute("RI.GET", "idx1", "key-a"); + ClassicAssert.AreEqual("val-a", (string)val); + + // Do NOT access idx2 — it stays unrestored (TreeHandle=0, FlagRecovered=true) + + // Second checkpoint: idx1 gets a new snapshot, idx2 does not. + // PurgeOldCheckpointSnapshots deletes the old checkpoint snapshot files. + db.Execute("SAVE"); + + // Now access idx2 — its old checkpoint snapshot was purged, + // and no flush.bftree was ever written. + val = db.Execute("RI.GET", "idx2", "key-b"); + ClassicAssert.AreEqual("val-b", (string)val, + "Unaccessed RI key should still be readable after second checkpoint purges old snapshots"); + } + } + /// <summary> /// Verifies pure AOF-only recovery (no checkpoint). 
RI.CREATE is replayed to /// recreate the BfTree, then RI.SET/RI.DEL operations rebuild the data. From 57e0607a2dada0d962739f9859e57cd98b7ec7b4 Mon Sep 17 00:00:00 2001 From: Tiago Napoli Date: Thu, 30 Apr 2026 12:28:46 -0700 Subject: [PATCH 2/3] Add file-level assertions to unaccessed key checkpoint test Verify disk file state at each stage: snapshot files after first checkpoint, LiveIndexCount after recovery, snapshot purge after second checkpoint, and data.bftree survival. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test/Garnet.test/RespRangeIndexTests.cs | 35 +++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/Garnet.test/RespRangeIndexTests.cs b/test/Garnet.test/RespRangeIndexTests.cs index e6e11b44da8..cc4ffc20993 100644 --- a/test/Garnet.test/RespRangeIndexTests.cs +++ b/test/Garnet.test/RespRangeIndexTests.cs @@ -2,9 +2,11 @@ // Licensed under the MIT license. using System; +using System.IO; using System.Threading; using System.Threading.Tasks; using Allure.NUnit; +using Garnet.server; using NUnit.Framework; using NUnit.Framework.Legacy; using StackExchange.Redis; @@ -1968,6 +1970,11 @@ public void RIUnaccessedKeyAfterRecoveryAndSecondCheckpointTest() server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true); server.Start(); + var rangeIndexManager = server.Provider.StoreWrapper.rangeIndexManager; + var rangeIndexDir = Path.Combine(TestUtils.MethodTestDir, "checkpoints", "rangeindex"); + var idx1Dir = Path.Combine(rangeIndexDir, RangeIndexManager.HashKeyToDirectoryName("idx1"u8)); + var idx2Dir = Path.Combine(rangeIndexDir, RangeIndexManager.HashKeyToDirectoryName("idx2"u8)); + using (var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true))) { var db = redis.GetDatabase(0); @@ -1979,14 +1986,28 @@ public void RIUnaccessedKeyAfterRecoveryAndSecondCheckpointTest() db.Execute("RI.CREATE", "idx2", "DISK", "CACHESIZE", "65536", 
"MINRECORD", "8"); db.Execute("RI.SET", "idx2", "key-b", "val-b"); + // First checkpoint — both trees get snapshot files db.Execute("SAVE"); } + // Both key directories should have data.bftree + snapshot file + ClassicAssert.IsTrue(Directory.Exists(idx1Dir), "idx1 directory should exist after first checkpoint"); + ClassicAssert.IsTrue(Directory.Exists(idx2Dir), "idx2 directory should exist after first checkpoint"); + ClassicAssert.IsTrue(File.Exists(Path.Combine(idx1Dir, "data.bftree")), "idx1 data.bftree should exist"); + ClassicAssert.IsTrue(File.Exists(Path.Combine(idx2Dir, "data.bftree")), "idx2 data.bftree should exist"); + var idx1Snapshots = Directory.GetFiles(idx1Dir, "snapshot.*.bftree"); + var idx2Snapshots = Directory.GetFiles(idx2Dir, "snapshot.*.bftree"); + ClassicAssert.AreEqual(1, idx1Snapshots.Length, "idx1 should have 1 snapshot after first checkpoint"); + ClassicAssert.AreEqual(1, idx2Snapshots.Length, "idx2 should have 1 snapshot after first checkpoint"); + // Recover — both stubs get FlagRecovered=true, TreeHandle=0 server.Dispose(); server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true, tryRecover: true); server.Start(); + rangeIndexManager = server.Provider.StoreWrapper.rangeIndexManager; + ClassicAssert.AreEqual(0, rangeIndexManager.LiveIndexCount, "No trees should be live right after recovery"); + using (var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true))) { var db = redis.GetDatabase(0); @@ -1994,6 +2015,7 @@ public void RIUnaccessedKeyAfterRecoveryAndSecondCheckpointTest() // Access idx1 only — this restores it and registers in liveIndexes var val = db.Execute("RI.GET", "idx1", "key-a"); ClassicAssert.AreEqual("val-a", (string)val); + ClassicAssert.AreEqual(1, rangeIndexManager.LiveIndexCount, "Only idx1 should be live after restore"); // Do NOT access idx2 — it stays unrestored (TreeHandle=0, FlagRecovered=true) @@ -2001,6 +2023,19 @@ public void 
RIUnaccessedKeyAfterRecoveryAndSecondCheckpointTest() // PurgeOldCheckpointSnapshots deletes the old checkpoint snapshot files. db.Execute("SAVE"); + // idx1: old snapshot purged, new snapshot created + idx1Snapshots = Directory.GetFiles(idx1Dir, "snapshot.*.bftree"); + ClassicAssert.AreEqual(1, idx1Snapshots.Length, "idx1 should have 1 snapshot after second checkpoint (old purged, new created)"); + + // idx2: old snapshot purged, NO new snapshot created (tree was never restored) + idx2Snapshots = Directory.GetFiles(idx2Dir, "snapshot.*.bftree"); + ClassicAssert.AreEqual(0, idx2Snapshots.Length, + "idx2 old snapshot should be purged and no new snapshot created (tree was never restored)"); + + // idx2 data.bftree should still exist (working file from initial creation) + ClassicAssert.IsTrue(File.Exists(Path.Combine(idx2Dir, "data.bftree")), + "idx2 data.bftree should still exist (not deleted by purge)"); + // Now access idx2 — its old checkpoint snapshot was purged, // and no flush.bftree was ever written. val = db.Execute("RI.GET", "idx2", "key-b"); From 21ae868048e28b97e5a7e39669b7cc427b72f404 Mon Sep 17 00:00:00 2001 From: Tiago Napoli Date: Thu, 30 Apr 2026 12:36:14 -0700 Subject: [PATCH 3/3] Add file assertions and second-recovery test for unaccessed RI key purge Add file-level assertions to RIUnaccessedKeyAfterRecoveryAndSecondCheckpointTest verifying snapshot state at each stage. Add RIUnaccessedKeyLostAfterSecondRecoveryTest demonstrating that an unaccessed RI key is lost after: checkpoint -> recover -> access only other keys -> second checkpoint (purges old snapshot) -> second recover (no snapshot to restore from). Both tests currently fail, documenting the bug. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test/Garnet.test/RespRangeIndexTests.cs | 79 +++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/test/Garnet.test/RespRangeIndexTests.cs b/test/Garnet.test/RespRangeIndexTests.cs index cc4ffc20993..eff1c84b782 100644 --- a/test/Garnet.test/RespRangeIndexTests.cs +++ b/test/Garnet.test/RespRangeIndexTests.cs @@ -2044,6 +2044,85 @@ public void RIUnaccessedKeyAfterRecoveryAndSecondCheckpointTest() } } + /// <summary> + /// After checkpoint + recovery, accessing only one RI key and taking a second + /// checkpoint causes PurgeOldCheckpointSnapshots to delete the unaccessed + /// key's only snapshot (no new one was created because it was never restored). + /// A second recovery then loses the unaccessed key entirely. + /// </summary> + [Test] + public void RIUnaccessedKeyLostAfterSecondRecoveryTest() + { + server.Dispose(); + TestUtils.DeleteDirectory(TestUtils.MethodTestDir, wait: true); + server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true); + server.Start(); + + var rangeIndexDir = Path.Combine(TestUtils.MethodTestDir, "checkpoints", "rangeindex"); + var idx1Dir = Path.Combine(rangeIndexDir, RangeIndexManager.HashKeyToDirectoryName("idx1"u8)); + var idx2Dir = Path.Combine(rangeIndexDir, RangeIndexManager.HashKeyToDirectoryName("idx2"u8)); + + // Step 1-3: Create two RI keys with data, then checkpoint + using (var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true))) + { + var db = redis.GetDatabase(0); + + db.Execute("RI.CREATE", "idx1", "DISK", "CACHESIZE", "65536", "MINRECORD", "8"); + db.Execute("RI.SET", "idx1", "key-a", "val-a"); + + db.Execute("RI.CREATE", "idx2", "DISK", "CACHESIZE", "65536", "MINRECORD", "8"); + db.Execute("RI.SET", "idx2", "key-b", "val-b"); + + db.Execute("SAVE"); + } + + // Both keys have snapshots + ClassicAssert.AreEqual(1, Directory.GetFiles(idx1Dir, "snapshot.*.bftree").Length, "idx1 
should have 1 snapshot"); + ClassicAssert.AreEqual(1, Directory.GetFiles(idx2Dir, "snapshot.*.bftree").Length, "idx2 should have 1 snapshot"); + + // Step 4: Restart and recover from checkpoint + server.Dispose(); + server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true, tryRecover: true); + server.Start(); + + // Step 5: Access only idx1, then take a second checkpoint + using (var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true))) + { + var db = redis.GetDatabase(0); + + var val = db.Execute("RI.GET", "idx1", "key-a"); + ClassicAssert.AreEqual("val-a", (string)val); + + // Do NOT access idx2 + + db.Execute("SAVE"); + } + + // idx1 has a new snapshot; idx2's old snapshot was purged with no replacement + ClassicAssert.AreEqual(1, Directory.GetFiles(idx1Dir, "snapshot.*.bftree").Length, + "idx1 should have 1 snapshot after second checkpoint"); + ClassicAssert.AreEqual(0, Directory.GetFiles(idx2Dir, "snapshot.*.bftree").Length, + "idx2 snapshot should have been purged with no replacement"); + + // Step 6: Second restart and recover + server.Dispose(); + server = TestUtils.CreateGarnetServer(TestUtils.MethodTestDir, enableRangeIndexPreview: true, enableAOF: true, tryRecover: true); + server.Start(); + + // Step 7: both idx1 and idx2 must still be readable (idx2 is currently lost, so the last assertion fails until the purge bug is fixed) + using (var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig())) + { + var db = redis.GetDatabase(0); + + var val = db.Execute("RI.GET", "idx1", "key-a"); + ClassicAssert.AreEqual("val-a", (string)val, "idx1 should survive second recovery"); + + val = db.Execute("RI.GET", "idx2", "key-b"); + ClassicAssert.AreEqual("val-b", (string)val, + "idx2 should survive second recovery (unaccessed key must not be lost by snapshot purge)"); + } + } + /// <summary> /// Verifies pure AOF-only recovery (no checkpoint). RI.CREATE is replayed to /// recreate the BfTree, then RI.SET/RI.DEL operations rebuild the data.