Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
73f69f9
Add RangeIndex cluster migration with sketch protection
May 5, 2026
31e0017
Rename MigrationReceiveSession.cs to RangeIndexMigrationReceiveSessio…
May 21, 2026
4b1768c
Address PR review feedback for RangeIndex migration
May 21, 2026
ab97a36
Fix COPY option comment and restore key logging in RangeIndex migration
May 21, 2026
cea5d0f
Expand COPY option comment with crash recovery requirements
May 21, 2026
6774e03
Add using System.Text and simplify log messages in MigrateOperation
May 21, 2026
695068f
Refactor TransmitKeysAsync to use single HashSet<byte[]> skip set
May 21, 2026
a4cdaa4
Refactor migration key discovery to use PinnedSpanByte and callback
May 21, 2026
15afa23
Update DeleteRangeIndex doc comment to reflect COPY not yet supported
May 21, 2026
d61e50c
Rename keyPsb to key in VectorManager.Migration.cs
May 21, 2026
a23d491
Switch test classes to TestBase and minor comment cleanup
May 21, 2026
a09e9d8
w
May 21, 2026
74b9745
Align Migration.cs with current main APIs and update migration docs
May 26, 2026
0b6e188
Add TRYAGAIN response for RI commands during migration and fix build …
May 26, 2026
d6f5d4e
Lazy-init RangeIndex migration receive state and add documentation
May 26, 2026
4351563
Clean up TRYAGAIN parameter threading and fix double dispose
May 26, 2026
4e0879d
Remove unnecessary comment about source key deletion
May 26, 2026
f8f7a13
Add cancellation token support to RangeIndex migration
May 26, 2026
d3a8e74
Include key name in snapshot failure log message
May 26, 2026
c6c8577
Simplify TransmitRangeIndexAsync error handling
May 26, 2026
307e31e
Improve TransmitRangeIndexAsync log messages: add key and fix prefixes
May 26, 2026
5e25df3
Add CancellationToken parameter to TransmitRangeIndexAsync
May 26, 2026
c72adcf
Add CancellationToken parameter to MigrateRangeIndexKeysAsync
May 26, 2026
7726d89
Add method name and key to all log messages in MigrateSession.RangeIn…
May 26, 2026
d0beb96
Fix unsafe use of FromPinnedSpan on unpinned byte array
May 26, 2026
8baf55a
Simplify DeleteRangeIndex call: remove temp variable
May 26, 2026
8775e39
Fix SnapshotForMigration to take PinnedSpanByte and pin key in caller
May 26, 2026
1eadb44
Fix unpinned FromPinnedSpan in PublishMigratedIndex
May 26, 2026
cc25c27
Refactor migration temp directory: create once in constructor
May 26, 2026
ee6e473
Fix migration reads to use Read_RangeIndex to suppress CTT
May 26, 2026
178cf4e
Eliminate TOCTOU: read authoritative stub in SnapshotForMigration
May 26, 2026
3c19a4c
Remove TRYAGAIN changes from bftree-migration branch
May 26, 2026
328972b
Add RI migration fault injection tests and observability logging
May 26, 2026
d58b1bc
Refactor migration observability into metric structs
May 26, 2026
3517fee
Make metric struct fields public and use space-separated log format
May 26, 2026
593c33a
Extract metric structs into Server/Migration/RangeIndex/ folder
May 26, 2026
f38217c
Initialize RangeIndexMigrationReceiveState in ClusterSession constructor
May 26, 2026
14a54d4
Remove redundant isReceiving field, derive from currentDeserializer !…
May 26, 2026
7c5ecba
Add thread-safety documentation to Reset() addressing review comment
May 26, 2026
da135fd
Use WaitForSnapshot pattern in SnapshotForMigration instead of spin-wait
May 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions libs/client/ClientSession/GarnetClientSessionIncremental.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ public enum MigrationRecordSpanType : byte
/// Bespoke encoding for Vector Set indexes.
/// </summary>
VectorSetIndex = 3,

/// <summary>
/// Chunked serialization stream for a RangeIndex key during migration.
/// The receiver uses a state machine to track the in-progress stream.
/// </summary>
SerializedRangeIndexStream = 4,
}

public sealed unsafe partial class GarnetClientSession : IServerHook, IMessageConsumer
Expand Down
52 changes: 32 additions & 20 deletions libs/cluster/Server/Migration/MigrateOperation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using System.Threading.Tasks;
using Garnet.client;
using Garnet.server;
Expand All @@ -22,15 +23,20 @@ internal sealed partial class MigrateOperation
public StoreScan storeScan;

private readonly ConcurrentDictionary<byte[], byte[]> vectorSetsIndexKeysToMigrate;
private readonly ConcurrentDictionary<byte[], byte> rangeIndexKeysToMigrate;

readonly MigrateSession session;
readonly GarnetClientSession gcs;
readonly LocalServerSession localServerSession;

public GarnetClientSession Client => gcs;

public LocalServerSession LocalSession => localServerSession;

public IEnumerable<KeyValuePair<byte[], byte[]>> VectorSets => vectorSetsIndexKeysToMigrate;

/// <summary>
/// RangeIndex keys recorded so far through <see cref="EncounteredRangeIndex"/>.
/// Exposes the key collection of the backing concurrent dictionary; the byte values stored alongside the keys are not used here.
/// </summary>
public IEnumerable<byte[]> RangeIndexKeys => rangeIndexKeysToMigrate.Keys;

public void ThrowIfCancelled() => session._cts.Token.ThrowIfCancellationRequested();

public bool Contains(int slot) => session._sslots.Contains(slot);
Expand All @@ -46,6 +52,8 @@ public bool ContainsNamespace(ReadOnlySpan<byte> namespaceBytes)
public void EncounteredVectorSet(byte[] key, byte[] value)
=> vectorSetsIndexKeysToMigrate.TryAdd(key, value);

/// <summary>
/// Remember a RangeIndex key so it can be surfaced later through <see cref="RangeIndexKeys"/>.
/// The dictionary is used as a concurrent set: the stored value (0) is a placeholder, and a key
/// that is already present (per the dictionary's byte-array comparer) is left untouched.
/// </summary>
/// <param name="key">Key bytes of the RangeIndex record.</param>
public void EncounteredRangeIndex(byte[] key) => rangeIndexKeysToMigrate.TryAdd(key, 0);

public MigrateOperation(MigrateSession session, Sketch sketch = null, int batchSize = 1 << 18)
{
this.session = session;
Expand All @@ -55,6 +63,7 @@ public MigrateOperation(MigrateSession session, Sketch sketch = null, int batchS
storeScan = new StoreScan(this);
keysToDelete = [];
vectorSetsIndexKeysToMigrate = new(ByteArrayComparer.Instance);
rangeIndexKeysToMigrate = new(ByteArrayComparer.Instance);
}

public async ValueTask<bool> InitializeAsync()
Expand Down Expand Up @@ -127,18 +136,14 @@ public async Task<bool> TransmitSlotsAsync()
return true;
}

public async Task<bool> TransmitKeysAsync(Dictionary<byte[], byte[]> vectorSetKeysToIgnore)
public async Task<bool> TransmitKeysAsync(Func<PinnedSpanByte, bool> shouldSkipKey)
{
// Use this for both stores; main store will just use the SpanByteAndMemory directly. We want it to be outside iterations
// so we can reuse the SpanByteAndMemory.Memory across iterations.
// TODO: initialize 'output' based on gcs curr and end; make sure it has the initial part of the "send" set, and call gcs.IncrementRecordDirect().
// This will still allow SBAM.Memory to be reused.
var output = new UnifiedOutput();

#if NET9_0_OR_GREATER
var ignoreLookup = vectorSetKeysToIgnore.GetAlternateLookup<ReadOnlySpan<byte>>();
#endif

try
{
var keys = sketch.Keys;
Expand All @@ -152,21 +157,9 @@ public async Task<bool> TransmitKeysAsync(Dictionary<byte[], byte[]> vectorSetKe
if (keys[i].Item2)
continue;

var spanByte = keys[i].Item1;

// Don't transmit if a Vector Set
var isVectorSet =
vectorSetKeysToIgnore.Count > 0 &&
#if NET9_0_OR_GREATER
ignoreLookup.ContainsKey(spanByte.ReadOnlySpan);
#else
vectorSetKeysToIgnore.ContainsKey(spanByte.ToArray());
#endif

if (isVectorSet)
{
// Skip keys that require special handling
if (shouldSkipKey(keys[i].Item1))
continue;
}

if (!await session.WriteOrSendRecordAsync(gcs, localServerSession, keys[i].Item1, ref input, ref output, out var status).ConfigureAwait(false))
return false;
Expand Down Expand Up @@ -283,7 +276,26 @@ public void DeleteVectorSet(PinnedSpanByte key)

var delRes = localServerSession.BasicGarnetApi.DELETE(key);

session.logger?.LogDebug("Deleting Vector Set {key} after migration: {delRes}", System.Text.Encoding.UTF8.GetString(key), delRes);
session.logger?.LogDebug("Deleting Vector Set {key} after migration: {delRes}", Encoding.UTF8.GetString(key), delRes);
}

/// <summary>
/// Remove a RangeIndex key from the local store once it has been migrated.
/// The key is deleted even when the session's COPY option is set, because COPY is not yet
/// supported for RangeIndex keys; a warning is logged in that case.
/// </summary>
/// <param name="key">Pinned key bytes of the RangeIndex to delete.</param>
public void DeleteRangeIndex(PinnedSpanByte key)
{
    var log = session.logger;

    // Honoring COPY for a RangeIndex would need an atomic swap (or a transactional scheme)
    // for replacing the BfTree data files when the destination already holds the stub,
    // including well-defined recovery if the process crashes in the middle of the swap.
    // Until that exists the source key is always deleted and the option is only reported.
    if (session._copyOption)
    {
        log?.LogWarning("COPY option ignored for RangeIndex key {key}", Encoding.UTF8.GetString(key));
    }

    var deleteResult = localServerSession.BasicGarnetApi.DELETE(key);

    log?.LogDebug("Deleted RangeIndex key {key} after migration: {delRes}", Encoding.UTF8.GetString(key), deleteResult);
}
}
}
Expand Down
7 changes: 6 additions & 1 deletion libs/cluster/Server/Migration/MigrateScanFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ public bool Reader<TSourceLogRecord>(in TSourceLogRecord srcLogRecord, RecordMet
// Check if key belongs to slot that is being migrated and if it can be added to our buffer
if (migrateOperation.Contains(slot))
{
if (srcLogRecord.RecordType == VectorManager.RecordType)
if (srcLogRecord.RecordType == RangeIndexManager.RangeIndexRecordType)
{
// RangeIndex keys need out-of-band migration (snapshot + chunks)
migrateOperation.EncounteredRangeIndex(key.ToArray());
}
else if (srcLogRecord.RecordType == VectorManager.RecordType)
{
// We can't delete the vector set _yet_ nor can we migrate it,
// we just need to remember it to migrate once the associated namespaces are all moved over
Expand Down
174 changes: 174 additions & 0 deletions libs/cluster/Server/Migration/MigrateSession.RangeIndex.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

using System;
using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Garnet.client;
using Garnet.common;
using Garnet.server;
using Microsoft.Extensions.Logging;
using Tsavorite.core;

namespace Garnet.cluster
{
/// <summary>
/// RangeIndex migration support: source-side transmit driver.
/// </summary>
internal sealed partial class MigrateSession : IDisposable
{
/// <summary>
/// Send one RangeIndex key to the destination node as a sequence of chunks.
/// A reader obtained from <see cref="RangeIndexManager.SnapshotRangeIndexAndCreateReader"/> is drained
/// chunk by chunk; every chunk is written as a
/// <see cref="MigrationRecordSpanType.SerializedRangeIndexStream"/> record, after which the
/// iteration buffer is flushed and the response is awaited.
/// </summary>
/// <param name="migrateOperation">Operation providing the client session and the local server session.</param>
/// <param name="keyBytes">Key of the RangeIndex to transmit.</param>
/// <param name="chunkSize">Size requested for the pooled chunk buffer; also passed to the reader factory.</param>
/// <param name="cancellationToken">Checked before each chunk and passed to the chunk read.</param>
/// <returns>
/// <c>true</c> when every chunk was written and the final flush succeeded; <c>false</c> otherwise.
/// Exceptions raised inside the transfer (including the one thrown on cancellation) are logged
/// and reported as <c>false</c> rather than propagated.
/// </returns>
private async Task<bool> TransmitRangeIndexAsync(MigrateOperation migrateOperation, byte[] keyBytes, int chunkSize, CancellationToken cancellationToken)
{
    // Decoded on demand so the string is only built when a message is actually emitted.
    string KeyText() => Encoding.UTF8.GetString(keyBytes);

    var manager = clusterProvider.storeWrapper.DefaultDatabase.RangeIndexManager;
    if (manager == null)
    {
        logger?.LogError("TransmitRangeIndexAsync: RangeIndex feature is not enabled, skipping key {key}", KeyText());
        return false;
    }

    var client = migrateOperation.Client;
    var chunk = ArrayPool<byte>.Shared.Rent(chunkSize);
    var metrics = new TransmitRangeIndexMetrics();
    metrics.tsStart = Stopwatch.GetTimestamp();

    try
    {
        using var reader = manager.SnapshotRangeIndexAndCreateReader(migrateOperation.LocalSession, keyBytes, chunkSize);
        metrics.snapshotTicks = Stopwatch.GetElapsedTime(metrics.tsStart).Ticks;
        metrics.fileSizeBytes = reader.TotalFileBytes;

        while (!reader.IsComplete)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var bytesRead = await reader.ReadNextChunkAsync(chunk, cancellationToken).ConfigureAwait(false);

            // An incomplete reader that yields nothing would loop forever; treat it as a failure.
            if (bytesRead == 0)
            {
                logger?.LogError("TransmitRangeIndexAsync: reader returned zero-length payload with a {Size}-byte buffer for key {key}", chunkSize, KeyText());
                return false;
            }

            metrics.totalBytesSent += bytesRead;

            var written = await WriteOrSendRecordSpanAsync(client, MigrationRecordSpanType.SerializedRangeIndexStream, chunk.AsSpan(0, bytesRead)).ConfigureAwait(false);
            if (!written)
            {
                logger?.LogError("TransmitRangeIndexAsync: failed to write chunk for key {key}", KeyText());
                return false;
            }
        }

        // Push out whatever is still buffered and wait for the destination's response.
        var flushed = await HandleMigrateTaskResponseAsync(client.SendAndResetIterationBuffer()).ConfigureAwait(false);
        if (!flushed)
        {
            logger?.LogError("TransmitRangeIndexAsync: flush failed for key {key}", KeyText());
            return false;
        }

        metrics.success = true;
        return true;
    }
    catch (Exception ex)
    {
        logger?.LogError(ex, "TransmitRangeIndexAsync: error during snapshot or transmission for key {key}", KeyText());
        return false;
    }
    finally
    {
        // Runs on every exit path: hand the buffer back to the pool, then emit the metrics summary.
        ArrayPool<byte>.Shared.Return(chunk);
        metrics.LogSummary(logger, keyBytes);
    }
}

/// <summary>
/// Migrate a batch of RangeIndex keys with sketch protection.
/// All keys are added to the sketch while it is INITIALIZING; the sketch is then moved to
/// TRANSMITTING (keys are snapshotted and streamed) and DELETING (source keys are removed),
/// awaiting <c>WaitForConfigPropagationAsync</c> after each transition so concurrent
/// operations are gated before the next phase starts.
/// </summary>
/// <param name="migrateOperation">Operation owning the sketch and the sessions used for transmit and delete.</param>
/// <param name="rangeIndexKeys">Keys of the RangeIndexes to migrate.</param>
/// <param name="cancellationToken">Checked before each key is transmitted and passed to the per-key transmit.</param>
/// <returns><c>true</c> when every key was transmitted and deleted; <c>false</c> when a key failed to transmit.</returns>
/// <remarks>
/// Exceptions (cancellation, delete failures, injected faults) propagate to the caller.
/// On every exit path, including failures while the sketch is being set up, the sketch is
/// cleared and the metrics summary is logged.
/// </remarks>
private async Task<bool> MigrateRangeIndexKeysAsync(MigrateOperation migrateOperation, HashSet<byte[]> rangeIndexKeys, CancellationToken cancellationToken)
{
    var metrics = new MigrateRangeIndexMetrics
    {
        tsStart = Stopwatch.GetTimestamp(),
        keyCount = rangeIndexKeys.Count,
    };

    logger?.LogWarning("MigrateRangeIndexKeysAsync: migrating {count} RangeIndex keys", metrics.keyCount);

    // The try block starts before the sketch is populated: if the status transition, the
    // config-propagation wait, or an injected fault throws, the finally below must still run,
    // otherwise the keys would stay gated in the sketch and client operations would remain blocked.
    try
    {
        // Add all RI keys to sketch during INITIALIZING (no gating yet)
        migrateOperation.sketch.Clear();
        migrateOperation.sketch.SetStatus(SketchStatus.INITIALIZING);
        foreach (var key in rangeIndexKeys)
            migrateOperation.sketch.TryHashAndStore(key);

        ExceptionInjectionHelper.TriggerException(ExceptionInjectionType.RangeIndex_Migration_Before_Transmitting);

        // Block writes during snapshot + transmit
        migrateOperation.sketch.SetStatus(SketchStatus.TRANSMITTING);
        await WaitForConfigPropagationAsync().ConfigureAwait(false);
        metrics.tsTransmitting = Stopwatch.GetTimestamp();

#if DEBUG
        await ExceptionInjectionHelper.ResetAndWaitAsync(ExceptionInjectionType.RangeIndex_Migration_After_Transmitting).ConfigureAwait(false);
#endif

        foreach (var key in rangeIndexKeys)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (!await TransmitRangeIndexAsync(migrateOperation, key, RangeIndexManager.DefaultMigrationChunkSize, cancellationToken).ConfigureAwait(false))
            {
                logger?.LogError("MigrateRangeIndexKeysAsync: failed to migrate RangeIndex key {key}", Encoding.UTF8.GetString(key));
                return false;
            }
        }

        ExceptionInjectionHelper.TriggerException(ExceptionInjectionType.RangeIndex_Migration_Before_Deleting);

        // Block reads + writes during delete
        migrateOperation.sketch.SetStatus(SketchStatus.DELETING);
        await WaitForConfigPropagationAsync().ConfigureAwait(false);
        metrics.tsDeleting = Stopwatch.GetTimestamp();

#if DEBUG
        await ExceptionInjectionHelper.ResetAndWaitAsync(ExceptionInjectionType.RangeIndex_Migration_After_Deleting).ConfigureAwait(false);
#endif

        foreach (var key in rangeIndexKeys)
        {
            try
            {
                unsafe
                {
                    // The key array must stay pinned for as long as the PinnedSpanByte built from it is in use.
                    fixed (byte* keyPtr = key)
                        migrateOperation.DeleteRangeIndex(PinnedSpanByte.FromPinnedPointer(keyPtr, key.Length));
                }
            }
            catch (Exception ex)
            {
                // Log with the key for diagnosis, then let the failure propagate unchanged.
                logger?.LogError(ex, "MigrateRangeIndexKeysAsync: failed to delete RangeIndex key {key} after migration", Encoding.UTF8.GetString(key));
                throw;
            }
        }

        metrics.success = true;
        return true;
    }
    finally
    {
        // Always clean up the sketch, even on failure, to unblock client operations
        migrateOperation.sketch.Clear();
        metrics.LogSummary(logger);
    }
}
}
}
49 changes: 46 additions & 3 deletions libs/cluster/Server/Migration/MigrateSessionKeys.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,13 @@ private async Task<bool> MigrateKeysFromStoreAsync()
await WaitForConfigPropagationAsync().ConfigureAwait(false);

// Discover Vector Sets linked namespaces
var allKeys = migrateTask.sketch.Keys.Select(t => t.Item1);
var indexesToMigrate = new Dictionary<byte[], byte[]>(ByteArrayComparer.Instance);
_namespaces = clusterProvider.storeWrapper.DefaultDatabase.VectorManager.GetNamespacesForKeys(clusterProvider.storeWrapper, migrateTask.sketch.Keys.Select(t => t.Item1.ToArray()), indexesToMigrate);
_namespaces = clusterProvider.storeWrapper.DefaultDatabase.VectorManager.GetNamespacesForKeys(clusterProvider.storeWrapper, allKeys, indexesToMigrate);

// Discover RangeIndex keys upfront
var rangeIndexKeysToMigrate = clusterProvider.storeWrapper.DefaultDatabase.RangeIndexManager?.GetRangeIndexKeysForMigration(clusterProvider.storeWrapper, allKeys)
?? new HashSet<byte[]>(ByteArrayComparer.Instance);

// If we have any namespaces, that implies Vector Sets, and if we have any of THOSE
// we need to reserve destination sets on the other side
Expand All @@ -46,8 +51,19 @@ private async Task<bool> MigrateKeysFromStoreAsync()
return false;
}

// Transmit keys from store
if (!await migrateTask.TransmitKeysAsync(indexesToMigrate).ConfigureAwait(false))
// Transmit keys from store (skipping VectorSet and RangeIndex keys, which are handled out-of-band)
#if NET9_0_OR_GREATER
var vectorSetLookup = indexesToMigrate.GetAlternateLookup<ReadOnlySpan<byte>>();
var rangeIndexLookup = rangeIndexKeysToMigrate.GetAlternateLookup<ReadOnlySpan<byte>>();
bool ShouldSkipKey(PinnedSpanByte key) =>
(indexesToMigrate.Count > 0 && vectorSetLookup.ContainsKey(key.ReadOnlySpan)) ||
(rangeIndexKeysToMigrate.Count > 0 && rangeIndexLookup.Contains(key.ReadOnlySpan));
#else
bool ShouldSkipKey(PinnedSpanByte key) =>
(indexesToMigrate.Count > 0 && indexesToMigrate.ContainsKey(key.ToArray())) ||
(rangeIndexKeysToMigrate.Count > 0 && rangeIndexKeysToMigrate.Contains(key.ToArray()));
#endif
if (!await migrateTask.TransmitKeysAsync(ShouldSkipKey).ConfigureAwait(false))
{
logger?.LogError("Failed transmitting keys from store");
return false;
Expand Down Expand Up @@ -122,6 +138,33 @@ private async Task<bool> MigrateKeysFromStoreAsync()
return false;
}
}

// Migrate RangeIndex keys (snapshot + chunk stream).
// Keys are already in the sketch (added by caller during key enumeration),
// so they're protected by the TRANSMITTING status. Mark for deletion so
// DeleteKeysAsync() handles them in the DELETING sketch status sequence.
if (rangeIndexKeysToMigrate.Count > 0)
{
logger?.LogWarning("Migrating {count} RangeIndex keys via KEYS path", rangeIndexKeysToMigrate.Count);

foreach (var key in rangeIndexKeysToMigrate)
{
if (!await TransmitRangeIndexAsync(migrateTask, key, RangeIndexManager.DefaultMigrationChunkSize, _cts.Token).ConfigureAwait(false))
{
logger?.LogError("Failed to migrate RangeIndex key via KEYS path");
return false;
}
}

// Mark all transmitted RI keys in the sketch for deletion by DeleteKeysAsync()
var keys = migrateTask.sketch.Keys;
for (var i = 0; i < keys.Count; i++)
{
if (rangeIndexKeysToMigrate.Contains(keys[i].Item1.ToArray()))
keys[i] = (keys[i].Item1, true);
}
}

// Final cleanup, which will also delete Vector Sets
await DeleteKeysAsync().ConfigureAwait(false);
}
Expand Down
Loading
Loading