Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9cd5f4b
sketch out quantization tasks and calls into DiskANN
kevin-montrose Apr 22, 2026
fc65149
check everything in; handle case where quantization isn't actually req…
kevin-montrose Apr 22, 2026
b363ac5
formatting
kevin-montrose Apr 22, 2026
d36dd6e
Merge branch 'main' into users/kmontrose/vectorSetQuantization
kevin-montrose Apr 27, 2026
346aa7a
fixup tests
kevin-montrose Apr 27, 2026
3ca51f8
add VectorSetQuantizationTaskCount config to control concurrency of b…
kevin-montrose Apr 27, 2026
ebd0e89
formatting
kevin-montrose Apr 27, 2026
521cf87
add an assert to detect create_index failures in debug builds
kevin-montrose Apr 28, 2026
7a5cb48
merge main
kevin-montrose Apr 29, 2026
ccdbaaf
always initialize VectorSetManager, even if recovery is disabled
kevin-montrose Apr 29, 2026
8e0880a
Merge branch 'main' into users/kmontrose/vectorSetQuantization
kevin-montrose Apr 29, 2026
d46e2d7
Merge branch 'main' into users/kmontrose/vectorSetQuantization
kevin-montrose Apr 30, 2026
9cedf60
Merge branch 'main' into users/kmontrose/vectorSetQuantization
kevin-montrose May 1, 2026
6c75d30
Merge branch 'release/v1' into users/kmontrose/vectorSetQuantization
kevin-montrose May 12, 2026
0e2d888
Merge branch 'release/v1' into users/kmontrose/vectorSetQuantization
kevin-montrose May 18, 2026
22f9c5d
stopgap commit; horrendously broken as DiskANN version isn't availabl…
kevin-montrose May 18, 2026
3678776
Merge branch 'users/kmontrose/vectorSetQuantization' of https://githu…
kevin-montrose May 18, 2026
00b21c8
stopgap commit; update docs to match vector value handling
kevin-montrose May 18, 2026
516a875
add XNoQuant_I8 and XBin_U8
kevin-montrose May 21, 2026
97ea61e
Merge branch 'release/v1' into users/kmontrose/vectorSetQuantization
kevin-montrose May 22, 2026
203a2d0
fix off-by-one logic error in VSIM checking for extension formats
kevin-montrose May 22, 2026
a0d99d7
fix another off-by-one in VSIM extensions
kevin-montrose May 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion libs/host/Configuration/Options.cs
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,10 @@ public IEnumerable<string> LuaAllowedFunctions
[Option("vector-set-replay-task-count", Required = false, HelpText = "Configure how many replay tasks are used to replay VectorSet operations at the replica (default: 0 uses the machine CPU count)")]
public int VectorSetReplayTaskCount { get; set; }

// Number of quantization tasks used for Vector Set operations. Per the help text below, the default
// of 0 means the machine CPU count is used; the validation attribute is declared with the range 0..int.MaxValue.
[IntRangeValidation(0, int.MaxValue, isRequired: false)]
[Option("vector-set-quantization-task-count", Required = false, HelpText = "Configure how many quantization tasks are used to optimize Vector Set operations (default: 0 uses the machine CPU count)")]
public int VectorSetQuantizationTaskCount { get; set; }

/// <summary>
/// This property contains all arguments that were not parsed by the command line argument parser
/// </summary>
Expand Down Expand Up @@ -980,7 +984,8 @@ endpoint is IPEndPoint listenEp && clusterAnnounceEndpoint[0] is IPEndPoint anno
ClusterReplicationReestablishmentTimeout = ClusterReplicationReestablishmentTimeout,
ClusterReplicaResumeWithData = ClusterReplicaResumeWithData,
EnableVectorSetPreview = EnableVectorSetPreview,
VectorSetReplayTaskCount = VectorSetReplayTaskCount
VectorSetReplayTaskCount = VectorSetReplayTaskCount,
VectorSetQuantizationTaskCount = VectorSetQuantizationTaskCount,
};
}

Expand Down
5 changes: 4 additions & 1 deletion libs/host/defaults.conf
Original file line number Diff line number Diff line change
Expand Up @@ -459,5 +459,8 @@
"EnableVectorSetPreview": false,

/* Configure how many replay tasks are used to replay VectorSet operations at the replica (default: 0 uses the machine CPU count) */
"VectorSetReplayTaskCount": 0
"VectorSetReplayTaskCount": 0,

/* Configure how many quantization tasks are used to optimize Vector Set operations (default: 0 uses the machine CPU count) */
"VectorSetQuantizationTaskCount": 0
}
4 changes: 2 additions & 2 deletions libs/server/API/GarnetApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,8 @@ public unsafe GarnetStatus VectorSetRemove(ArgSlice key, ArgSlice element)
=> storageSession.VectorSetRemove(SpanByte.FromPinnedPointer(key.ptr, key.length), SpanByte.FromPinnedPointer(element.ptr, element.length));

/// <inheritdoc />
public unsafe GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap)
=> storageSession.VectorSetValueSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), valueType, values, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap);
public unsafe GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice values, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, out ReadOnlySpan<byte> errorMessage, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap)
=> storageSession.VectorSetValueSimilarity(SpanByte.FromPinnedPointer(key.ptr, key.length), valueType, values, count, delta, searchExplorationFactor, filter.ReadOnlySpan, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, out errorMessage, ref outputDistances, ref outputAttributes, out result, ref filterBitmap);

/// <inheritdoc />
public unsafe GarnetStatus VectorSetElementSimilarity(ArgSlice key, ArgSlice element, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap)
Expand Down
4 changes: 2 additions & 2 deletions libs/server/API/GarnetWatchApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -650,10 +650,10 @@ public bool ResetScratchBuffer(int offset)

#region Vector Sets
/// <inheritdoc/>
public GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap)
public GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, out ReadOnlySpan<byte> errorMessage, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap)
{
garnetApi.WATCH(key, StoreType.Main);
return garnetApi.VectorSetValueSimilarity(key, valueType, value, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, ref outputDistances, ref outputAttributes, out result, ref filterBitmap);
return garnetApi.VectorSetValueSimilarity(key, valueType, value, count, delta, searchExplorationFactor, filter, maxFilteringEffort, includeAttributes, ref outputIds, out outputIdFormat, out errorMessage, ref outputDistances, ref outputAttributes, out result, ref filterBitmap);
}

/// <inheritdoc/>
Expand Down
2 changes: 1 addition & 1 deletion libs/server/API/IGarnetApi.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2041,7 +2041,7 @@ public bool IterateObjectStore<TScanFunctions>(ref TScanFunctions scanFunctions,
/// Ids are encoded in <paramref name="outputIds"/> as length prefixed blobs of bytes.
/// Attributes are encoded in <paramref name="outputAttributes"/> as length prefixed blobs of bytes.
/// </summary>
GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap);
GarnetStatus VectorSetValueSimilarity(ArgSlice key, VectorValueType valueType, ArgSlice value, int count, float delta, int searchExplorationFactor, ArgSlice filter, int maxFilteringEffort, bool includeAttributes, ref SpanByteAndMemory outputIds, out VectorIdFormat outputIdFormat, out ReadOnlySpan<byte> errorMessage, ref SpanByteAndMemory outputDistances, ref SpanByteAndMemory outputAttributes, out VectorManagerResult result, ref SpanByteAndMemory filterBitmap);

/// <summary>
/// Perform a similarity search given an element already in the vector set and these parameters.
Expand Down
1 change: 1 addition & 0 deletions libs/server/Databases/MultiDatabaseManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1088,6 +1088,7 @@ public override void RecoverVectorSets()
for (var i = 0; i < activeDbIdsMapSize; i++)
{
var dbId = activeDbIdsMapSnapshot[i];
databasesMapSnapshot[dbId].VectorManager.Initialize();
databasesMapSnapshot[dbId].VectorManager.ResumePostRecovery();
}
}
Expand Down
3 changes: 3 additions & 0 deletions libs/server/Databases/SingleDatabaseManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,9 @@ private void SafeTruncateAOF(AofEntryType entryType, bool unsafeTruncateLog)
/// <inheritdoc/>
public override void RecoverVectorSets()
{
// Initialize must have run before ResumePostRecovery, so it is invoked here unconditionally
// instead of relying on an earlier code path having done it.
// NOTE(review): presumably Initialize is safe to call when the manager is already initialized - confirm.
defaultDatabase.VectorManager.Initialize();

// Only after initialization do we hand off to the manager's post-recovery resume step.
defaultDatabase.VectorManager.ResumePostRecovery();
}

Expand Down
95 changes: 56 additions & 39 deletions libs/server/Resp/Vector/DiskANNService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,20 @@ public nint CreateIndex(
delegate* unmanaged[Cdecl]<ulong, uint, nint, nuint, nint, nint, void> readCallback,
delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> writeCallback,
delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> deleteCallback,
delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback
delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback,
out bool quantizationRequested
)
{
// TODO: This needs to be set appropriately - requires DiskANN changes
quantizationRequested = false;

unsafe
{
return NativeDiskANNMethods.create_index(context, dimensions, reduceDims, quantType, distanceMetric, buildExplorationFactor, numLinks, (nint)readCallback, (nint)writeCallback, (nint)deleteCallback, (nint)readModifyWriteCallback);
var ret = NativeDiskANNMethods.create_index(context, dimensions, reduceDims, quantType, distanceMetric, buildExplorationFactor, numLinks, (nint)readCallback, (nint)writeCallback, (nint)deleteCallback, (nint)readModifyWriteCallback);

Debug.Assert(ret != 0, "create_index failed, returning a null pointer - this shouldn't be possible");

return ret;
}
}

Expand All @@ -51,40 +59,45 @@ public nint RecreateIndex(
delegate* unmanaged[Cdecl]<ulong, uint, nint, nuint, nint, nint, void> readCallback,
delegate* unmanaged[Cdecl]<ulong, nint, nuint, nint, nuint, byte> writeCallback,
delegate* unmanaged[Cdecl]<ulong, nint, nuint, byte> deleteCallback,
delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback
delegate* unmanaged[Cdecl]<ulong, nint, nuint, nuint, nint, nint, byte> readModifyWriteCallback,
out bool quantizationRequested
)
=> CreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetricType, readCallback, writeCallback, deleteCallback, readModifyWriteCallback);
=> CreateIndex(context, dimensions, reduceDims, quantType, buildExplorationFactor, numLinks, distanceMetricType, readCallback, writeCallback, deleteCallback, readModifyWriteCallback, out quantizationRequested);

/// <summary>
/// Forwards to the native <c>drop_index</c> export for the given context and index handle.
/// </summary>
/// <param name="context">Context value passed through to the native call unchanged.</param>
/// <param name="index">Native index handle passed through to the native call unchanged.</param>
public void DropIndex(ulong context, nint index)
    => NativeDiskANNMethods.drop_index(context, index);

public bool Insert(ulong context, nint index, ReadOnlySpan<byte> id, VectorValueType vectorType, ReadOnlySpan<byte> vector, ReadOnlySpan<byte> attributes)
public bool Insert(ulong context, nint index, ReadOnlySpan<byte> id, ReadOnlySpan<byte> vector, int vectorElementCount, ReadOnlySpan<byte> attributes, out bool needsQuantization)
{
var id_data = Unsafe.AsPointer(ref MemoryMarshal.GetReference(id));
var id_len = id.Length;

var vector_data = Unsafe.AsPointer(ref MemoryMarshal.GetReference(vector));
int vector_len;

if (vectorType == VectorValueType.FP32)
{
vector_len = vector.Length / sizeof(float);
}
else if (vectorType == VectorValueType.XB8)
{
vector_len = vector.Length;
}
else
var attributes_data = Unsafe.AsPointer(ref MemoryMarshal.GetReference(attributes));
var attributes_len = attributes.Length;

var res = NativeDiskANNMethods.insert(context, index, (nint)id_data, (nuint)id_len, (nint)vector_data, (nuint)vectorElementCount, (nint)attributes_data, (nuint)attributes_len);
if (res == NativeDiskANNMethods.DiskANNInsertResult.False)
{
throw new NotImplementedException($"{vectorType}");
needsQuantization = false;
return false;
}

var attributes_data = Unsafe.AsPointer(ref MemoryMarshal.GetReference(attributes));
var attributes_len = attributes.Length;
needsQuantization = res == NativeDiskANNMethods.DiskANNInsertResult.QuantizationRequested;
return true;
}

return NativeDiskANNMethods.insert(context, index, (nint)id_data, (nuint)id_len, vectorType, (nint)vector_data, (nuint)vector_len, (nint)attributes_data, (nuint)attributes_len) == 1;
/// <summary>
/// Calls the native <c>build_quant_table</c> export for the given context and index handle.
/// </summary>
/// <param name="context">Context value passed through to the native call unchanged.</param>
/// <param name="index">Native index handle passed through to the native call unchanged.</param>
/// <returns><c>true</c> only when the native call returns exactly 1; any other value yields <c>false</c>.</returns>
public bool BuildQuantizationTable(ulong context, nint index)
{
    var nativeResult = NativeDiskANNMethods.build_quant_table(context, index);

    return nativeResult == 1;
}

/// <summary>
/// Calls the native <c>backfill_quant_vectors</c> export, passing the task index and task count
/// converted to <c>nuint</c>.
/// </summary>
/// <param name="context">Context value passed through to the native call unchanged.</param>
/// <param name="index">Native index handle passed through to the native call unchanged.</param>
/// <param name="taskIndex">Index of the calling task; must be non-negative.</param>
/// <param name="taskCount">Total number of tasks; must be non-negative.</param>
public void BackfillQuantizedVectors(ulong context, nint index, int taskIndex, int taskCount)
{
    // The casts below are unchecked: a negative int would silently wrap to a huge nuint and be
    // handed to native code. Catch that in debug builds, matching the assert style used in CreateIndex.
    Debug.Assert(taskIndex >= 0, "taskIndex must be non-negative before conversion to nuint");
    Debug.Assert(taskCount >= 0, "taskCount must be non-negative before conversion to nuint");

    NativeDiskANNMethods.backfill_quant_vectors(context, index, (nuint)taskIndex, (nuint)taskCount);
}

public bool Remove(ulong context, nint index, ReadOnlySpan<byte> id)
Expand All @@ -98,8 +111,8 @@ public bool Remove(ulong context, nint index, ReadOnlySpan<byte> id)
public int SearchVector(
ulong context,
nint index,
VectorValueType vectorType,
ReadOnlySpan<byte> vector,
int vectorElementCount,
float delta,
int searchExplorationFactor,
ReadOnlySpan<byte> filter,
Expand All @@ -110,20 +123,6 @@ out nint continuation
)
{
var vector_data = Unsafe.AsPointer(ref MemoryMarshal.GetReference(vector));
int vector_len;

if (vectorType == VectorValueType.FP32)
{
vector_len = vector.Length / sizeof(float);
}
else if (vectorType == VectorValueType.XB8)
{
vector_len = vector.Length;
}
else
{
throw new NotImplementedException($"{vectorType}");
}

var filter_data = Unsafe.AsPointer(ref MemoryMarshal.GetReference(filter));
var filter_len = filter.Length;
Expand Down Expand Up @@ -174,9 +173,8 @@ out nint continuation
return NativeDiskANNMethods.search_vector(
context,
index,
vectorType,
(nint)vector_data,
(nuint)vector_len,
(nuint)vectorElementCount,
delta,
searchExplorationFactor,
(nint)filter_data,
Expand Down Expand Up @@ -306,6 +304,13 @@ public bool CheckExternalIdValid(ulong context, nint index, ReadOnlySpan<byte> e

public static partial class NativeDiskANNMethods
{
/// <summary>
/// Byte-sized result code returned by the native <c>insert</c> import.
/// The numeric values are explicit because they cross the native boundary.
/// </summary>
public enum DiskANNInsertResult : byte
{
// The managed Insert wrapper returns false (with needsQuantization = false) for this value.
False = 0,
// The managed Insert wrapper returns true with needsQuantization = false for this value.
True = 1,
// The managed Insert wrapper returns true with needsQuantization = true for this value.
QuantizationRequested = 2,
}

const string DISKANN_GARNET = "diskann_garnet";

[LibraryImport(DISKANN_GARNET)]
Expand All @@ -330,12 +335,11 @@ nint index
);

[LibraryImport(DISKANN_GARNET)]
public static partial byte insert(
public static partial DiskANNInsertResult insert(
ulong context,
nint index,
nint id_data,
nuint id_len,
VectorValueType vector_value_type,
nint vector_data,
nuint vector_len,
nint attribute_data,
Expand Down Expand Up @@ -364,7 +368,6 @@ nuint attribute_len
public static partial int search_vector(
ulong context,
nint index,
VectorValueType vector_value_type,
nint vector_data,
nuint vector_len,
float delta,
Expand Down Expand Up @@ -430,5 +433,19 @@ public static partial byte check_external_id_valid(
nint external_id,
nuint external_id_len
);

/// <summary>
/// Native import from the <c>diskann_garnet</c> library. The managed <c>BuildQuantizationTable</c>
/// wrapper treats a return value of 1 as success and anything else as failure.
/// </summary>
[LibraryImport(DISKANN_GARNET)]
public static partial byte build_quant_table(
ulong context,
nint index
);

/// <summary>
/// Native import from the <c>diskann_garnet</c> library. The managed <c>BackfillQuantizedVectors</c>
/// wrapper supplies <c>task_index</c> and <c>task_count</c> from its int arguments converted to <c>nuint</c>.
/// NOTE(review): presumably <c>task_index</c> selects one of <c>task_count</c> partitions of the work - confirm against the native side.
/// </summary>
[LibraryImport(DISKANN_GARNET)]
public static partial void backfill_quant_vectors(
ulong context,
nint index,
nuint task_index,
nuint task_count
);
}
}
Loading
Loading