Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 142 additions & 0 deletions protobufs/agent/livekit_agent_turn_detector.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Copyright 2026 LiveKit, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package livekit.agent;

option go_package = "github.com/livekit/protocol/livekit/agent";
option csharp_namespace = "LiveKit.Proto";
option ruby_package = "LiveKit::Proto";
option optimize_for = SPEED;

import "agent/livekit_agent_session.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

// Audio encodings that turn-detector messages can name.
//
// NOTE(review): the only value sits in the proto3 zero (default) slot, so a
// field of this type that was never set reads back as OPUS. There is no
// *_UNSPECIFIED sentinel; introducing one at 0 would change the meaning of
// data already on the wire, so confirm with all peers before changing it.
enum TdAudioEncoding {
  // Opus-encoded audio (per the value name).
  TD_AUDIO_ENCODING_OPUS = 0;
}

// Audio parameters for a session; carried in TdSessionCreate.settings.
message TdSessionSettings {
  // Sample rate of the audio.
  // NOTE(review): unit is presumably Hz — confirm.
  uint32 sample_rate = 1;

  // Encoding of the audio.
  TdAudioEncoding encoding = 2;
}

// Timing measurements for one inference. Set on TdInferenceResponse.stats and
// embedded in TdProcessingStats.inference_stats.
message TdInferenceStats {
  // End-to-end latency. TdProcessingStats refers to this as the model-side
  // E2E latency.
  google.protobuf.Duration e2e_latency = 1;

  // Duration of preprocessing.
  // NOTE(review): presumably the work done before inference runs — confirm.
  google.protobuf.Duration preprocessing_duration = 2;

  // Duration of the inference step.
  google.protobuf.Duration inference_duration = 3;
}

// Error payload; one of the alternatives in the TdServerMessage oneof.
message TdError {
  // Description of the error.
  // NOTE(review): presumably human-readable text — confirm.
  string message = 1;

  // Error code, following the HTTP status code convention:
  //   4xx for client errors
  //   5xx for server errors
  uint32 code = 2;
}

// --- Client -> Server ---

// Client -> Server: session creation message, sent as
// TdClientMessage.session_create.
// NOTE(review): presumably answered by TdSessionCreated — confirm.
message TdSessionCreate {
  // Audio settings for the session.
  TdSessionSettings settings = 1;
}

// Client -> Server: audio input, sent as TdClientMessage.input_audio.
message TdInputAudio {
  // Raw audio bytes.
  // NOTE(review): presumably encoded per the session's TdSessionSettings
  // (encoding / sample_rate) — confirm.
  bytes audio = 1;

  // Creation timestamp for this audio.
  // NOTE(review): TdClientMessage also carries its own created_at; confirm
  // how the two are meant to differ.
  google.protobuf.Timestamp created_at = 2;
}

// Client -> Server: chat context input, sent as
// TdClientMessage.input_chat_context.
message TdInputChatContext {
  // Chat messages making up the context. ChatMessage is not defined in this
  // file; presumably it comes from agent/livekit_agent_session.proto.
  repeated ChatMessage messages = 1;
}

// Client -> Server: flush signal, sent as TdClientMessage.session_flush.
// Carries no fields.
// NOTE(review): what exactly gets flushed is not specified here — confirm
// against the server implementation.
message TdSessionFlush {}

// Client -> Server: session close signal, sent as
// TdClientMessage.session_close. Carries no fields.
// NOTE(review): presumably answered by TdSessionClosed — confirm.
message TdSessionClose {}

// Client -> Server: start-inference message, sent as
// TdClientMessage.inference_start.
message TdInferenceStart {
  // Identifier for this request.
  // NOTE(review): presumably echoed back in TdServerMessage.request_id —
  // confirm.
  string request_id = 1;
}

// Client -> Server: stop-inference message, sent as
// TdClientMessage.inference_stop.
message TdInferenceStop {
  // Identifier for this request.
  // NOTE(review): presumably matches the request_id of an earlier
  // TdInferenceStart — confirm.
  string request_id = 1;
}

// Envelope for everything sent in the Client -> Server direction. Exactly one
// of the payloads in the `message` oneof is set per envelope.
message TdClientMessage {
  // The payload carried by this envelope.
  oneof message {
    // Session creation.
    TdSessionCreate session_create = 1;
    // Audio input.
    TdInputAudio input_audio = 2;
    // Chat context input.
    TdInputChatContext input_chat_context = 3;
    // Session flush signal.
    TdSessionFlush session_flush = 4;
    // Session close signal.
    TdSessionClose session_close = 5;
    // Start-inference message.
    TdInferenceStart inference_start = 6;
    // Stop-inference message.
    TdInferenceStop inference_stop = 7;
  }

  // Creation timestamp of this envelope; lives outside the oneof, so it can
  // accompany any payload.
  google.protobuf.Timestamp created_at = 8;
}

// --- Server -> Model ---

// Server -> Model: input for one inference.
message TdInferenceRequest {
  // Audio bytes, described by `encoding` and `sample_rate` below.
  bytes audio = 1;

  // Assistant text supplied alongside the audio.
  // NOTE(review): presumably derived from the chat context the client sent
  // (TdInputChatContext) — confirm.
  string assistant_text = 2;

  // Encoding of `audio`.
  TdAudioEncoding encoding = 3;

  // Sample rate of `audio`.
  // NOTE(review): unit is presumably Hz — confirm.
  uint32 sample_rate = 4;
}

// --- Model -> Server ---

// Model -> Server: result of one inference.
message TdInferenceResponse {
  // Probability produced by the model.
  // NOTE(review): presumably the end-of-utterance probability in [0, 1] that
  // is forwarded as TdEouPrediction.probability — confirm.
  float probability = 1;

  // Timing measurements for this inference.
  TdInferenceStats stats = 2;
}

// --- Server -> Client ---

// Server -> Client: sent as TdServerMessage.session_created. Carries no
// fields.
// NOTE(review): presumably the acknowledgement of TdSessionCreate — confirm.
message TdSessionCreated {}

// Server -> Client: timing information attached to a TdEouPrediction.
message TdProcessingStats {
  // Earliest client-side creation timestamp covered by these stats.
  // NOTE(review): presumably the minimum created_at over the client inputs
  // that fed the prediction — confirm.
  google.protobuf.Timestamp earliest_client_created_at = 1;

  // Latest client-side creation timestamp covered by these stats.
  // NOTE(review): presumably the maximum created_at over the same inputs —
  // confirm.
  google.protobuf.Timestamp latest_client_created_at = 2;

  // Server-side E2E latency.
  google.protobuf.Duration e2e_latency = 3;

  // Inference stats, including the model-side E2E latency.
  TdInferenceStats inference_stats = 4;
}

// Server -> Client: a prediction, sent as TdServerMessage.eou_prediction.
// NOTE(review): "Eou" presumably stands for "end of utterance" — confirm.
message TdEouPrediction {
  // Predicted probability.
  // NOTE(review): presumably in [0, 1] — confirm.
  float probability = 1;

  // Timing information for how this prediction was produced.
  TdProcessingStats processing_stats = 2;
}

// Server -> Client: response counterpart of TdInferenceStart, sent as
// TdServerMessage.inference_started. Carries no fields of its own.
message TdInferenceStarted {}

// Server -> Client: sent as TdServerMessage.inference_stopped. Carries no
// fields of its own.
// NOTE(review): presumably the response counterpart of TdInferenceStop —
// confirm.
message TdInferenceStopped {}

// Server -> Client: sent as TdServerMessage.session_closed. Carries no
// fields.
// NOTE(review): presumably the acknowledgement of TdSessionClose — confirm.
message TdSessionClosed {}

// Envelope for everything sent in the Server -> Client direction. Exactly one
// of the payloads in the `message` oneof is set per envelope.
message TdServerMessage {
  // The payload carried by this envelope.
  oneof message {
    // Session-created message.
    TdSessionCreated session_created = 1;
    // Inference-started message.
    TdInferenceStarted inference_started = 2;
    // Inference-stopped message.
    TdInferenceStopped inference_stopped = 3;
    // Prediction result.
    TdEouPrediction eou_prediction = 4;
    // Session-closed message.
    TdSessionClosed session_closed = 5;
    // Error report.
    TdError error = 6;
  }

  // Request identifier, when one applies (explicit presence).
  // NOTE(review): presumably the request_id of the TdInferenceStart /
  // TdInferenceStop this message relates to — confirm.
  optional string request_id = 7;

  // Server-side creation timestamp of this envelope.
  google.protobuf.Timestamp server_created_at = 8;

  // Client-side creation timestamp, when one applies.
  // NOTE(review): message-typed fields already track presence in proto3, so
  // `optional` here is redundant; it is kept to leave the descriptor and
  // generated code unchanged.
  optional google.protobuf.Timestamp client_created_at = 9;
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we split this into a Request/Response message?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the RemoteSession, I have Request/Response and events
https://github.com/livekit/protocol/blob/main/protobufs/agent/livekit_agent_session.proto

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a nit, though.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess it depends on how you look at them: e.g. TdInferenceStart and TdInferenceStarted are a request-and-response pair; we just wrap all the requests and responses in TdClientMessage and TdServerMessage.

Loading