livekit · chenghao-mou · Apr 24, 2026 · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.changeset/many-seas-fry.md b/.changeset/many-seas-fry.md
@@ -0,0 +1,5 @@
+---
+"@fake-scope/fake-pkg": patch
+---
+
+Add turn detection protobufs
diff --git a/protobufs/agent/livekit_agent_inference.proto b/protobufs/agent/livekit_agent_inference.proto
@@ -0,0 +1,219 @@
+// Copyright 2026 LiveKit, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package livekit.agent;
+
+// Per-language settings for generated code: Go import path, C# namespace and
+// Ruby module.
+option go_package = "github.com/livekit/protocol/livekit/agent";
+option csharp_namespace = "LiveKit.Proto";
+option ruby_package = "LiveKit::Proto";
+// Generated code is optimized for speed.
+option optimize_for = SPEED;
+
+import "agent/livekit_agent_session.proto";
+import "google/protobuf/duration.proto";
+import "google/protobuf/timestamp.proto";
+
+// --- Shared Types ---
+
+// Audio encodings referenced by SessionSettings, EotInferenceRequest and
+// InterruptionInferenceRequest.
+enum AudioEncoding {
+  // Value 0, so an unset `encoding` field reads as this value.
+  // NOTE(review): presumably signed 16-bit little-endian PCM — confirm.
+  AUDIO_ENCODING_PCM_S16LE = 0;
+  // NOTE(review): presumably Opus-compressed audio — confirm.
+  AUDIO_ENCODING_OPUS = 1;
+}
+
+// Settings carried by SessionCreate.
+message SessionSettings {
+  // NOTE(review): presumably the audio sample rate in Hz — confirm the unit.
+  uint32 sample_rate = 1;
+  // Audio encoding; reads as AUDIO_ENCODING_PCM_S16LE (value 0) when unset.
+  AudioEncoding encoding = 2;
+  // At most one of the type-specific settings below is set.
+  oneof type_settings {
+    EotSettings eot_settings = 3;
+    InterruptionSettings interruption_settings = 4;
+  }
+}
+
+// Error payload; it is the `error` member of the ServerMessage oneof.
+message Error {
+  // Error description text.
+  string message = 1;
+  // error code follows the HTTP status code convention
+  // 4xx for client errors
+  // 5xx for server errors
+  uint32 code = 2;
+}
+
+// --- End of Turn (EOT) Settings ---
+
+// End-of-turn (EOT) settings, selected through SessionSettings.eot_settings.
+message EotSettings {
+  // NOTE(review): presumably how often detection is run — confirm.
+  google.protobuf.Duration detection_interval = 1;
+}
+
+// --- Interruption Settings ---
+
+// Interruption settings, selected through
+// SessionSettings.interruption_settings.
+message InterruptionSettings {
+  // detection threshold in range [0.0, 1.0]; higher values are less sensitive
+  float threshold = 1;
+  // minimum number of frames with probability greater than threshold to trigger an interruption
+  uint32 min_frames = 2;
+  // NOTE(review): presumably an upper bound on the audio considered per
+  // detection — confirm.
+  google.protobuf.Duration max_audio_duration = 3;
+  // NOTE(review): presumably the amount of earlier audio included ahead of
+  // the analysed segment — confirm.
+  google.protobuf.Duration audio_prefix_duration = 4;
+  // NOTE(review): presumably how often detection is run — confirm.
+  google.protobuf.Duration detection_interval = 5;
+}
+
+// --- Client -> Server ---
+
+// Carries the session settings; it is the `session_create` member of the
+// ClientMessage oneof.
+message SessionCreate {
+  // Sample rate, encoding and type-specific settings for the session.
+  SessionSettings settings = 1;
+}
+
+// Audio payload; it is the `input_audio` member of the ClientMessage oneof.
+message InputAudio {
+  // Raw audio bytes.
+  // NOTE(review): presumably encoded per SessionSettings.encoding — confirm.
+  bytes audio = 1;
+  // NOTE(review): presumably when this audio chunk was created on the
+  // client — confirm.
+  google.protobuf.Timestamp created_at = 2;
+  // NOTE(review): presumably the number of samples contained in `audio` —
+  // confirm (per channel vs. total is not visible here).
+  uint32 num_samples = 3;
+}
+
+// Chat context for end-of-turn detection; it is the `eot_input_chat_context`
+// member of the ClientMessage oneof.
+message EotInputChatContext {
+  // ChatMessage is not declared in this file.
+  // NOTE(review): presumably it comes from agent/livekit_agent_session.proto
+  // — confirm.
+  repeated ChatMessage messages = 1;
+}
+
+// Field-less message; it is the `session_flush` member of the ClientMessage
+// oneof.
+message SessionFlush {}
+
+// Field-less message; it is the `session_close` member of the ClientMessage
+// oneof.
+message SessionClose {}
+
+// The `inference_start` member of the ClientMessage oneof.
+message InferenceStart {
+  // Identifier for the request.
+  // NOTE(review): presumably matched by InferenceStop.request_id and
+  // ServerMessage.request_id — confirm.
+  string request_id = 1;
+}
+
+// The `inference_stop` member of the ClientMessage oneof.
+message InferenceStop {
+  // Identifier for the request.
+  // NOTE(review): presumably the same id given in InferenceStart — confirm.
+  string request_id = 1;
+}
+
+// audio buffer sentinel messages
+// BufferStart has no fields; it is the `buffer_start` member of the
+// ClientMessage oneof.
+message BufferStart {}
+
+// Field-less audio buffer sentinel; it is the `buffer_stop` member of the
+// ClientMessage oneof.
+message BufferStop {}
+
+// Envelope for the "Client -> Server" messages: a timestamp plus at most one
+// payload from the `message` oneof.
+message ClientMessage {
+  // NOTE(review): presumably the client-side creation time that
+  // ServerMessage.client_created_at echoes — confirm.
+  google.protobuf.Timestamp created_at = 1;
+  // Setting one member clears any previously set member.
+  oneof message {
+    SessionCreate session_create = 2;
+    InputAudio input_audio = 3;
+    SessionFlush session_flush = 4;
+    SessionClose session_close = 5;
+    InferenceStart inference_start = 6;
+    InferenceStop inference_stop = 7;
+    BufferStart buffer_start = 8;
+    BufferStop buffer_stop = 9;
+    // only for end of turn
+    EotInputChatContext eot_input_chat_context = 10;
+  }
+}
+
+// --- Server -> Model ---
+
+// End-of-turn request payload; it is the `eot_inference_request` member of
+// the InferenceRequest oneof.
+message EotInferenceRequest {
+  // Audio bytes, described by `encoding` and `sample_rate` below.
+  bytes audio = 1;
+  // NOTE(review): presumably the assistant's text used as context for the
+  // end-of-turn decision — confirm.
+  string assistant_text = 2;
+  // Reads as AUDIO_ENCODING_PCM_S16LE (value 0) when unset.
+  AudioEncoding encoding = 3;
+  // NOTE(review): presumably in Hz — confirm the unit.
+  uint32 sample_rate = 4;
+}
+
+// Interruption request payload; it is the `interruption_inference_request`
+// member of the InferenceRequest oneof.
+message InterruptionInferenceRequest {
+  // Audio bytes, described by `encoding` and `sample_rate` below.
+  bytes audio = 1;
+  // Reads as AUDIO_ENCODING_PCM_S16LE (value 0) when unset.
+  AudioEncoding encoding = 2;
+  // NOTE(review): presumably in Hz — confirm the unit.
+  uint32 sample_rate = 3;
+}
+
+// Envelope for the "Server -> Model" requests: at most one of the two
+// request kinds is set.
+message InferenceRequest {
+  oneof request {
+    EotInferenceRequest eot_inference_request = 1;
+    InterruptionInferenceRequest interruption_inference_request = 2;
+  }
+}
+
+// Timing breakdown referenced by EotInferenceResponse,
+// InterruptionInferenceResponse and ProcessingStats.
+// Declared at file scope (not nested inside ProcessingStats) because the
+// response messages refer to it by its unqualified name.
+message InferenceStats {
+  // server-side e2e latency (server input to server output)
+  google.protobuf.Duration e2e_latency = 1;
+  // NOTE(review): presumably time spent preparing the input before
+  // inference — confirm.
+  google.protobuf.Duration preprocessing_duration = 2;
+  // NOTE(review): presumably time spent in the inference call itself —
+  // confirm.
+  google.protobuf.Duration inference_duration = 3;
+}
+
+// Timing information attached to EotPrediction and InterruptionPrediction.
+// Declared exactly once; a second declaration of the same name in this
+// package is rejected by protoc.
+message ProcessingStats {
+  // NOTE(review): presumably the earliest and latest client `created_at`
+  // values among the inputs covered by the prediction — confirm.
+  google.protobuf.Timestamp earliest_client_created_at = 1;
+  google.protobuf.Timestamp latest_client_created_at = 2;
+  // client-side e2e latency (client send to client receive)
+  google.protobuf.Duration e2e_latency = 3;
+  // Timing breakdown; see InferenceStats.
+  InferenceStats inference_stats = 4;
+}
+
+
+// End-of-turn result; it is the `eot_inference_response` member of the
+// InferenceResponse oneof.
+message EotInferenceResponse {
+  // NOTE(review): presumably the end-of-turn probability in [0.0, 1.0] —
+  // confirm the range.
+  float probability = 1;
+  // Timing breakdown for this inference.
+  InferenceStats stats = 2;
+}
+
+// Interruption result; it is the `interruption_inference_response` member of
+// the InferenceResponse oneof.
+message InterruptionInferenceResponse {
+  // Whether an interruption was flagged.
+  bool is_interruption = 1;
+  // per frame probabilities
+  repeated float probabilities = 2;
+  // Timing breakdown for this inference.
+  InferenceStats stats = 3;
+}
+
+// Envelope for inference results, mirroring InferenceRequest: at most one of
+// the two response kinds is set.
+message InferenceResponse {
+  oneof response {
+    EotInferenceResponse eot_inference_response = 1;
+    InterruptionInferenceResponse interruption_inference_response = 2;
+  }
+}
+
+// --- Server -> Client ---
+
+// Field-less message; it is the `session_created` member of the
+// ServerMessage oneof.
+message SessionCreated {}
+
+// Field-less message; it is the `inference_started` member of the
+// ServerMessage oneof.
+message InferenceStarted {}
+
+// Field-less message; it is the `inference_stopped` member of the
+// ServerMessage oneof.
+message InferenceStopped {}
+
+// Field-less message; it is the `session_closed` member of the ServerMessage
+// oneof.
+message SessionClosed {}
+
+// End-of-turn prediction; it is the `eot_prediction` member of the
+// Prediction oneof.
+message EotPrediction {
+  // NOTE(review): presumably the end-of-turn probability in [0.0, 1.0] —
+  // confirm the range.
+  float probability = 1;
+  // Client- and server-side timing for this prediction.
+  ProcessingStats processing_stats = 2;
+}
+
+// Interruption prediction; it is the `interruption_prediction` member of the
+// Prediction oneof.
+message InterruptionPrediction {
+  // Whether an interruption was flagged.
+  bool is_interruption = 1;
+  // NOTE(review): presumably per-frame probabilities, as in
+  // InterruptionInferenceResponse.probabilities — confirm.
+  repeated float probabilities = 2;
+  // Client- and server-side timing for this prediction.
+  ProcessingStats processing_stats = 3;
+  // NOTE(review): presumably when this prediction was created — confirm
+  // which clock (client or server) is used.
+  google.protobuf.Timestamp created_at = 4;
+  // NOTE(review): presumably how long producing the prediction took —
+  // confirm what interval is measured.
+  google.protobuf.Duration prediction_duration = 5;
+}
+
+// Envelope for predictions; it is the `prediction` member of the
+// ServerMessage oneof. At most one of the two prediction kinds is set.
+message Prediction {
+  oneof prediction {
+    EotPrediction eot_prediction = 1;
+    InterruptionPrediction interruption_prediction = 2;
+  }
+}
+
+// Envelope for the "Server -> Client" messages: metadata fields plus at most
+// one payload from the `message` oneof.
+message ServerMessage {
+  // NOTE(review): presumably when the server created this message — confirm.
+  google.protobuf.Timestamp server_created_at = 1;
+  // Declared `optional`, so presence is tracked explicitly.
+  // NOTE(review): presumably echoes the request_id from InferenceStart /
+  // InferenceStop — confirm.
+  optional string request_id = 2;
+  // echoes the client-side created_at timestamp
+  optional google.protobuf.Timestamp client_created_at = 3;
+  // Setting one member clears any previously set member.
+  oneof message {
+    SessionCreated session_created = 4;
+    InferenceStarted inference_started = 5;
+    InferenceStopped inference_stopped = 6;
+    SessionClosed session_closed = 7;
+    Error error = 8;
+    Prediction prediction = 9;
+  }
+}