Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
086a3cb
Add E2E test for session.todos_changed event + readSqlTodosWithDepend…
SteveSandersonMS Jun 10, 2026
266555c
Use PlanTodo / PlanTodoDependency names, drop CREATE TABLE from prompt
SteveSandersonMS Jun 10, 2026
1fcc1c0
Add E2E coverage for session.todos_changed in .NET, Go, Python, Rust
SteveSandersonMS Jun 12, 2026
031ea1e
Add Java E2E coverage for session.todos_changed (WIP)
SteveSandersonMS Jun 12, 2026
9ffba90
Regenerate RPC bindings against @github/copilot 1.0.62
SteveSandersonMS Jun 15, 2026
762a2e1
Apply prettier formatting to session_todos_changed e2e test
SteveSandersonMS Jun 15, 2026
9f3a8ee
Apply ruff format to session_todos_changed python e2e test
SteveSandersonMS Jun 15, 2026
9221e06
Apply Java spotless formatting to SessionTodosChangedTest
SteveSandersonMS Jun 15, 2026
7b3e35a
Fix .NET test to use OrderBy for net472 compatibility
SteveSandersonMS Jun 15, 2026
ab07a04
Fix Rust e2e to use SessionTodosChanged variant now that codegen reco…
SteveSandersonMS Jun 15, 2026
001f6f4
Fix model switchTo e2e assertion to match runtime behavior
SteveSandersonMS Jun 15, 2026
c07cfb8
Revert "Fix model switchTo e2e assertion to match runtime behavior"
SteveSandersonMS Jun 15, 2026
5245ddd
Make model switchTo e2e wait for the switch to take effect
SteveSandersonMS Jun 15, 2026
f58c955
Tighten model switchTo e2e to assert the switch takes effect (5s poll)
SteveSandersonMS Jun 15, 2026
3d3d439
Use typed Rust RPC API for session_todos_changed E2E test
SteveSandersonMS Jun 15, 2026
b5772fe
Use gpt-5.4 in model switchto E2E tests
SteveSandersonMS Jun 15, 2026
f1460dd
Use active event-wait pattern in todos_changed E2E tests
SteveSandersonMS Jun 15, 2026
c50207a
Add gpt-5.4 to getcurrent snapshot to avoid cross-test model cache co…
SteveSandersonMS Jun 15, 2026
0e3d686
Document why getcurrent snapshot lists gpt-5.4
SteveSandersonMS Jun 15, 2026
ebb2a90
Isolate model switchTo test in its own SDK context
SteveSandersonMS Jun 15, 2026
aa99b00
Isolate model switchTo e2e in Go/Python/.NET; tighten Rust assertions
SteveSandersonMS Jun 15, 2026
fa47e5b
Rename Python switch_to test to switchto so it finds the shared snapshot
SteveSandersonMS Jun 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions dotnet/test/E2E/RpcSessionStateE2ETests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,31 @@ public async Task Should_Call_Session_Rpc_Model_GetCurrent()
[Fact]
public async Task Should_Call_Session_Rpc_Model_SwitchTo()
{
await using var session = await CreateSessionAsync(new SessionConfig { Model = "claude-sonnet-4.5" });
// The runtime caches /models per (auth, base_url) for 30 minutes (see
// capi_client.rs LIST_MODELS_CACHE). Tests in this class share one CLI
// subprocess and proxy URL via E2ETestFixture, so the first snapshot's
// models list is reused by every later test. SwitchTo needs gpt-5.4 in
// the cache; rather than poisoning every other snapshot we spin up an
// isolated context with its own proxy → its own (auth, base_url) cache
// key.
await using var isolatedCtx = await E2ETestContext.CreateAsync();
await isolatedCtx.ConfigureForTestAsync("rpc_session_state", nameof(Should_Call_Session_Rpc_Model_SwitchTo));
var isolatedClient = isolatedCtx.CreateClient();

await using var session = await isolatedClient.CreateSessionAsync(new SessionConfig
{
Model = "claude-sonnet-4.5",
OnPermissionRequest = PermissionHandler.ApproveAll,
});

var before = await session.Rpc.Model.GetCurrentAsync();
Assert.Equal("claude-sonnet-4.5", before.ModelId);

var result = await session.Rpc.Model.SwitchToAsync(modelId: "gpt-4.1", reasoningEffort: "high");
var after = await session.Rpc.Model.GetCurrentAsync();
var result = await session.Rpc.Model.SwitchToAsync(modelId: "gpt-5.4", reasoningEffort: "high");
Assert.Equal("gpt-5.4", result.ModelId);

Assert.Equal("gpt-4.1", result.ModelId);
Assert.True(after.ModelId is "gpt-4.1" || after.ModelId == before.ModelId, $"Unexpected current model after switch: {after.ModelId}");
var after = await session.Rpc.Model.GetCurrentAsync();
Assert.Equal("gpt-5.4", after.ModelId);
}

[Fact]
Expand Down
55 changes: 55 additions & 0 deletions dotnet/test/E2E/SessionTodosChangedE2ETests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
*--------------------------------------------------------------------------------------------*/

using GitHub.Copilot.Rpc;
using GitHub.Copilot.Test.Harness;
using Xunit;
using Xunit.Abstractions;

namespace GitHub.Copilot.Test.E2E;

/// <summary>
/// End-to-end test for the <c>session.todos_changed</c> event and for reading
/// SQL todos together with their dependencies through the plan RPC surface.
/// </summary>
public class SessionTodosChangedE2ETests(E2ETestFixture fixture, ITestOutputHelper output)
    : E2ETestBase(fixture, "session_todos_changed", output)
{
    // Ids inserted by the prompt, listed in ordinal sort order.
    private static readonly string[] ExpectedTodoIds = ["alpha", "beta"];

    // Prompt asking the agent to insert two todos and one dependency via the sql tool.
    private const string InsertTodosPrompt =
        "Use the sql tool to execute exactly these statements, in order, with no extra rows:\n" +
        "1. INSERT INTO todos (id, title, status) VALUES ('alpha', 'First todo', 'pending');\n" +
        "2. INSERT INTO todos (id, title, status) VALUES ('beta', 'Second todo', 'done');\n" +
        "3. INSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');\n" +
        "Then stop. Do not insert any other rows or create any other tables.";

    /// <summary>
    /// Sends the insert prompt, waits for a <see cref="SessionTodosChangedEvent"/>,
    /// then checks the rows and dependencies returned by
    /// <c>ReadSqlTodosWithDependenciesAsync</c>.
    /// </summary>
    [Fact]
    public async Task Fires_Session_Todos_Changed_And_Exposes_Rows_And_Dependencies()
    {
        var sessionConfig = new SessionConfig
        {
            OnPermissionRequest = PermissionHandler.ApproveAll,
        };
        await using var session = await CreateSessionAsync(sessionConfig);

        // Start listening before the prompt is sent; the task is awaited only
        // after SendAndWaitAsync has returned.
        var eventTimeout = TimeSpan.FromSeconds(30);
        var pendingTodosChanged =
            TestHelper.GetNextEventOfTypeAsync<SessionTodosChangedEvent>(session, eventTimeout);

        var message = new MessageOptions { Prompt = InsertTodosPrompt };
        await session.SendAndWaitAsync(message);

        await pendingTodosChanged;

        var readResult = await session.Rpc.Plan.ReadSqlTodosWithDependenciesAsync();

        // Drop null ids, then sort ordinally so the comparison does not depend on row order.
        var nonNullIds = readResult.Rows
            .Select(todo => todo.Id)
            .OfType<string>();
        var sortedIds = nonNullIds
            .OrderBy(todoId => todoId, StringComparer.Ordinal)
            .ToArray();

        Assert.Equal(ExpectedTodoIds, sortedIds);

        Assert.Contains(
            readResult.Dependencies,
            edge => edge.TodoId == "beta" && edge.DependsOn == "alpha");
    }
}
26 changes: 20 additions & 6 deletions go/internal/e2e/rpc_session_state_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,22 @@ func TestRPCSessionStateE2E(t *testing.T) {
}
})

// The runtime caches /models per (auth, base_url) for 30 minutes (see
// capi_client.rs LIST_MODELS_CACHE). Within this test function all subtests
// share one CLI subprocess and proxy URL, so the first subtest's snapshot
// models list is reused by every later one. SwitchTo needs gpt-5.4 in the
// cache; rather than poison every other snapshot we give this subtest its
// own dedicated client + proxy → its own cache entry.
t.Run("should call session rpc model switchTo", func(t *testing.T) {
session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
switchCtx := testharness.NewTestContext(t)
switchClient := switchCtx.NewClient()
t.Cleanup(func() { switchClient.ForceStop() })
if err := switchClient.Start(t.Context()); err != nil {
t.Fatalf("Failed to start switch client: %v", err)
}
switchCtx.ConfigureForTest(t)

session, err := switchClient.CreateSession(t.Context(), &copilot.SessionConfig{
Model: "claude-sonnet-4.5",
OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
})
Expand All @@ -61,21 +75,21 @@ func TestRPCSessionStateE2E(t *testing.T) {

reasoningEffort := "high"
result, err := session.RPC.Model.SwitchTo(t.Context(), &rpc.ModelSwitchToRequest{
ModelID: "gpt-4.1",
ModelID: "gpt-5.4",
ReasoningEffort: &reasoningEffort,
})
if err != nil {
t.Fatalf("Model.SwitchTo failed: %v", err)
}
if result.ModelID == nil || *result.ModelID != "gpt-4.1" {
t.Fatalf("Expected switch result model gpt-4.1, got %+v", result)
if result.ModelID == nil || *result.ModelID != "gpt-5.4" {
t.Fatalf("Expected switch result model gpt-5.4, got %+v", result)
}
after, err := session.RPC.Model.GetCurrent(t.Context())
if err != nil {
t.Fatalf("Model.GetCurrent after switch failed: %v", err)
}
if after.ModelID == nil || (*after.ModelID != "gpt-4.1" && *after.ModelID != *before.ModelID) {
t.Fatalf("Unexpected current model after switch; before=%q after=%+v", *before.ModelID, after)
if after.ModelID == nil || *after.ModelID != "gpt-5.4" {
t.Fatalf("Model.GetCurrent did not reflect SwitchTo; before=%q after=%+v", *before.ModelID, after)
}
})

Expand Down
79 changes: 79 additions & 0 deletions go/internal/e2e/session_todos_changed_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package e2e

import (
"context"
"slices"
"sort"
"testing"
"time"

copilot "github.com/github/copilot-sdk/go"
"github.com/github/copilot-sdk/go/internal/e2e/testharness"
)

// TestFiresSessionTodosChangedAndExposesRowsAndDependencies prompts the agent
// to run three INSERT statements through the sql tool, waits for a
// session.todos_changed event, and then checks the rows and dependencies
// returned by Plan.ReadSqlTodosWithDependencies.
func TestFiresSessionTodosChangedAndExposesRowsAndDependencies(t *testing.T) {
	harness := testharness.NewTestContext(t)
	client := harness.NewClient()
	t.Cleanup(func() { client.ForceStop() })

	t.Run("fires session.todos_changed and exposes rows and dependencies", func(t *testing.T) {
		const prompt = "Use the sql tool to execute exactly these statements, in order, with no extra rows:\n" +
			"1. INSERT INTO todos (id, title, status) VALUES ('alpha', 'First todo', 'pending');\n" +
			"2. INSERT INTO todos (id, title, status) VALUES ('beta', 'Second todo', 'done');\n" +
			"3. INSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');\n" +
			"Then stop. Do not insert any other rows or create any other tables."

		harness.ConfigureForTest(t)

		session, err := client.CreateSession(t.Context(), &copilot.SessionConfig{
			OnPermissionRequest: copilot.PermissionHandler.ApproveAll,
		})
		if err != nil {
			t.Fatalf("Failed to create session: %v", err)
		}
		defer session.Disconnect()

		// Register the waiter before sending the prompt; it is consumed only
		// after SendAndWait has returned. Any event of the type is accepted.
		acceptAny := func(copilot.SessionEvent) bool { return true }
		todosChanged := waitForMatchingEvent(
			session,
			copilot.SessionEventType("session.todos_changed"),
			acceptAny,
			"session.todos_changed event",
		)

		sendCtx, cancel := context.WithTimeout(t.Context(), 120*time.Second)
		defer cancel()
		if _, err := session.SendAndWait(sendCtx, copilot.MessageOptions{Prompt: prompt}); err != nil {
			t.Fatalf("Failed to send message: %v", err)
		}

		awaitEvent(t, todosChanged)

		result, err := session.RPC.Plan.ReadSqlTodosWithDependencies(t.Context())
		if err != nil {
			t.Fatalf("Plan.ReadSqlTodosWithDependencies failed: %v", err)
		}

		// Collect the non-empty ids and sort them so the comparison does not
		// depend on the order in which rows are returned.
		var ids []string
		for _, row := range result.Rows {
			if row.ID == nil || *row.ID == "" {
				continue
			}
			ids = append(ids, *row.ID)
		}
		sort.Strings(ids)
		want := []string{"alpha", "beta"}
		if !slices.Equal(ids, want) {
			t.Fatalf("Expected todo ids [alpha beta], got %v", ids)
		}

		hasBetaOnAlpha := false
		for _, dep := range result.Dependencies {
			if dep.TodoID != "beta" || dep.DependsOn != "alpha" {
				continue
			}
			hasBetaOnAlpha = true
			break
		}
		if !hasBetaOnAlpha {
			t.Fatalf("Expected dependency beta -> alpha, got %+v", result.Dependencies)
		}
	})
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
*--------------------------------------------------------------------------------------------*/

package com.github.copilot;

import static org.junit.jupiter.api.Assertions.*;

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;

import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

import com.github.copilot.generated.SessionTodosChangedEvent;
import com.github.copilot.generated.rpc.PlanSqlTodoDependency;
import com.github.copilot.rpc.MessageOptions;
import com.github.copilot.rpc.PermissionHandler;
import com.github.copilot.rpc.SessionConfig;

public class SessionTodosChangedTest {

private static E2ETestContext ctx;

/** Creates the E2E test context that this class's tests use. */
@BeforeAll
static void setup() throws Exception {
    SessionTodosChangedTest.ctx = E2ETestContext.create();
}

/** Closes the test context, if {@link #setup()} managed to create one. */
@AfterAll
static void teardown() throws Exception {
    if (ctx == null) {
        // Nothing to release: context creation never completed.
        return;
    }
    ctx.close();
}

@Test
void firesSessionTodosChangedAndExposesRowsAndDependencies() throws Exception {
ctx.configureForTest("session_todos_changed", "fires_session_todos_changed_and_exposes_rows_and_dependencies");

try (CopilotClient client = ctx.createClient()) {
CopilotSession session = client
.createSession(new SessionConfig().setOnPermissionRequest(PermissionHandler.APPROVE_ALL)).get();

CompletableFuture<SessionTodosChangedEvent> todosChanged = new CompletableFuture<>();
session.on(event -> {
if (event instanceof SessionTodosChangedEvent todosEvent && !todosChanged.isDone()) {
todosChanged.complete(todosEvent);
}
});

session.sendAndWait(new MessageOptions()
.setPrompt("Use the sql tool to execute exactly these statements, in order, with no extra rows:\n"
+ "1. INSERT INTO todos (id, title, status) VALUES ('alpha', 'First todo', 'pending');\n"
+ "2. INSERT INTO todos (id, title, status) VALUES ('beta', 'Second todo', 'done');\n"
+ "3. INSERT INTO todo_deps (todo_id, depends_on) VALUES ('beta', 'alpha');\n"
Comment on lines +55 to +57

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible contributor to the known Java test failure: no post-send event wait

The session.todos_changed event can fire asynchronously after sendAndWait returns (e.g., once the runtime finishes persisting state). The .NET, Go, and Rust tests guard against this by setting up an explicit awaitable before the send and then awaiting it after with a timeout:

// .NET pattern
var todosChangedTask = TestHelper.GetNextEventOfTypeAsync<SessionTodosChangedEvent>(session, TimeSpan.FromSeconds(30));
await session.SendAndWaitAsync(...);
await todosChangedTask; // waits for event even if it fires after SendAndWait returns

This test checks events immediately after sendAndWait with no additional wait. If the Java SDK's session.todos_changed notification arrives slightly after the response is received (which the PR description hints at), the assertTrue will fail even though the event does eventually arrive. Consider using a polling-wait or a CompletableFuture-based listener pattern similar to GetNextEventOfTypeAsync in .NET.

+ "Then stop. Do not insert any other rows or create any other tables."))
.get(120, TimeUnit.SECONDS);

assertNotNull(todosChanged.get(15, TimeUnit.SECONDS),
"Should have received at least one session.todos_changed event");

var result = session.getRpc().plan.readSqlTodosWithDependencies().get(15, TimeUnit.SECONDS);
assertEquals(2, result.rows().size());
var ids = result.rows().stream().map(row -> row.id()).filter(id -> id != null).sorted().toList();

assertEquals(java.util.List.of("alpha", "beta"), ids);
assertTrue(result.dependencies().stream().anyMatch(SessionTodosChangedTest::isBetaDependsOnAlpha),
"Should contain beta -> alpha dependency");

session.close();
}
}

/**
 * Tells whether the given dependency is the edge {@code beta -> alpha}.
 *
 * @param dependency the dependency entry to inspect
 * @return {@code true} when its todo id is "beta" and it depends on "alpha"
 */
private static boolean isBetaDependsOnAlpha(PlanSqlTodoDependency dependency) {
    if (!"beta".equals(dependency.todoId())) {
        return false;
    }
    return "alpha".equals(dependency.dependsOn());
}
}
44 changes: 29 additions & 15 deletions nodejs/test/e2e/rpc_session_state.e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,25 +49,39 @@ describe("Session-scoped RPC", async () => {
await session.disconnect();
});

it("should call session rpc model switchto", async () => {
const session = await client.createSession({
onPermissionRequest: approveAll,
model: "claude-sonnet-4.5",
});
// The runtime caches the /models response per (auth, base_url) for 30
// minutes (see capi_client.rs LIST_MODELS_CACHE), so within a single
// describe — where all tests share one CLI subprocess and proxy URL —
// the cache is primed by whichever test creates a session first. That
// makes any test which calls switchTo to a model not present in the
// first snapshot's models list fail silently (the runtime accepts the
// switch synchronously, then tool revalidation refetches the cached
// list, doesn't see the model, and reverts _selectedModel). Wrapping
// switchTo in its own describe gives it a dedicated subprocess + proxy
// → its own cache entry, so its snapshot's models list is authoritative.
describe("model switchTo (isolated to avoid models cache contamination)", async () => {
const { copilotClient: switchClient } = await createSdkTestContext();

it("should call session rpc model switchto", async () => {
const session = await switchClient.createSession({
onPermissionRequest: approveAll,
model: "claude-sonnet-4.5",
});

const before = await session.rpc.model.getCurrent();
expect(before.modelId).toBeTruthy();
const before = await session.rpc.model.getCurrent();
expect(before.modelId).toBeTruthy();

const result = await session.rpc.model.switchTo({
modelId: "gpt-4.1",
reasoningEffort: "high",
});
const after = await session.rpc.model.getCurrent();
const result = await session.rpc.model.switchTo({
modelId: "gpt-5.4",
reasoningEffort: "high",
});
const after = await session.rpc.model.getCurrent();

expect(result.modelId).toBe("gpt-4.1");
expect(after.modelId).toBe(before.modelId);
expect(result.modelId).toBe("gpt-5.4");
expect(after.modelId).toBe("gpt-5.4");

await session.disconnect();
await session.disconnect();
});
});

it("should shutdown session with routine type", async () => {
Expand Down
Loading
Loading