Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions apps/server/src/orchestration/Layers/ProviderCommandReactor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import { resolveThreadWorkspaceCwd } from "../../checkpointing/Utils.ts";
import { increment, orchestrationEventsProcessedTotal } from "../../observability/Metrics.ts";
import { ProviderAdapterRequestError } from "../../provider/Errors.ts";
import type { ProviderServiceError } from "../../provider/Errors.ts";
import { classifyProviderServiceFailure } from "../../provider/providerFallback.ts";
import { TextGeneration } from "../../textGeneration/TextGeneration.ts";
import { ProviderService } from "../../provider/Services/ProviderService.ts";
import { ProviderRegistry } from "../../provider/Services/ProviderRegistry.ts";
Expand All @@ -41,6 +42,7 @@ import {
import { ServerSettingsService } from "../../serverSettings.ts";
import { VcsStatusBroadcaster } from "../../vcs/VcsStatusBroadcaster.ts";
import { GitWorkflowService } from "../../git/GitWorkflowService.ts";
import { attemptProviderFallback } from "../providerFallbackWorkflow.ts";
const isProviderAdapterRequestError = Schema.is(ProviderAdapterRequestError);
const isProviderDriverKind = Schema.is(ProviderDriverKind);

Expand Down Expand Up @@ -825,8 +827,46 @@ const make = Effect.gen(function* () {
);
};

/**
 * Tries to hand the turn over to a fallback provider before a turn-start
 * failure is reported.
 *
 * The cause is classified with `classifyProviderServiceFailure`; when it does
 * not classify as a fallback-eligible failure, no fallback is attempted.
 * Otherwise the original turn input (text, attachments, model selection and
 * interaction mode) is passed to `attemptProviderFallback`.
 *
 * @param cause - Cause of the failed turn start.
 * @returns `false` when the cause is not classified as a failure, otherwise
 *   the `switched` flag reported by `attemptProviderFallback`.
 */
const attemptFallbackBeforeReporting = Effect.fnUntraced(function* (
  cause: Cause.Cause<unknown>,
) {
  const failure = classifyProviderServiceFailure(cause);
  if (!failure) return false;

  // The event's explicit selection wins over the thread's stored selection.
  const modelSelection = event.payload.modelSelection ?? thread.modelSelection;

  // Normalize the message text once and reuse the result for both the
  // presence check and the forwarded value.
  // NOTE(review): assumes `toNonEmptyProviderInput` is pure — confirm.
  const providerInput = toNonEmptyProviderInput(message.text);

  const fallback = yield* attemptProviderFallback({
    threadId: event.payload.threadId,
    currentInstanceId: modelSelection.instanceId,
    modelSelection,
    runtimeMode: event.payload.runtimeMode,
    sendTurnInput: {
      threadId: event.payload.threadId,
      // Omit `input` / `attachments` entirely when there is nothing to send.
      ...(providerInput ? { input: providerInput } : {}),
      ...(message.attachments && message.attachments.length > 0
        ? { attachments: message.attachments }
        : {}),
      modelSelection,
      interactionMode: event.payload.interactionMode,
    },
    failure,
    // Only turns after the first user message require a compatible continuation.
    requireCompatibleContinuation: !isFirstUserMessageTurn,
    createdAt: event.payload.createdAt,
  });
  return fallback.switched;
});

const recoverTurnStartFailure = (cause: Cause.Cause<unknown>) =>
handleTurnStartFailure(cause).pipe(
attemptFallbackBeforeReporting(cause).pipe(
Effect.catchCause((fallbackCause) =>
Effect.logWarning("provider command reactor fallback attempt failed", {
eventType: event.type,
threadId: event.payload.threadId,
cause: Cause.pretty(fallbackCause),
originalCause: Cause.pretty(cause),
}).pipe(Effect.as(false)),
),
Effect.flatMap((switched) => (switched ? Effect.void : handleTurnStartFailure(cause))),
Effect.catchCause((recoveryCause) =>
Effect.logWarning("provider command reactor failed to recover turn start failure", {
eventType: event.type,
Expand All @@ -848,7 +888,7 @@ const make = Effect.gen(function* () {
createdAt: event.payload.createdAt,
}).pipe(
Effect.map(Option.some),
Effect.catchCause((cause) => handleTurnStartFailure(cause).pipe(Effect.as(Option.none()))),
Effect.catchCause((cause) => recoverTurnStartFailure(cause).pipe(Effect.as(Option.none()))),
);

if (Option.isNone(sendTurnRequest)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import {
ProviderService,
type ProviderServiceShape,
} from "../../provider/Services/ProviderService.ts";
import { makeProviderRegistryLayer } from "../../provider/testUtils/providerRegistryMock.ts";
import * as RepositoryIdentityResolver from "../../project/RepositoryIdentityResolver.ts";
import { OrchestrationEngineLive } from "./OrchestrationEngine.ts";
import { OrchestrationProjectionPipelineLive } from "./ProjectionPipeline.ts";
Expand Down Expand Up @@ -238,6 +239,7 @@ describe("ProviderRuntimeIngestion", () => {
Layer.provideMerge(projectionSnapshotLayer),
Layer.provideMerge(SqlitePersistenceMemory),
Layer.provideMerge(Layer.succeed(ProviderService, provider.service)),
Layer.provideMerge(makeProviderRegistryLayer([])),
Layer.provideMerge(makeTestServerSettingsLayer(options?.serverSettings)),
Layer.provideMerge(ServerConfig.layerTest(process.cwd(), process.cwd())),
Layer.provideMerge(NodeServices.layer),
Expand Down
60 changes: 60 additions & 0 deletions apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import * as Stream from "effect/Stream";
import { makeDrainableWorker } from "@t3tools/shared/DrainableWorker";

import { ProviderService } from "../../provider/Services/ProviderService.ts";
import { classifyProviderRuntimeFailure } from "../../provider/providerFallback.ts";
import { ProjectionTurnRepository } from "../../persistence/Services/ProjectionTurns.ts";
import { ProjectionTurnRepositoryLive } from "../../persistence/Layers/ProjectionTurns.ts";
import { isGitRepository } from "../../git/Utils.ts";
Expand All @@ -38,6 +39,7 @@ import {
type ProviderRuntimeIngestionShape,
} from "../Services/ProviderRuntimeIngestion.ts";
import { ServerSettingsService } from "../../serverSettings.ts";
import { attemptProviderFallback } from "../providerFallbackWorkflow.ts";

const providerTurnKey = (threadId: ThreadId, turnId: TurnId) => `${threadId}:${turnId}`;

Expand All @@ -54,6 +56,8 @@ const BUFFERED_MESSAGE_TEXT_BY_MESSAGE_ID_TTL = Duration.minutes(120);
const BUFFERED_PROPOSED_PLAN_BY_ID_CACHE_CAPACITY = 10_000;
const BUFFERED_PROPOSED_PLAN_BY_ID_TTL = Duration.minutes(120);
const MAX_BUFFERED_ASSISTANT_CHARS = 24_000;
// Sizing for the cache of already-handled fallback events: at most 10,000
// entries, each retained for 120 minutes.
const HANDLED_FALLBACK_EVENT_CACHE_CAPACITY = 10_000;
const HANDLED_FALLBACK_EVENT_TTL = Duration.minutes(120);
const STRICT_PROVIDER_LIFECYCLE_GUARD = process.env.T3CODE_STRICT_PROVIDER_LIFECYCLE_GUARD !== "0";

type TurnStartRequestedDomainEvent = Extract<
Expand Down Expand Up @@ -666,6 +670,12 @@ const make = Effect.gen(function* () {
lookup: () => Effect.succeed({ text: "", createdAt: "" }),
});

// Remembers which fallback keys have already triggered a fallback attempt.
// Entries are written explicitly with `Cache.set` and probed with
// `Cache.getOption`; the `lookup` simply yields `true`.
// NOTE(review): presumably `Cache.getOption` does not invoke `lookup` for
// absent keys — confirm against the Effect Cache API.
const handledFallbackEvents = yield* Cache.make<string, true>({
capacity: HANDLED_FALLBACK_EVENT_CACHE_CAPACITY,
timeToLive: HANDLED_FALLBACK_EVENT_TTL,
lookup: () => Effect.succeed(true),
});

const resolveThreadDetail = Effect.fn("resolveThreadDetail")(function* (threadId: ThreadId) {
return yield* projectionSnapshotQuery
.getThreadDetailById(threadId)
Expand Down Expand Up @@ -1208,6 +1218,17 @@ const make = Effect.gen(function* () {
const thread = yield* resolveThreadShell(event.threadId);
if (!thread) return;

// Trial sessions can emit before a fallback handoff commits or after it
// rolls back. Ignore events from an instance that no longer owns the
// thread so stale trial output cannot overwrite the restored binding.
if (
event.providerInstanceId !== undefined &&
thread.session?.providerInstanceId !== undefined &&
event.providerInstanceId !== thread.session.providerInstanceId
) {
return;
Comment thread
cursor[bot] marked this conversation as resolved.
}

let loadedThreadDetail: OrchestrationThread | null | undefined;
const getLoadedThreadDetail = () =>
Effect.gen(function* () {
Expand All @@ -1222,6 +1243,45 @@ const make = Effect.gen(function* () {
const eventTurnId = toTurnId(event.turnId);
const activeTurnId = thread.session?.activeTurnId ?? null;

// Runtime fallback: when this event classifies as a provider failure, try to
// switch the thread to a fallback provider instead of processing the event.
const fallbackFailure = classifyProviderRuntimeFailure(event);
// Prefer the instance named on the event; otherwise use the session's instance.
const fallbackInstanceId = event.providerInstanceId ?? thread.session?.providerInstanceId;
// Only attempt a fallback when the failing instance is known and there is a
// turn (active or named by the event) to associate the failure with.
if (
fallbackFailure &&
fallbackInstanceId !== undefined &&
(activeTurnId !== null || eventTurnId !== undefined)
) {
// One attempt per thread / instance / turn: the event's turn id is preferred,
// then the active turn id, then the event id.
const fallbackKey = `${thread.id}:${fallbackInstanceId}:${eventTurnId ?? activeTurnId ?? event.eventId}`;
const handled = yield* Cache.getOption(handledFallbackEvents, fallbackKey);
if (Option.isNone(handled)) {
// Mark the key as handled before attempting, so the key is already recorded
// whether the attempt succeeds or fails.
yield* Cache.set(handledFallbackEvents, fallbackKey, true);
const fallback = yield* attemptProviderFallback({
threadId: thread.id,
currentInstanceId: fallbackInstanceId,
modelSelection: thread.modelSelection,
runtimeMode: thread.runtimeMode,
sendTurnInput: {
threadId: thread.id,
// The turn input sent with the fallback attempt is the fixed prompt "Continue.".
input: "Continue.",
modelSelection: thread.modelSelection,
interactionMode: thread.interactionMode,
},
failure: fallbackFailure,
requireCompatibleContinuation: true,
createdAt: now,
}).pipe(
// A failed fallback attempt is logged and treated as "not switched", so the
// event continues through the normal processing below.
Effect.catchCause((cause) =>
Effect.logWarning("provider runtime fallback attempt failed", {
eventId: event.eventId,
eventType: event.type,
threadId: thread.id,
cause: Cause.pretty(cause),
}).pipe(Effect.as({ switched: false, skipped: [] })),
),
);
// When the fallback switched, stop processing this failure event here.
if (fallback.switched) return;
}
}

const conflictsWithActiveTurn =
activeTurnId !== null && eventTurnId !== undefined && !sameId(activeTurnId, eventTurnId);
const missingTurnForActiveTurn = activeTurnId !== null && eventTurnId === undefined;
Expand Down
Loading
Loading