Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 239 additions & 0 deletions apps/daemon/src/codex-image-auth.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
// Pre-flight auth detection + failure classification for the codex-cli image
// provider (see the `Provider: codex-cli` block in media.ts).
//
// The built-in `image_gen` tool is a ChatGPT-hosted tool: it only works when
// the local Codex CLI is signed in with a genuine ChatGPT plan (OAuth tokens)
// AND routes through OpenAI's default model provider. An API key, a
// programmatic credential, or a third-party `model_provider` override cannot
// reach it. Rather than spawn a full `codex exec` turn and let the user wait
// ~minutes only to get a vague "no image" error, we read the same files the
// Codex CLI reads (`$CODEX_HOME/auth.json` + `config.toml`) and fail fast with
// a precise, actionable message.
//
// Schema ground truth: openai/codex `codex-rs/login/src/auth/storage.rs`
// (AuthDotJson) + `codex-rs/app-server-protocol/src/protocol/common.rs`
// (AuthMode, `#[serde(rename_all = "lowercase")]` → "apikey"/"chatgpt", plus
// "agentIdentity"/"personalAccessToken"/… for programmatic modes). `auth_mode`
// is optional in the file, so token/key presence is the robust signal and the
// field is only a corroborating hint — this mirrors codex's own
// `resolved_mode()` (OAuth tokens win, then OPENAI_API_KEY).
import { readFile } from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';

/**
 * Why the pre-flight check decided the local Codex CLI cannot drive the
 * built-in image_gen tool. Produced by `classifyCodexImageAuth`.
 */
export type CodexImageAuthReason =
// No OAuth access token and no API key; `auth_mode` is absent or "chatgpt".
| 'not-signed-in'
// No OAuth access token, but OPENAI_API_KEY is set or `auth_mode` is "apikey".
| 'api-key'
// No OAuth access token or API key; `auth_mode` names some other credential kind.
| 'programmatic'
// OAuth access token present, but top-level `model_provider` is not "openai".
| 'third-party-provider';

/**
 * Outcome of the pre-flight auth check, discriminated on `ok`.
 *
 * On failure, `detail` carries the offending value when the classifier has
 * one: the `auth_mode` string for 'programmatic', the provider id for
 * 'third-party-provider'. It is omitted for the other reasons.
 */
export type CodexImageAuthVerdict =
| { ok: true }
| { ok: false; reason: CodexImageAuthReason; detail?: string };

/**
 * Locate the Codex CLI home directory.
 *
 * A non-blank `CODEX_HOME` in `env` wins (surrounding whitespace removed);
 * otherwise the result is `.codex` under the current user's home directory.
 *
 * @param env Environment to consult; defaults to the daemon's own.
 * @returns The directory expected to hold `auth.json` and `config.toml`.
 */
export function resolveCodexHome(env: NodeJS.ProcessEnv = process.env): string {
  const override = (env.CODEX_HOME ?? '').trim();
  if (override !== '') {
    return override;
  }
  return path.join(os.homedir(), '.codex');
}

/** Type guard: true for non-null, non-array objects (i.e. plain key/value bags). */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return !Array.isArray(value);
}

/**
 * Walk `keys` down through nested plain objects and return the trimmed string
 * found at the end of the path.
 *
 * @returns The trimmed value, or `''` when any step of the path is not a
 *   plain object, the leaf is not a string, or the leaf is blank.
 */
function nestedString(obj: unknown, keys: string[]): string {
  // A non-record anywhere on the path collapses to `undefined`, which then
  // propagates through the remaining keys and fails the string check below.
  const leaf = keys.reduce<unknown>(
    (node, key) => (isRecord(node) ? node[key] : undefined),
    obj,
  );
  if (typeof leaf !== 'string') {
    return '';
  }
  return leaf.trim();
}

/**
 * The first top-level `model_provider = "..."` assignment in a config.toml.
 *
 * TOML top-level keys must precede any `[table]` header, so we scan until the
 * first header and stop. We deliberately do NOT resolve `profile`-scoped
 * overrides (`[profiles.x] model_provider = …`): that's a rare, advanced
 * config, and resolving it correctly needs a real TOML parser we don't ship.
 * A profile-scoped third-party provider therefore slips past this check and is
 * caught later by the runtime "image_gen unavailable" path — documented here
 * so the gap is visible rather than silent.
 *
 * The value must be closed by the same quote character that opened it, so a
 * value containing the other quote kind (`"bob's-proxy"`) is returned whole
 * instead of being cut at the first stray quote. The key may be bare or
 * quoted, as TOML allows both. Escape sequences inside basic strings are not
 * interpreted.
 *
 * @param configToml Raw config.toml text, or `null` when the file is absent.
 * @returns The trimmed provider id, or `''` when no top-level assignment with
 *   a non-empty quoted value is found.
 */
export function topLevelModelProvider(configToml: string | null): string {
  if (!configToml) return '';
  // Group 1 is the opening quote; group 2 is every character up to the
  // matching closing quote (at least one, so `model_provider = ""` is skipped).
  const assignment =
    /^(?:model_provider|"model_provider"|'model_provider')\s*=\s*(["'])((?:(?!\1).)+)\1/;
  for (const rawLine of configToml.split(/\r?\n/)) {
    const line = rawLine.trim();
    if (!line || line.startsWith('#')) continue;
    if (line.startsWith('[')) break; // entered a table; top-level region ended
    const value = assignment.exec(line)?.[2];
    if (value) return value.trim();
  }
  return '';
}

/**
 * Pure classifier for the pre-flight check: no I/O, just a decision over the
 * already-loaded auth cache and config text.
 *
 * OAuth tokens take priority (mirroring codex `resolved_mode`): when an access
 * token exists, the only remaining blocker is a non-OpenAI top-level
 * `model_provider`. Without one, the verdict explains which other credential
 * kind (if any) is configured.
 *
 * @param authJson Parsed contents of auth.json; anything that is not a plain
 *   object is treated as "no credentials".
 * @param configToml Raw config.toml text, or `null` when the file is absent.
 * @returns `{ ok: true }` when image_gen should be reachable, otherwise the
 *   failure reason plus the offending value where one exists.
 */
export function classifyCodexImageAuth(
  authJson: unknown,
  configToml: string | null,
): CodexImageAuthVerdict {
  const credentials = isRecord(authJson) ? authJson : null;
  const hasOAuthToken = nestedString(credentials, ['tokens', 'access_token']) !== '';

  if (hasOAuthToken) {
    // ChatGPT tokens are present; a third-party model_provider would still
    // route the turn away from OpenAI's hosted image_gen tool.
    const routedVia = topLevelModelProvider(configToml);
    if (routedVia !== '' && routedVia.toLowerCase() !== 'openai') {
      return { ok: false, reason: 'third-party-provider', detail: routedVia };
    }
    return { ok: true };
  }

  const hasApiKey = nestedString(credentials, ['OPENAI_API_KEY']) !== '';
  const declaredMode = nestedString(credentials, ['auth_mode']);

  if (hasApiKey || declaredMode === 'apikey') {
    return { ok: false, reason: 'api-key' };
  }
  if (declaredMode !== '' && declaredMode !== 'chatgpt') {
    // agentIdentity / personalAccessToken / bedrock* — programmatic creds
    // that don't carry the ChatGPT-hosted image_gen tool.
    return { ok: false, reason: 'programmatic', detail: declaredMode };
  }
  return { ok: false, reason: 'not-signed-in' };
}

/**
 * Best-effort JSON loader: resolves to the parsed document, or `null` when the
 * file cannot be read or does not contain valid JSON. Never rejects.
 */
async function readJsonIfPresent(file: string): Promise<unknown> {
  let raw: string;
  try {
    raw = await readFile(file, 'utf8');
  } catch {
    // Unreadable auth cache counts as "no credential"; the classifier maps
    // null → not-signed-in instead of the render crashing here.
    return null;
  }
  try {
    return JSON.parse(raw);
  } catch {
    // Malformed cache is handled the same way as a missing one.
    return null;
  }
}

/**
 * Best-effort text loader: resolves to the file's UTF-8 contents, or `null`
 * when the file cannot be read. Never rejects.
 */
async function readTextIfPresent(file: string): Promise<string | null> {
  let contents: string | null = null;
  try {
    contents = await readFile(file, 'utf8');
  } catch {
    // Deliberately ignored: an unreadable file is reported as null.
  }
  return contents;
}

/**
 * Load `auth.json` and `config.toml` from the resolved Codex home and classify
 * whether the built-in image_gen tool is reachable.
 *
 * @param env Environment used to resolve CODEX_HOME; defaults to the daemon's.
 * @returns The classifier's verdict. Missing or unreadable files are treated
 *   as absent rather than raising.
 */
export async function inspectCodexImageAuth(
  env: NodeJS.ProcessEnv = process.env,
): Promise<CodexImageAuthVerdict> {
  const codexHome = resolveCodexHome(env);
  // Start both reads before awaiting either so they proceed concurrently;
  // neither helper rejects, so awaiting them in sequence is safe.
  const pendingAuth = readJsonIfPresent(path.join(codexHome, 'auth.json'));
  const pendingConfig = readTextIfPresent(path.join(codexHome, 'config.toml'));
  return classifyCodexImageAuth(await pendingAuth, await pendingConfig);
}

/**
 * Turn a non-ok verdict into a precise, actionable error message.
 *
 * `detail` is optional on the verdict, so every branch that quotes it must
 * cope with its absence: the 'programmatic' branch falls back to a generic
 * phrase, and the 'third-party-provider' branch drops the
 * `(model_provider = "…")` parenthetical instead of printing "undefined".
 *
 * @param verdict A failed verdict from the pre-flight classifier.
 * @returns A user-facing explanation that names the fix.
 */
export function codexImageAuthErrorMessage(
  verdict: Extract<CodexImageAuthVerdict, { ok: false }>,
): string {
  switch (verdict.reason) {
    case 'not-signed-in':
      return (
        'Codex CLI is not signed in, so its built-in image_gen tool is '
        + 'unavailable. codex-image-gen needs an official ChatGPT login '
        + '(Plus, Pro, Business, Team or Edu). Run `codex login`, choose '
        + '"Sign in with ChatGPT", then retry.'
      );
    case 'api-key':
      return (
        'Codex CLI is signed in with an API key, but the built-in image_gen '
        + 'tool is only available on a ChatGPT plan — an API key bills the '
        + 'Images API and exposes no image_gen tool here. Run `codex login` '
        + 'and choose "Sign in with ChatGPT" (Plus/Pro/Business/Team) to use '
        + 'codex-image-gen.'
      );
    case 'programmatic':
      return (
        `Codex CLI is using ${verdict.detail ?? 'a programmatic'} auth, which `
        + 'cannot reach the ChatGPT-hosted image_gen tool. Sign in with a '
        + 'ChatGPT plan via `codex login` ("Sign in with ChatGPT") to use '
        + 'codex-image-gen.'
      );
    case 'third-party-provider': {
      // Only quote the override when we actually know its value.
      const override = verdict.detail ? ` (model_provider = "${verdict.detail}")` : '';
      return (
        `Codex CLI is routed through a third-party model provider${override}, `
        + 'which does not expose '
        + "OpenAI's hosted image_gen tool. Use the default OpenAI provider on "
        + 'a ChatGPT plan: remove the model_provider override in '
        + '$CODEX_HOME/config.toml (or select a profile without it), then retry.'
      );
    }
  }
}

// ── Runtime failure classification ──────────────────────────────────────────
// After codex exec exits non-zero we have only its stderr + agent text. Split
// the one generic "exited" bucket into the three operator-actionable classes
// fancyboi asked for: auth (session died mid-run), quota (ChatGPT usage limit),
// and transient (network/crash/everything else → safe to retry).

/**
 * Operator-actionable class of a non-zero `codex exec` exit:
 * 'auth' — the session was rejected as unauthenticated;
 * 'quota' — a usage or rate limit was hit;
 * 'transient' — anything else, treated as safe to retry.
 */
export type CodexImageFailureKind = 'auth' | 'quota' | 'transient';

// Keep these high-precision: a TRANSIENT blip mislabelled as quota tells the
// user to stop retrying for ~5h, and as auth tells them to re-login — both
// worse than the safe "just retry" default. Numeric HTTP codes are guarded
// two ways:
//   * against word/hyphen neighbours, so an embedded request id
//     (`req-401-abc`) can't trip them;
//   * against digit-group neighbours (a digit followed by `.`, `,` or `:` on
//     the left, or `.`, `,` or `:` followed by a digit on the right), so a
//     thousands-separated count (`12,401`), a decimal (`1.403`) or a
//     timestamp fraction (`12:00:00.429`) in the output isn't read as a
//     status code. Ordinary punctuation still matches (`status: 401.`).
// We deliberately omit a bare "try again later" (a classic transient phrase)
// from the quota set.
const AUTH_SIGNALS =
  /(?<![\w-])(?<!\d[.,:])(401|403)(?![\w-])(?![.,:]\d)|\b(unauthorized|unauthenticated)\b|not (logged|signed) in|invalid[\s_-]*(api[\s_-]*key|token)|expired[\s_-]*(token|session|credential)|codex login|sign in (again|with)/i;
const QUOTA_SIGNALS =
  /(?<![\w-])(?<!\d[.,:])429(?![\w-])(?![.,:]\d)|\bquota\b|usage limit|rate[\s_-]*limit|too many requests|insufficient[\s_-]*quota|hit your [\w\s]*limit/i;

/**
 * Bucket a failed codex image_gen run by scanning its combined output.
 *
 * Signals are tried in priority order — auth first, then quota — and the
 * first pattern that matches decides the kind. Output matching neither (or
 * empty output) is 'transient'.
 */
export function classifyCodexImageFailure(output: string): CodexImageFailureKind {
  const haystack = output || '';
  const prioritised: Array<[RegExp, CodexImageFailureKind]> = [
    [AUTH_SIGNALS, 'auth'],
    [QUOTA_SIGNALS, 'quota'],
  ];
  for (const [pattern, kind] of prioritised) {
    if (pattern.test(haystack)) {
      return kind;
    }
  }
  return 'transient';
}

/**
 * Build the failure message for a non-zero codex exit. `reason` is the
 * `exit N` / `signal X` summary; `tail` is the trailing codex output we attach
 * for context; `output` is the full combined stderr+agent text we classify on.
 *
 * For 'auth' and 'quota' the codex output is appended AFTER the explanation
 * (under "Codex said:"), so the quota text points the reader to the reset
 * "shown below" — and makes no such reference when there is no tail to show.
 * `reason` is only used in the 'transient' message.
 *
 * @returns A user-facing message tailored to the classified failure kind.
 */
export function codexImageFailureMessage(opts: {
  reason: string;
  tail: string;
  output: string;
}): string {
  const { reason, tail, output } = opts;
  const kind = classifyCodexImageFailure(output);
  const said = tail ? `\nCodex said:\n${tail}` : '';
  switch (kind) {
    case 'auth':
      return (
        'Codex rejected the image_gen request as unauthenticated — your '
        + 'ChatGPT session may have expired mid-run. Run `codex login` (sign in '
        + `with your ChatGPT plan) and retry.${said}`
      );
    case 'quota': {
      // Only refer to a displayed reset time when codex output is attached.
      const waitHint = tail ? 'Wait for the reset shown below' : 'Wait for the reset';
      return (
        "Codex hit your ChatGPT plan's usage limit while generating the image. "
        + 'image_gen shares your ChatGPT quota, which resets on a rolling '
        + `window (~every 5 hours). ${waitHint} or upgrade `
        + `your plan, then retry.${said}`
      );
    }
    case 'transient':
      // Preserve the original wording so a generic blip reads the same as
      // before and stays obviously retry-able.
      return `codex image_gen exited ${reason}${tail ? `\n${tail}` : ''}`;
  }
}
11 changes: 11 additions & 0 deletions apps/daemon/src/media-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ export type MediaModel = {

export const MEDIA_PROVIDERS: MediaProvider[] = [
{ id: 'openai', label: 'OpenAI', hint: 'gpt-image-2 / dall-e-3', integrated: true, defaultBaseUrl: 'https://api.openai.com/v1' },
// Local Codex CLI built-in image_gen — drives the operator's own
// already-signed-in Codex CLI (ChatGPT subscription), so it needs no
// OPENAI_API_KEY. Like `hyperframes`, it's a local renderer: no settings
// card, no stored credentials.
{ id: 'codex-cli', label: 'Codex CLI', hint: 'Local Codex CLI image_gen — uses ChatGPT subscription, no API key', integrated: true, credentialsRequired: false, settingsVisible: false },
{ id: 'volcengine', label: 'Volcengine Ark (Doubao)', hint: 'Seedance 2.0 / Seedream', integrated: true, defaultBaseUrl: 'https://ark.cn-beijing.volces.com/api/v3' },
{ id: 'grok', label: 'xAI Grok Imagine', hint: 'grok-imagine — image + video with native audio', integrated: true, defaultBaseUrl: 'https://api.x.ai/v1' },
{ id: 'hyperframes', label: 'HyperFrames', hint: 'Local HTML -> MP4 renderer', integrated: true, credentialsRequired: false, settingsVisible: false },
Expand Down Expand Up @@ -91,6 +96,12 @@ export const IMAGE_MODELS: MediaModel[] = [
{ id: 'dall-e-3', label: 'dall-e-3', hint: 'OpenAI · classic', provider: 'openai', caps: ['t2i'] },
{ id: 'dall-e-2', label: 'dall-e-2', hint: 'OpenAI · legacy', provider: 'openai', caps: ['t2i'] },

// No-API-key path to ChatGPT-grade image generation: the daemon spawns a
// headless `codex exec` turn and lets the operator's signed-in Codex CLI
// run its built-in image_gen tool. Works for ANY coding agent (Claude
// Code, Gemini, …), not just when Codex is the chat agent.
{ id: 'codex-image-gen', label: 'codex-image-gen', hint: 'Codex CLI · built-in image_gen · ChatGPT subscription, no API key', provider: 'codex-cli', caps: ['t2i'] },

{ id: 'doubao-seedream-3-0-t2i-250415', label: 'seedream-3.0', hint: 'ByteDance · Doubao image', provider: 'volcengine', caps: ['t2i'] },
{ id: 'doubao-seededit-3-0-i2i-250628', label: 'seededit-3.0', hint: 'ByteDance · image edit', provider: 'volcengine', caps: ['i2i'] },

Expand Down
Loading
Loading