From d9f195ea39fc961d4bd78715ee97257ccb6c5e06 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Sat, 13 Jun 2026 14:04:42 -0700 Subject: [PATCH] fix(codex): budget oauth route at codex context Separate model-native context metadata from provider-effective runtime capability so OpenAI API gpt-5.5 stays at its documented 1.05M window while the openai-codex OAuth route budgets preflight, recovery, capacity checks, prompt text, and TUI context indicators against the Codex-family 400K envelope. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 4 + .../tui/src/commands/groups/config/status.rs | 4 +- crates/tui/src/config.rs | 9 +- crates/tui/src/core/engine.rs | 19 +++- crates/tui/src/core/engine/capacity_flow.rs | 10 +-- crates/tui/src/core/engine/context.rs | 21 +++-- crates/tui/src/core/engine/tests.rs | 29 +++++- crates/tui/src/core/engine/turn_loop.rs | 4 +- crates/tui/src/models.rs | 88 ++++++++++++++++--- crates/tui/src/prompts.rs | 52 +++++++++-- crates/tui/src/tui/context_inspector.rs | 9 +- crates/tui/src/tui/sidebar.rs | 3 +- crates/tui/src/tui/ui.rs | 12 +-- docs/PROVIDERS.md | 5 +- 14 files changed, 216 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 363f0862..b3a46923 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -222,6 +222,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 CodeWhale aliases now use OpenAI's documented 1,050,000-token context window and 128,000 max-output metadata for context pressure, prompts, and doctor capability output. +- **OpenAI Codex effective context budgeting.** The public OpenAI API metadata + for `gpt-5.5` remains 1,050,000 tokens, but the `openai-codex` OAuth route now + budgets prompts against the 400K Codex-family effective window so preflight + compaction runs before the backend returns `context_length_exceeded`. - **OpenRouter Nemotron 3 Ultra preset.** The OpenRouter preset and model registry now emit `nvidia/nemotron-3-ultra-550b-a55b` while keeping the old Ultra aliases compatible. diff --git a/crates/tui/src/commands/groups/config/status.rs b/crates/tui/src/commands/groups/config/status.rs index fb1a7e6d..27d1a282 100644 --- a/crates/tui/src/commands/groups/config/status.rs +++ b/crates/tui/src/commands/groups/config/status.rs @@ -5,7 +5,7 @@ use std::path::Path; use super::CommandResult; use crate::compaction::estimate_input_tokens_conservative; -use crate::models::{LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, context_window_for_model}; +use crate::config::provider_capability; use crate::tui::app::App; use crate::utils::{display_path, estimate_message_chars}; @@ -166,7 +166,7 @@ fn footer_items(app: &App) -> String { } fn context_usage(app: &App) -> (usize, u32, f64) { - let max = context_window_for_model(&app.model).unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS); + let max = provider_capability(app.api_provider, &app.model).context_window; let estimated = estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref()); let total_chars = estimate_message_chars(&app.api_messages); diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index c73bbeb0..e8eb8e72 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -156,6 +156,7 @@ pub const DEFAULT_TOGETHER_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; pub const DEFAULT_TOGETHER_BASE_URL: &str = "https://api.together.xyz/v1"; pub const DEFAULT_OPENAI_CODEX_MODEL: &str = "gpt-5.5"; pub const DEFAULT_OPENAI_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api"; +pub const OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS: u32 = 400_000; /// Legacy `deepseek-cn` provider alias. /// /// DeepSeek's official API host is the same worldwide. Keep this alias for @@ -433,8 +434,7 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi return ProviderCapability { provider, resolved_model: resolved_model.to_string(), - context_window: crate::models::context_window_for_model(resolved_model) - .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS), + context_window: OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS, max_output: crate::models::max_output_tokens_for_model(resolved_model).unwrap_or(4096), thinking_supported: true, cache_telemetry_supported: false, @@ -11442,7 +11442,10 @@ model = "deepseek-ai/deepseek-v4-pro" let cap = provider_capability(ApiProvider::OpenaiCodex, DEFAULT_OPENAI_CODEX_MODEL); assert_eq!(cap.provider, ApiProvider::OpenaiCodex); assert_eq!(cap.resolved_model, DEFAULT_OPENAI_CODEX_MODEL); - assert_eq!(cap.context_window, 1_050_000); + assert_eq!( + cap.context_window, + OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS + ); assert_eq!(cap.max_output, 128_000); assert!(cap.thinking_supported); assert!(!cap.cache_telemetry_supported); diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index d8a3bac3..da030603 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -518,6 +518,7 @@ pub struct Engine { subagent_manager: SharedSubAgentManager, shell_manager: SharedShellManager, mcp_pool: Option>>, + api_provider: ApiProvider, rx_op: mpsc::Receiver, rx_approval: mpsc::Receiver, rx_user_input: mpsc::Receiver, @@ -688,6 +689,7 @@ impl Engine { Ok(client) => (Some(client), None), Err(err) => (None, Some(err.to_string())), }; + let api_provider = api_config.api_provider(); let api_key_env_only_recovery = Self::env_only_api_key_recovery_hint(api_config); let mut session = Session::new( @@ -718,6 +720,10 @@ impl Engine { locale_tag: &config.locale_tag, translation_enabled: config.translation_enabled, model_id: &config.model, + context_window_override: Some( + crate::config::provider_capability(api_provider, &config.model) + .context_window, + ), show_thinking: config.show_thinking, verbosity: config.verbosity.as_deref(), }, @@ -821,6 +827,7 @@ impl Engine { subagent_manager, shell_manager, mcp_pool: None, + api_provider, rx_op, rx_approval, rx_user_input, @@ -2159,7 +2166,9 @@ impl Engine { } async fn recover_context_overflow(&mut self, client: &DeepSeekClient, reason: &str) -> bool { - let Some(target_budget) = context_input_budget(&self.session.model) else { + let Some(target_budget) = + context_input_budget_for_provider(self.api_provider, &self.session.model) + else { return false; }; @@ -2500,6 +2509,10 @@ impl Engine { locale_tag: &self.config.locale_tag, translation_enabled: self.config.translation_enabled, model_id: &self.config.model, + context_window_override: Some( + crate::config::provider_capability(self.api_provider, &self.config.model) + .context_window, + ), show_thinking: self.config.show_thinking, verbosity: self.config.verbosity.as_deref(), }, @@ -2828,8 +2841,8 @@ mod handle; pub(crate) use context::compact_tool_result_for_context; use context::{ COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP, - context_input_budget, effective_max_output_tokens, extract_compaction_summary_prompt, - is_context_length_error_message, summarize_text, + context_input_budget_for_provider, effective_max_output_tokens, + extract_compaction_summary_prompt, is_context_length_error_message, summarize_text, }; mod dispatch; mod loop_guard; diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs index 3385f1e7..e9f21aae 100644 --- a/crates/tui/src/core/engine/capacity_flow.rs +++ b/crates/tui/src/core/engine/capacity_flow.rs @@ -7,7 +7,7 @@ use super::*; -use crate::models::context_window_for_model; +use crate::config::provider_capability; impl Engine { pub(super) async fn run_capacity_pre_request_checkpoint( @@ -156,8 +156,7 @@ impl Engine { let unique_reference_ids_recent_window = self.recent_unique_reference_count(message_window, turn); let context_window = usize::try_from( - context_window_for_model(&self.session.model) - .unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS), + provider_capability(self.api_provider, &self.session.model).context_window, ) .unwrap_or(usize::try_from(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000)) .max(1); @@ -432,8 +431,9 @@ impl Engine { } if !refreshed { - let target_budget = context_input_budget(&self.session.model) - .unwrap_or(self.config.compaction.token_threshold.max(1)); + let target_budget = + context_input_budget_for_provider(self.api_provider, &self.session.model) + .unwrap_or(self.config.compaction.token_threshold.max(1)); if self.estimated_input_tokens() > target_budget { let trimmed = self.trim_oldest_messages_to_budget(target_budget); refreshed = trimmed > 0; diff --git a/crates/tui/src/core/engine/context.rs b/crates/tui/src/core/engine/context.rs index 86e97f0d..c46e6162 100644 --- a/crates/tui/src/core/engine/context.rs +++ b/crates/tui/src/core/engine/context.rs @@ -5,6 +5,7 @@ //! engine module from accumulating unrelated context-policy details. use crate::compaction::estimate_tokens; +use crate::config::{ApiProvider, provider_capability}; use crate::error_taxonomy::ErrorCategory; use crate::models::{Message, SystemPrompt, context_window_for_model}; use crate::tools::spec::ToolResult; @@ -562,9 +563,12 @@ pub(super) fn estimate_input_tokens_conservative( /// window does not underflow to a negative budget. const INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD: u32 = 500_000; -/// Internal input-side token budget for a model: `window - reserved_output - -/// headroom`. Used by the preflight check, emergency recovery, and capacity -/// trimming to decide when to compact. +/// Internal input-side token budget for a provider/model route: +/// `window - reserved_output - headroom`. Used by the preflight check, +/// emergency recovery, and capacity trimming to decide when to compact. +/// Unknown model ids fall back to the provider's conservative default instead +/// of disabling preflight; custom long-context deployments can still advertise +/// their window with a `-256k`/`-1024k` model suffix. /// /// The reserved-output term is window-dependent: /// * `window >= 500K` (V4-class large-context) -> [`TURN_MAX_OUTPUT_TOKENS`] @@ -575,8 +579,15 @@ const INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD: u32 = 500_000; /// `256K - 262K - 1K`, which underflows `checked_sub` to `None` and /// *silently disables every preflight and emergency recovery path* — the /// session then runs until the provider hard-rejects on context length. -pub(super) fn context_input_budget(model: &str) -> Option { - let window_tokens = context_window_for_model(model)?; +pub(super) fn context_input_budget_for_provider( + provider: ApiProvider, + model: &str, +) -> Option { + let capability = provider_capability(provider, model); + context_input_budget_for_window(model, capability.context_window) +} + +fn context_input_budget_for_window(model: &str, window_tokens: u32) -> Option { let window = usize::try_from(window_tokens).ok()?; let reserved_output = if window_tokens >= INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD { TURN_MAX_OUTPUT_TOKENS diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index b21175ab..e2b005a9 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -1,6 +1,7 @@ use super::*; use super::context::TURN_MAX_OUTPUT_TOKENS; +use crate::config::ApiProvider; use crate::models::SystemBlock; use crate::test_support::lock_test_env; use crate::tools::plan::{PlanItemArg, PlanSnapshot, StepStatus}; @@ -2103,13 +2104,31 @@ fn context_budget_reserves_output_and_headroom() { let _lock = lock_test_env(); // V4 has a 1M context window — the only family that comfortably hosts // a 256K output reservation without saturating the input budget to 0. - let budget = context_input_budget("deepseek-v4-pro") + let budget = context_input_budget_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro") .expect("deepseek-v4-pro should have a known context window"); let v4_window: usize = 1_000_000; let expected = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize; assert_eq!(budget, expected); } +#[test] +fn context_budget_uses_conservative_fallback_for_unknown_models() { + let _lock = lock_test_env(); + let budget = context_input_budget_for_provider(ApiProvider::Openai, "auto") + .expect("unknown/auto model ids should still get a conservative hard preflight budget"); + let expected = 128_000usize - effective_max_output_tokens("auto") as usize - 1_024usize; + assert_eq!(budget, expected); +} + +#[test] +fn context_budget_uses_provider_effective_window_for_openai_codex() { + let _lock = lock_test_env(); + let budget = context_input_budget_for_provider(ApiProvider::OpenaiCodex, "gpt-5.5") + .expect("OpenAI Codex should use the route-effective context window"); + let expected = 400_000usize - effective_max_output_tokens("gpt-5.5") as usize - 1_024usize; + assert_eq!(budget, expected); +} + #[test] fn effective_max_output_tokens_caps_api_request_for_large_window_models() { // Serialize with other tests that mutate DEEPSEEK_MAX_OUTPUT_TOKENS so @@ -2213,7 +2232,8 @@ fn internal_context_budget_tiers_reserved_output_by_window() { // Large-context (>=500K) models reserve the full TURN_MAX_OUTPUT_TOKENS // headroom so long V4 sessions don't compact prematurely. let internal_budget = - context_input_budget("deepseek-v4-pro").expect("V4 should have a known context window"); + context_input_budget_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro") + .expect("V4 should have a known context window"); let v4_window: usize = 1_000_000; let expected_internal = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize; assert_eq!(internal_budget, expected_internal); @@ -2222,8 +2242,9 @@ fn internal_context_budget_tiers_reserved_output_by_window() { // deployment must yield a usable positive budget rather than None. The // previous formula reserved the full 262K and computed 256K - 262K - 1K, // which underflowed to None and silently disabled preflight/recovery. - let small_window_budget = context_input_budget("qwen3-32b-256k") - .expect("a 256K-suffix model must yield Some budget via the effective-cap branch"); + let small_window_budget = + context_input_budget_for_provider(ApiProvider::Openai, "qwen3-32b-256k") + .expect("a 256K-suffix model must yield Some budget via the effective-cap branch"); let effective_output = effective_max_output_tokens("qwen3-32b-256k") as usize; let expected_small = 256_000 - effective_output - 1_024; assert_eq!(small_window_budget, expected_small); diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index fe740f78..848edb56 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -205,7 +205,9 @@ impl Engine { continue; } - if let Some(input_budget) = context_input_budget(&self.session.model) { + if let Some(input_budget) = + context_input_budget_for_provider(self.api_provider, &self.session.model) + { let estimated_input = self.estimated_input_tokens(); if estimated_input > input_budget { if context_recovery_attempts >= MAX_CONTEXT_RECOVERY_ATTEMPTS { diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index fe7e60b8..cabd889a 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -246,6 +246,12 @@ pub fn context_window_for_model(model: &str) -> Option { } return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS); } + if is_openai_gpt_55_api_model(&lower) { + return Some(1_050_000); + } + if is_openai_codex_model(&lower) { + return Some(400_000); + } if let Some(window) = known_context_window_for_model(&lower) { return Some(window); } @@ -259,7 +265,8 @@ fn known_context_window_for_model(model_lower: &str) -> Option { match model_lower { // OpenAI API model docs, verified 2026-06-12: // https://developers.openai.com/api/docs/models/gpt-5.5 - "gpt-5.5" | "gpt-5.5-pro" | "codex-gpt-5.5" | "chatgpt-gpt-5.5" => Some(1_050_000), + // Family aliases and snapshots are handled by + // `is_openai_gpt_55_api_model` before this table. // OpenAI Codex model docs, verified 2026-06-12: // https://developers.openai.com/api/docs/models/gpt-5-codex // https://developers.openai.com/api/docs/models/gpt-5.3-codex @@ -320,9 +327,11 @@ pub fn max_output_tokens_for_model(model: &str) -> Option { if lower.contains("deepseek") && lower.contains("v4") { return Some(384_000); } + if is_openai_gpt_55_api_model(&lower) || is_openai_codex_model(&lower) { + return Some(128_000); + } match lower.as_str() { - "gpt-5.5" | "gpt-5.5-pro" | "codex-gpt-5.5" | "chatgpt-gpt-5.5" | "gpt-5-codex" - | "gpt-5.3-codex" => Some(128_000), + "gpt-5-codex" | "gpt-5.3-codex" => Some(128_000), "claude-opus-4-8" => Some(128_000), "claude-sonnet-4-6" | "claude-haiku-4-5" => Some(64_000), "arcee-ai/trinity-large-thinking" @@ -369,10 +378,6 @@ pub fn model_supports_reasoning(model: &str) -> bool { lower.as_str(), "claude-opus-4-8" | "claude-sonnet-4-6" - | "gpt-5.5" - | "gpt-5.5-pro" - | "codex-gpt-5.5" - | "chatgpt-gpt-5.5" | "gpt-5-codex" | "gpt-5.3-codex" | "arcee-ai/trinity-large-thinking" @@ -414,9 +419,48 @@ pub fn model_supports_reasoning(model: &str) -> bool { | "z-ai/glm-5.2" | "glm-5.1" | "glm-5.2" + ) || is_openai_gpt_55_api_model(&lower) + || is_openai_codex_model(&lower) +} + +fn is_openai_gpt_55_api_model(model_lower: &str) -> bool { + matches!(model_lower, "gpt-5.5" | "gpt-5.5-pro") + || has_date_snapshot_suffix(model_lower, "gpt-5.5-") + || has_date_snapshot_suffix(model_lower, "gpt-5.5-pro-") +} + +fn is_openai_codex_model(model_lower: &str) -> bool { + matches!( + model_lower, + "gpt-5-codex" + | "gpt-5.1-codex" + | "gpt-5.1-codex-mini" + | "gpt-5.1-codex-max" + | "gpt-5.2-codex" + | "gpt-5.3-codex" + | "codex-gpt-5.5" + | "chatgpt-gpt-5.5" + | "gpt-5.5-codex" + | "gpt-5.5-codex-preview" + | "codex-gpt-5.5-preview" + | "chatgpt-gpt-5.5-preview" ) } +fn has_date_snapshot_suffix(model_lower: &str, prefix: &str) -> bool { + let Some(rest) = model_lower.strip_prefix(prefix) else { + return false; + }; + let bytes = rest.as_bytes(); + bytes.len() == 10 + && bytes[4] == b'-' + && bytes[7] == b'-' + && bytes + .iter() + .enumerate() + .all(|(idx, byte)| idx == 4 || idx == 7 || byte.is_ascii_digit()) +} + /// Parse an explicit `_Nk` context-window hint from a model name (vendor /// agnostic). Returns the window in tokens for `N` in `8..=1024`. fn explicit_context_window_hint(model_lower: &str) -> Option { @@ -632,8 +676,13 @@ mod tests { } #[test] - fn openai_codex_models_have_verified_context_metadata() { - for model in ["gpt-5.5", "codex-gpt-5.5", "chatgpt-gpt-5.5"] { + fn openai_api_and_codex_models_have_verified_context_metadata() { + for model in [ + "gpt-5.5", + "gpt-5.5-pro", + "gpt-5.5-2026-04-23", + "gpt-5.5-pro-2026-04-23", + ] { assert_eq!(context_window_for_model(model), Some(1_050_000)); assert_eq!(max_output_tokens_for_model(model), Some(128_000)); assert!(model_supports_reasoning(model)); @@ -643,11 +692,30 @@ mod tests { ); } - for model in ["gpt-5-codex", "gpt-5.3-codex"] { + for model in [ + "gpt-5-codex", + "gpt-5.1-codex", + "gpt-5.1-codex-mini", + "gpt-5.1-codex-max", + "gpt-5.2-codex", + "gpt-5.3-codex", + "codex-gpt-5.5", + "chatgpt-gpt-5.5", + "gpt-5.5-codex", + "gpt-5.5-codex-preview", + ] { assert_eq!(context_window_for_model(model), Some(400_000)); assert_eq!(max_output_tokens_for_model(model), Some(128_000)); assert!(model_supports_reasoning(model)); + assert_eq!( + compaction_threshold_for_model_at_percent(model, 80.0), + 320_000 + ); } + + assert_eq!(context_window_for_model("gpt-5.5-nano"), None); + assert_eq!(max_output_tokens_for_model("gpt-5.5-nano"), None); + assert!(!model_supports_reasoning("gpt-5.5-nano")); } #[test] diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index b3f1cfd1..f7e13ffa 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -34,6 +34,9 @@ pub struct PromptSessionContext<'a> { /// preserving backward compatibility with existing call sites /// that predate dynamic model injection. pub model_id: &'a str, + /// Route-effective context window, when known. This can differ from the + /// model-family maximum when a provider wrapper exposes a smaller envelope. + pub context_window_override: Option, /// Whether the user-visible transcript renders thinking blocks. /// When false, the prompt should not spend localization pressure on /// `reasoning_content` the user will never see. @@ -52,6 +55,7 @@ impl Default for PromptSessionContext<'_> { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, } @@ -838,12 +842,17 @@ pub(crate) fn render_runtime_policy_reference() -> String { /// constant; this function produces a per-session variant so the prompt /// says "You are deepseek-v4-pro" or "You are deepseek-v4-flash" instead /// of a static placeholder. -fn apply_model_template(prompt: &str, model_id: &str) -> String { +fn apply_model_template( + prompt: &str, + model_id: &str, + context_window_override: Option, +) -> String { let mut prompt = prompt.replace("{model_id}", model_id); // #3025: Substitute model-specific facts so non-DeepSeek models don't // get V4 architecture claims, 1M-window assumptions, or Flash pricing. - let ctx_window = crate::models::context_window_for_model(model_id); + let ctx_window = + context_window_override.or_else(|| crate::models::context_window_for_model(model_id)); let window_note = if let Some(window) = ctx_window { format!( "You have a {}-token context window. Do not summarize or delete \ @@ -999,7 +1008,7 @@ fn compose_default_static_layers(_personality: Personality, model_id: &str) -> S // Personality is now folded into the YAML constitution (constitution.yaml). // No separate overlay is appended — the base prompt already carries voice, // tone, and presentation guidance via the preamble and article text. - apply_model_template(effective_base_prompt().trim(), model_id) + apply_model_template(effective_base_prompt().trim(), model_id, None) } fn apply_static_prompt_composer( @@ -1069,6 +1078,7 @@ pub fn system_prompt_for_mode_with_context_and_skills( locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -1098,8 +1108,17 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( instructions: Option<&[InstructionSource]>, session_context: PromptSessionContext<'_>, ) -> SystemPrompt { - let mode_prompt = - compose_prompt_with_approval_model_and_shell(Personality::Calm, session_context.model_id); + let default_layers = apply_model_template( + effective_base_prompt().trim(), + session_context.model_id, + session_context.context_window_override, + ); + let mode_prompt = apply_static_prompt_composer( + effective_static_prompt_composer(), + Personality::Calm, + session_context.model_id, + &default_layers, + ); // Load project context from workspace let project_context = load_project_context_with_parents(workspace); @@ -1548,7 +1567,7 @@ mod tests { } #[test] - fn compose_prompt_for_openai_codex_uses_verified_context_window() { + fn compose_prompt_for_openai_api_gpt_55_uses_verified_context_window() { let prompt = compose_prompt_with_approval_model_and_shell(Personality::Calm, "gpt-5.5"); assert!(!prompt.contains("Your V4 Characteristics")); assert!(prompt.contains("1050000-token context window")); @@ -1577,11 +1596,18 @@ mod tests { #[test] fn apply_model_template_replaces_placeholder() { - let result = apply_model_template("You are {model_id}", "deepseek-v4-pro"); + let result = apply_model_template("You are {model_id}", "deepseek-v4-pro", None); assert_eq!(result, "You are deepseek-v4-pro"); assert!(!result.contains("{model_id}")); } + #[test] + fn apply_model_template_uses_context_window_override() { + let result = apply_model_template("{context_window_note}", "gpt-5.5", Some(400_000)); + assert!(result.contains("400000-token context window")); + assert!(!result.contains("1050000-token context window")); + } + #[test] fn compose_prompt_injects_model_id() { let prompt = @@ -1978,6 +2004,7 @@ mod tests { locale_tag: "zh-Hans", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2048,6 +2075,7 @@ mod tests { locale_tag: "zh-Hans", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2091,6 +2119,7 @@ mod tests { locale_tag: "zh-Hans", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: false, verbosity: None, }, @@ -2144,6 +2173,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2248,6 +2278,7 @@ mod tests { locale_tag: "ja", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2285,6 +2316,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2314,6 +2346,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2372,6 +2405,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2401,6 +2435,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2608,6 +2643,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -2643,6 +2679,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: None, }, @@ -3186,6 +3223,7 @@ mod tests { locale_tag: "en", translation_enabled: false, model_id: "codewhale", + context_window_override: None, show_thinking: true, verbosity: Some(" Concise "), }, diff --git a/crates/tui/src/tui/context_inspector.rs b/crates/tui/src/tui/context_inspector.rs index 752674df..2ebc3488 100644 --- a/crates/tui/src/tui/context_inspector.rs +++ b/crates/tui/src/tui/context_inspector.rs @@ -4,10 +4,9 @@ use std::collections::HashSet; use std::fmt::Write; use crate::compaction::estimate_input_tokens_conservative; +use crate::config::provider_capability; use crate::localization::{Locale, MessageId, tr}; -use crate::models::{ - LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model, -}; +use crate::models::SystemPrompt; use crate::session_manager::SessionContextReference; use crate::tui::app::{App, ToolDetailRecord}; use crate::tui::file_mention::ContextReferenceSource; @@ -154,8 +153,8 @@ pub fn build_context_inspector_text(app: &App, locale: Locale) -> String { } fn context_usage(app: &App) -> (usize, u32, f64) { - let max = context_window_for_model(app.effective_model_for_budget()) - .unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS); + let max = + provider_capability(app.api_provider, app.effective_model_for_budget()).context_window; let estimated = estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref()); let total_chars = estimate_message_chars(&app.api_messages); diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs index d542af97..8455a6db 100644 --- a/crates/tui/src/tui/sidebar.rs +++ b/crates/tui/src/tui/sidebar.rs @@ -31,6 +31,7 @@ use super::app::{ use super::history::{GenericToolCell, HistoryCell, ToolCell, ToolStatus, summarize_tool_output}; use super::subagent_routing::active_fanout_counts; use super::ui_text::{concise_shell_command_label, truncate_line_to_width}; +use crate::config::provider_capability; /// Tolerance for floating-point cost comparison in the sidebar breakdown. /// Must be large enough that accumulated f64 error across hundreds of turns @@ -2407,7 +2408,7 @@ fn render_context_panel(f: &mut Frame, area: Rect, app: &mut App) { // ── Token usage ────────────────────────────────────────────── let total_tokens = app.session.total_conversation_tokens; - let window = crate::models::context_window_for_model(&app.model).unwrap_or(1_048_576); + let window = provider_capability(app.api_provider, &app.model).context_window; let pct = if window > 0 { ((total_tokens as f64 / window as f64) * 100.0).clamp(0.0, 100.0) } else { diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 38d2c8ed..b86b439f 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -43,7 +43,7 @@ use crate::commands; use crate::compaction::estimate_input_tokens_conservative; use crate::config::{ ApiProvider, Config, DEFAULT_NVIDIA_NIM_BASE_URL, ProviderConfig, ProvidersConfig, StatusItem, - UpdateConfig, save_provider_auth_mode_for, + UpdateConfig, provider_capability, save_provider_auth_mode_for, }; use crate::config_ui::{self, ConfigUiMode, WebConfigSession, WebConfigSessionEvent}; use crate::core::engine::{EngineConfig, EngineHandle, spawn_engine}; @@ -52,9 +52,7 @@ use crate::core::ops::{Op, USER_SHELL_TOOL_ID_PREFIX}; use crate::hooks::{HookEvent, HookExecutor, TurnEndPayloadInput, TurnEndTotals}; use crate::llm_client::LlmClient; use crate::localization::{MessageId, tr}; -use crate::models::{ - ContentBlock, Message, MessageRequest, SystemPrompt, Usage, context_window_for_model, -}; +use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt, Usage}; use crate::palette; use crate::prompts; use crate::session_manager::{ @@ -5659,6 +5657,9 @@ async fn dispatch_user_message( locale_tag: app.ui_locale.tag(), translation_enabled: app.translation_enabled, model_id: &app.model, + context_window_override: Some( + provider_capability(app.api_provider, &app.model).context_window, + ), show_thinking: app.show_thinking, verbosity: app.verbosity.as_deref(), }, @@ -9425,7 +9426,8 @@ fn estimated_context_tokens(app: &App) -> Option { } pub(crate) fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> { - let max = context_window_for_model(app.effective_model_for_budget())?; + let max = + provider_capability(app.api_provider, app.effective_model_for_budget()).context_window; let max_i64 = i64::from(max); let reported = app .session diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md index 36fb466b..b9a838f6 100644 --- a/docs/PROVIDERS.md +++ b/docs/PROVIDERS.md @@ -143,7 +143,7 @@ endpoint. | `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. | | `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`, `HF_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible router route. Accepted aliases: `huggingface`, `hugging-face`, `hugging_face`, `hf`. Org-prefixed model IDs pass through. `HUGGINGFACE_MODEL` and `HF_MODEL` are accepted. Hub browsing/export are separate future features. | | `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. | -| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. | +| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. CodeWhale budgets this route with the 400K Codex-family effective context window even when the public API model table lists a larger native `gpt-5.5` window. | | `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. | ### Hugging Face Provider vs MCP vs Hub @@ -262,7 +262,8 @@ Anthropic uses Messages, and `openai-codex` uses Responses. | OpenRouter Qwen 3.6 Flash / Plus | 1,000,000 | 65,536 | yes | no | not documented in code | | OpenRouter Qwen 3.6 35B / 27B | 262,144 | 262,140 | yes | no | not documented in code | | OpenRouter Qwen 3.6 Max Preview | 262,144 | 65,536 | yes | no | not documented in code | -| OpenAI Codex / ChatGPT `gpt-5.5` | 1,050,000 | 128,000 | yes | no | not documented in code | +| OpenAI API `gpt-5.5` | 1,050,000 | 128,000 | yes | no | not documented in code | +| OpenAI Codex / ChatGPT route (`openai-codex`) | 400,000 effective | 128,000 | yes | no | route uses Responses payload at `/codex/responses` | | Wanjie Ark `reasoner` / `r1` model IDs | 128,000 | 4,096 | yes | no | not documented in code | | Direct Arcee API `trinity-large-thinking` | 262,144 | 262,144 | yes | no | not documented in code | | Direct Arcee API `trinity-large-preview` | 262,144 | 4,096 | no in doctor capability metadata | no | not documented in code |