From d9f195ea39fc961d4bd78715ee97257ccb6c5e06 Mon Sep 17 00:00:00 2001
From: Hunter B <hmbown@gmail.com>
Date: Sat, 13 Jun 2026 14:04:42 -0700
Subject: [PATCH] fix(codex): budget oauth route at codex context

Separate model-native context metadata from provider-effective runtime
capability. The OpenAI API `gpt-5.5` model keeps its documented 1.05M-token
window, while the `openai-codex` OAuth route now budgets preflight,
recovery, capacity checks, prompt text, and TUI context indicators against
the Codex-family 400K-token envelope.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                                  |  4 +
 .../tui/src/commands/groups/config/status.rs  |  4 +-
 crates/tui/src/config.rs                      |  9 +-
 crates/tui/src/core/engine.rs                 | 19 +++-
 crates/tui/src/core/engine/capacity_flow.rs   | 10 +--
 crates/tui/src/core/engine/context.rs         | 21 +++--
 crates/tui/src/core/engine/tests.rs           | 29 +++++-
 crates/tui/src/core/engine/turn_loop.rs       |  4 +-
 crates/tui/src/models.rs                      | 88 ++++++++++++++++---
 crates/tui/src/prompts.rs                     | 52 +++++++++--
 crates/tui/src/tui/context_inspector.rs       |  9 +-
 crates/tui/src/tui/sidebar.rs                 |  3 +-
 crates/tui/src/tui/ui.rs                      | 12 +--
 docs/PROVIDERS.md                             |  5 +-
 14 files changed, 216 insertions(+), 53 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 363f0862..b3a46923 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -222,6 +222,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   CodeWhale aliases now use OpenAI's documented 1,050,000-token context window
   and 128,000 max-output metadata for context pressure, prompts, and doctor
   capability output.
+- **OpenAI Codex effective context budgeting.** The public OpenAI API metadata
+  for `gpt-5.5` remains 1,050,000 tokens, but the `openai-codex` OAuth route now
+  budgets prompts against the 400K Codex-family effective window so preflight
+  compaction runs before the backend returns `context_length_exceeded`.
 - **OpenRouter Nemotron 3 Ultra preset.** The OpenRouter preset and model
   registry now emit `nvidia/nemotron-3-ultra-550b-a55b` while keeping the old
   Ultra aliases compatible.
diff --git a/crates/tui/src/commands/groups/config/status.rs b/crates/tui/src/commands/groups/config/status.rs
index fb1a7e6d..27d1a282 100644
--- a/crates/tui/src/commands/groups/config/status.rs
+++ b/crates/tui/src/commands/groups/config/status.rs
@@ -5,7 +5,7 @@ use std::path::Path;
 
 use super::CommandResult;
 use crate::compaction::estimate_input_tokens_conservative;
-use crate::models::{LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, context_window_for_model};
+use crate::config::provider_capability;
 use crate::tui::app::App;
 use crate::utils::{display_path, estimate_message_chars};
 
@@ -166,7 +166,7 @@ fn footer_items(app: &App) -> String {
 }
 
 fn context_usage(app: &App) -> (usize, u32, f64) {
-    let max = context_window_for_model(&app.model).unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
+    let max = provider_capability(app.api_provider, &app.model).context_window;
     let estimated =
         estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
     let total_chars = estimate_message_chars(&app.api_messages);
diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs
index c73bbeb0..e8eb8e72 100644
--- a/crates/tui/src/config.rs
+++ b/crates/tui/src/config.rs
@@ -156,6 +156,7 @@ pub const DEFAULT_TOGETHER_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
 pub const DEFAULT_TOGETHER_BASE_URL: &str = "https://api.together.xyz/v1";
 pub const DEFAULT_OPENAI_CODEX_MODEL: &str = "gpt-5.5";
 pub const DEFAULT_OPENAI_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api";
+pub const OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS: u32 = 400_000;
 /// Legacy `deepseek-cn` provider alias.
 ///
 /// DeepSeek's official API host is the same worldwide. Keep this alias for
@@ -433,8 +434,7 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
         return ProviderCapability {
             provider,
             resolved_model: resolved_model.to_string(),
-            context_window: crate::models::context_window_for_model(resolved_model)
-                .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS),
+            context_window: OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS,
             max_output: crate::models::max_output_tokens_for_model(resolved_model).unwrap_or(4096),
             thinking_supported: true,
             cache_telemetry_supported: false,
@@ -11442,7 +11442,10 @@ model = "deepseek-ai/deepseek-v4-pro"
         let cap = provider_capability(ApiProvider::OpenaiCodex, DEFAULT_OPENAI_CODEX_MODEL);
         assert_eq!(cap.provider, ApiProvider::OpenaiCodex);
         assert_eq!(cap.resolved_model, DEFAULT_OPENAI_CODEX_MODEL);
-        assert_eq!(cap.context_window, 1_050_000);
+        assert_eq!(
+            cap.context_window,
+            OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS
+        );
         assert_eq!(cap.max_output, 128_000);
         assert!(cap.thinking_supported);
         assert!(!cap.cache_telemetry_supported);
diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index d8a3bac3..da030603 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -518,6 +518,7 @@ pub struct Engine {
     subagent_manager: SharedSubAgentManager,
     shell_manager: SharedShellManager,
     mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
+    api_provider: ApiProvider,
     rx_op: mpsc::Receiver<Op>,
     rx_approval: mpsc::Receiver<ApprovalDecision>,
     rx_user_input: mpsc::Receiver<UserInputDecision>,
@@ -688,6 +689,7 @@ impl Engine {
             Ok(client) => (Some(client), None),
             Err(err) => (None, Some(err.to_string())),
         };
+        let api_provider = api_config.api_provider();
         let api_key_env_only_recovery = Self::env_only_api_key_recovery_hint(api_config);
 
         let mut session = Session::new(
@@ -718,6 +720,10 @@ impl Engine {
                     locale_tag: &config.locale_tag,
                     translation_enabled: config.translation_enabled,
                     model_id: &config.model,
+                    context_window_override: Some(
+                        crate::config::provider_capability(api_provider, &config.model)
+                            .context_window,
+                    ),
                     show_thinking: config.show_thinking,
                     verbosity: config.verbosity.as_deref(),
                 },
@@ -821,6 +827,7 @@ impl Engine {
             subagent_manager,
             shell_manager,
             mcp_pool: None,
+            api_provider,
             rx_op,
             rx_approval,
             rx_user_input,
@@ -2159,7 +2166,9 @@ impl Engine {
     }
 
     async fn recover_context_overflow(&mut self, client: &DeepSeekClient, reason: &str) -> bool {
-        let Some(target_budget) = context_input_budget(&self.session.model) else {
+        let Some(target_budget) =
+            context_input_budget_for_provider(self.api_provider, &self.session.model)
+        else {
             return false;
         };
 
@@ -2500,6 +2509,10 @@ impl Engine {
                 locale_tag: &self.config.locale_tag,
                 translation_enabled: self.config.translation_enabled,
                 model_id: &self.config.model,
+                context_window_override: Some(
+                    crate::config::provider_capability(self.api_provider, &self.config.model)
+                        .context_window,
+                ),
                 show_thinking: self.config.show_thinking,
                 verbosity: self.config.verbosity.as_deref(),
             },
@@ -2828,8 +2841,8 @@ mod handle;
 pub(crate) use context::compact_tool_result_for_context;
 use context::{
     COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP,
-    context_input_budget, effective_max_output_tokens, extract_compaction_summary_prompt,
-    is_context_length_error_message, summarize_text,
+    context_input_budget_for_provider, effective_max_output_tokens,
+    extract_compaction_summary_prompt, is_context_length_error_message, summarize_text,
 };
 mod dispatch;
 mod loop_guard;
diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs
index 3385f1e7..e9f21aae 100644
--- a/crates/tui/src/core/engine/capacity_flow.rs
+++ b/crates/tui/src/core/engine/capacity_flow.rs
@@ -7,7 +7,7 @@
 
 use super::*;
 
-use crate::models::context_window_for_model;
+use crate::config::provider_capability;
 
 impl Engine {
     pub(super) async fn run_capacity_pre_request_checkpoint(
@@ -156,8 +156,7 @@ impl Engine {
         let unique_reference_ids_recent_window =
             self.recent_unique_reference_count(message_window, turn);
         let context_window = usize::try_from(
-            context_window_for_model(&self.session.model)
-                .unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS),
+            provider_capability(self.api_provider, &self.session.model).context_window,
         )
         .unwrap_or(usize::try_from(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
         .max(1);
@@ -432,8 +431,9 @@ impl Engine {
         }
 
         if !refreshed {
-            let target_budget = context_input_budget(&self.session.model)
-                .unwrap_or(self.config.compaction.token_threshold.max(1));
+            let target_budget =
+                context_input_budget_for_provider(self.api_provider, &self.session.model)
+                    .unwrap_or(self.config.compaction.token_threshold.max(1));
             if self.estimated_input_tokens() > target_budget {
                 let trimmed = self.trim_oldest_messages_to_budget(target_budget);
                 refreshed = trimmed > 0;
diff --git a/crates/tui/src/core/engine/context.rs b/crates/tui/src/core/engine/context.rs
index 86e97f0d..c46e6162 100644
--- a/crates/tui/src/core/engine/context.rs
+++ b/crates/tui/src/core/engine/context.rs
@@ -5,6 +5,7 @@
 //! engine module from accumulating unrelated context-policy details.
 
 use crate::compaction::estimate_tokens;
+use crate::config::{ApiProvider, provider_capability};
 use crate::error_taxonomy::ErrorCategory;
 use crate::models::{Message, SystemPrompt, context_window_for_model};
 use crate::tools::spec::ToolResult;
@@ -562,9 +563,12 @@ pub(super) fn estimate_input_tokens_conservative(
 /// window does not underflow to a negative budget.
 const INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD: u32 = 500_000;
 
-/// Internal input-side token budget for a model: `window - reserved_output -
-/// headroom`. Used by the preflight check, emergency recovery, and capacity
-/// trimming to decide when to compact.
+/// Internal input-side token budget for a provider/model route:
+/// `window - reserved_output - headroom`. Used by the preflight check,
+/// emergency recovery, and capacity trimming to decide when to compact.
+/// Unknown model ids fall back to the provider's conservative default instead
+/// of disabling preflight. A `-256k`/`-1024k` model suffix can still advertise a
+/// custom window, except on routes that pin one (`openai-codex` is always 400K).
 ///
 /// The reserved-output term is window-dependent:
 ///   * `window >= 500K` (V4-class large-context) -> [`TURN_MAX_OUTPUT_TOKENS`]
@@ -575,8 +579,15 @@ const INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD: u32 = 500_000;
 ///     `256K - 262K - 1K`, which underflows `checked_sub` to `None` and
 ///     *silently disables every preflight and emergency recovery path* — the
 ///     session then runs until the provider hard-rejects on context length.
-pub(super) fn context_input_budget(model: &str) -> Option<usize> {
-    let window_tokens = context_window_for_model(model)?;
+pub(super) fn context_input_budget_for_provider(
+    provider: ApiProvider,
+    model: &str,
+) -> Option<usize> {
+    let capability = provider_capability(provider, model);
+    context_input_budget_for_window(model, capability.context_window)
+}
+
+fn context_input_budget_for_window(model: &str, window_tokens: u32) -> Option<usize> {
     let window = usize::try_from(window_tokens).ok()?;
     let reserved_output = if window_tokens >= INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD {
         TURN_MAX_OUTPUT_TOKENS
diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index b21175ab..e2b005a9 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -1,6 +1,7 @@
 use super::*;
 
 use super::context::TURN_MAX_OUTPUT_TOKENS;
+use crate::config::ApiProvider;
 use crate::models::SystemBlock;
 use crate::test_support::lock_test_env;
 use crate::tools::plan::{PlanItemArg, PlanSnapshot, StepStatus};
@@ -2103,13 +2104,31 @@ fn context_budget_reserves_output_and_headroom() {
     let _lock = lock_test_env();
     // V4 has a 1M context window — the only family that comfortably hosts
     // a 256K output reservation without saturating the input budget to 0.
-    let budget = context_input_budget("deepseek-v4-pro")
+    let budget = context_input_budget_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro")
         .expect("deepseek-v4-pro should have a known context window");
     let v4_window: usize = 1_000_000;
     let expected = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
     assert_eq!(budget, expected);
 }
 
+#[test]
+fn context_budget_uses_conservative_fallback_for_unknown_models() {
+    let _lock = lock_test_env();
+    let budget = context_input_budget_for_provider(ApiProvider::Openai, "auto")
+        .expect("unknown/auto model ids should still get a conservative hard preflight budget");
+    let expected = 128_000usize - effective_max_output_tokens("auto") as usize - 1_024usize;
+    assert_eq!(budget, expected);
+}
+
+#[test]
+fn context_budget_uses_provider_effective_window_for_openai_codex() {
+    let _lock = lock_test_env();
+    let budget = context_input_budget_for_provider(ApiProvider::OpenaiCodex, "gpt-5.5")
+        .expect("OpenAI Codex should use the route-effective context window");
+    let expected = 400_000usize - effective_max_output_tokens("gpt-5.5") as usize - 1_024usize;
+    assert_eq!(budget, expected);
+}
+
 #[test]
 fn effective_max_output_tokens_caps_api_request_for_large_window_models() {
     // Serialize with other tests that mutate DEEPSEEK_MAX_OUTPUT_TOKENS so
@@ -2213,7 +2232,8 @@ fn internal_context_budget_tiers_reserved_output_by_window() {
     // Large-context (>=500K) models reserve the full TURN_MAX_OUTPUT_TOKENS
     // headroom so long V4 sessions don't compact prematurely.
     let internal_budget =
-        context_input_budget("deepseek-v4-pro").expect("V4 should have a known context window");
+        context_input_budget_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro")
+            .expect("V4 should have a known context window");
     let v4_window: usize = 1_000_000;
     let expected_internal = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
     assert_eq!(internal_budget, expected_internal);
@@ -2222,8 +2242,9 @@ fn internal_context_budget_tiers_reserved_output_by_window() {
     // deployment must yield a usable positive budget rather than None. The
     // previous formula reserved the full 262K and computed 256K - 262K - 1K,
     // which underflowed to None and silently disabled preflight/recovery.
-    let small_window_budget = context_input_budget("qwen3-32b-256k")
-        .expect("a 256K-suffix model must yield Some budget via the effective-cap branch");
+    let small_window_budget =
+        context_input_budget_for_provider(ApiProvider::Openai, "qwen3-32b-256k")
+            .expect("a 256K-suffix model must yield Some budget via the effective-cap branch");
     let effective_output = effective_max_output_tokens("qwen3-32b-256k") as usize;
     let expected_small = 256_000 - effective_output - 1_024;
     assert_eq!(small_window_budget, expected_small);
diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs
index fe740f78..848edb56 100644
--- a/crates/tui/src/core/engine/turn_loop.rs
+++ b/crates/tui/src/core/engine/turn_loop.rs
@@ -205,7 +205,9 @@ impl Engine {
                 continue;
             }
 
-            if let Some(input_budget) = context_input_budget(&self.session.model) {
+            if let Some(input_budget) =
+                context_input_budget_for_provider(self.api_provider, &self.session.model)
+            {
                 let estimated_input = self.estimated_input_tokens();
                 if estimated_input > input_budget {
                     if context_recovery_attempts >= MAX_CONTEXT_RECOVERY_ATTEMPTS {
diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs
index fe7e60b8..cabd889a 100644
--- a/crates/tui/src/models.rs
+++ b/crates/tui/src/models.rs
@@ -246,6 +246,12 @@ pub fn context_window_for_model(model: &str) -> Option<u32> {
         }
         return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
     }
+    if is_openai_gpt_55_api_model(&lower) {
+        return Some(1_050_000);
+    }
+    if is_openai_codex_model(&lower) {
+        return Some(400_000);
+    }
     if let Some(window) = known_context_window_for_model(&lower) {
         return Some(window);
     }
@@ -259,7 +265,8 @@ fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
     match model_lower {
         // OpenAI API model docs, verified 2026-06-12:
         // https://developers.openai.com/api/docs/models/gpt-5.5
-        "gpt-5.5" | "gpt-5.5-pro" | "codex-gpt-5.5" | "chatgpt-gpt-5.5" => Some(1_050_000),
+        // `gpt-5.5`/`gpt-5.5-pro` and dated snapshots: see `is_openai_gpt_55_api_model`; the
+        // `codex-`/`chatgpt-` aliases: see `is_openai_codex_model`. Both run before this table.
         // OpenAI Codex model docs, verified 2026-06-12:
         // https://developers.openai.com/api/docs/models/gpt-5-codex
         // https://developers.openai.com/api/docs/models/gpt-5.3-codex
@@ -320,9 +327,11 @@ pub fn max_output_tokens_for_model(model: &str) -> Option<u32> {
     if lower.contains("deepseek") && lower.contains("v4") {
         return Some(384_000);
     }
+    if is_openai_gpt_55_api_model(&lower) || is_openai_codex_model(&lower) {
+        return Some(128_000);
+    }
     match lower.as_str() {
-        "gpt-5.5" | "gpt-5.5-pro" | "codex-gpt-5.5" | "chatgpt-gpt-5.5" | "gpt-5-codex"
-        | "gpt-5.3-codex" => Some(128_000),
+        "gpt-5-codex" | "gpt-5.3-codex" => Some(128_000),
         "claude-opus-4-8" => Some(128_000),
         "claude-sonnet-4-6" | "claude-haiku-4-5" => Some(64_000),
         "arcee-ai/trinity-large-thinking"
@@ -369,10 +378,6 @@ pub fn model_supports_reasoning(model: &str) -> bool {
         lower.as_str(),
         "claude-opus-4-8"
             | "claude-sonnet-4-6"
-            | "gpt-5.5"
-            | "gpt-5.5-pro"
-            | "codex-gpt-5.5"
-            | "chatgpt-gpt-5.5"
             | "gpt-5-codex"
             | "gpt-5.3-codex"
             | "arcee-ai/trinity-large-thinking"
@@ -414,9 +419,48 @@ pub fn model_supports_reasoning(model: &str) -> bool {
             | "z-ai/glm-5.2"
             | "glm-5.1"
             | "glm-5.2"
+    ) || is_openai_gpt_55_api_model(&lower)
+        || is_openai_codex_model(&lower)
+}
+
+fn is_openai_gpt_55_api_model(model_lower: &str) -> bool {
+    matches!(model_lower, "gpt-5.5" | "gpt-5.5-pro")
+        || has_date_snapshot_suffix(model_lower, "gpt-5.5-")
+        || has_date_snapshot_suffix(model_lower, "gpt-5.5-pro-")
+}
+
+fn is_openai_codex_model(model_lower: &str) -> bool {
+    matches!(
+        model_lower,
+        "gpt-5-codex"
+            | "gpt-5.1-codex"
+            | "gpt-5.1-codex-mini"
+            | "gpt-5.1-codex-max"
+            | "gpt-5.2-codex"
+            | "gpt-5.3-codex"
+            | "codex-gpt-5.5"
+            | "chatgpt-gpt-5.5"
+            | "gpt-5.5-codex"
+            | "gpt-5.5-codex-preview"
+            | "codex-gpt-5.5-preview"
+            | "chatgpt-gpt-5.5-preview"
     )
 }
 
+fn has_date_snapshot_suffix(model_lower: &str, prefix: &str) -> bool {
+    let Some(rest) = model_lower.strip_prefix(prefix) else {
+        return false;
+    };
+    let bytes = rest.as_bytes();
+    bytes.len() == 10
+        && bytes[4] == b'-'
+        && bytes[7] == b'-'
+        && bytes
+            .iter()
+            .enumerate()
+            .all(|(idx, byte)| idx == 4 || idx == 7 || byte.is_ascii_digit())
+}
+
 /// Parse an explicit `_Nk` context-window hint from a model name (vendor
 /// agnostic). Returns the window in tokens for `N` in `8..=1024`.
 fn explicit_context_window_hint(model_lower: &str) -> Option<u32> {
@@ -632,8 +676,13 @@ mod tests {
     }
 
     #[test]
-    fn openai_codex_models_have_verified_context_metadata() {
-        for model in ["gpt-5.5", "codex-gpt-5.5", "chatgpt-gpt-5.5"] {
+    fn openai_api_and_codex_models_have_verified_context_metadata() {
+        for model in [
+            "gpt-5.5",
+            "gpt-5.5-pro",
+            "gpt-5.5-2026-04-23",
+            "gpt-5.5-pro-2026-04-23",
+        ] {
             assert_eq!(context_window_for_model(model), Some(1_050_000));
             assert_eq!(max_output_tokens_for_model(model), Some(128_000));
             assert!(model_supports_reasoning(model));
@@ -643,11 +692,30 @@ mod tests {
             );
         }
 
-        for model in ["gpt-5-codex", "gpt-5.3-codex"] {
+        for model in [
+            "gpt-5-codex",
+            "gpt-5.1-codex",
+            "gpt-5.1-codex-mini",
+            "gpt-5.1-codex-max",
+            "gpt-5.2-codex",
+            "gpt-5.3-codex",
+            "codex-gpt-5.5",
+            "chatgpt-gpt-5.5",
+            "gpt-5.5-codex",
+            "gpt-5.5-codex-preview",
+        ] {
             assert_eq!(context_window_for_model(model), Some(400_000));
             assert_eq!(max_output_tokens_for_model(model), Some(128_000));
             assert!(model_supports_reasoning(model));
+            assert_eq!(
+                compaction_threshold_for_model_at_percent(model, 80.0),
+                320_000
+            );
         }
+
+        assert_eq!(context_window_for_model("gpt-5.5-nano"), None);
+        assert_eq!(max_output_tokens_for_model("gpt-5.5-nano"), None);
+        assert!(!model_supports_reasoning("gpt-5.5-nano"));
     }
 
     #[test]
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index b3f1cfd1..f7e13ffa 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -34,6 +34,9 @@ pub struct PromptSessionContext<'a> {
     /// preserving backward compatibility with existing call sites
     /// that predate dynamic model injection.
     pub model_id: &'a str,
+    /// Route-effective context window; `None` falls back to the `model_id` lookup. May be
+    /// smaller than the model-family maximum when a provider wrapper exposes a tighter envelope.
+    pub context_window_override: Option<u32>,
     /// Whether the user-visible transcript renders thinking blocks.
     /// When false, the prompt should not spend localization pressure on
     /// `reasoning_content` the user will never see.
@@ -52,6 +55,7 @@ impl Default for PromptSessionContext<'_> {
             locale_tag: "en",
             translation_enabled: false,
             model_id: "codewhale",
+            context_window_override: None,
             show_thinking: true,
             verbosity: None,
         }
@@ -838,12 +842,17 @@ pub(crate) fn render_runtime_policy_reference() -> String {
 /// constant; this function produces a per-session variant so the prompt
 /// says "You are deepseek-v4-pro" or "You are deepseek-v4-flash" instead
 /// of a static placeholder.
-fn apply_model_template(prompt: &str, model_id: &str) -> String {
+fn apply_model_template(
+    prompt: &str,
+    model_id: &str,
+    context_window_override: Option<u32>,
+) -> String {
     let mut prompt = prompt.replace("{model_id}", model_id);
 
     // #3025: Substitute model-specific facts so non-DeepSeek models don't
     // get V4 architecture claims, 1M-window assumptions, or Flash pricing.
-    let ctx_window = crate::models::context_window_for_model(model_id);
+    let ctx_window =
+        context_window_override.or_else(|| crate::models::context_window_for_model(model_id));
     let window_note = if let Some(window) = ctx_window {
         format!(
             "You have a {}-token context window. Do not summarize or delete \
@@ -999,7 +1008,7 @@ fn compose_default_static_layers(_personality: Personality, model_id: &str) -> S
     // Personality is now folded into the YAML constitution (constitution.yaml).
     // No separate overlay is appended — the base prompt already carries voice,
     // tone, and presentation guidance via the preamble and article text.
-    apply_model_template(effective_base_prompt().trim(), model_id)
+    apply_model_template(effective_base_prompt().trim(), model_id, None)
 }
 
 fn apply_static_prompt_composer(
@@ -1069,6 +1078,7 @@ pub fn system_prompt_for_mode_with_context_and_skills(
             locale_tag: "en",
             translation_enabled: false,
             model_id: "codewhale",
+            context_window_override: None,
             show_thinking: true,
             verbosity: None,
         },
@@ -1098,8 +1108,17 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
     instructions: Option<&[InstructionSource]>,
     session_context: PromptSessionContext<'_>,
 ) -> SystemPrompt {
-    let mode_prompt =
-        compose_prompt_with_approval_model_and_shell(Personality::Calm, session_context.model_id);
+    let default_layers = apply_model_template(
+        effective_base_prompt().trim(),
+        session_context.model_id,
+        session_context.context_window_override,
+    );
+    let mode_prompt = apply_static_prompt_composer(
+        effective_static_prompt_composer(),
+        Personality::Calm,
+        session_context.model_id,
+        &default_layers,
+    );
 
     // Load project context from workspace
     let project_context = load_project_context_with_parents(workspace);
@@ -1548,7 +1567,7 @@ mod tests {
     }
 
     #[test]
-    fn compose_prompt_for_openai_codex_uses_verified_context_window() {
+    fn compose_prompt_for_openai_api_gpt_55_uses_verified_context_window() {
         let prompt = compose_prompt_with_approval_model_and_shell(Personality::Calm, "gpt-5.5");
         assert!(!prompt.contains("Your V4 Characteristics"));
         assert!(prompt.contains("1050000-token context window"));
@@ -1577,11 +1596,18 @@ mod tests {
 
     #[test]
     fn apply_model_template_replaces_placeholder() {
-        let result = apply_model_template("You are {model_id}", "deepseek-v4-pro");
+        let result = apply_model_template("You are {model_id}", "deepseek-v4-pro", None);
         assert_eq!(result, "You are deepseek-v4-pro");
         assert!(!result.contains("{model_id}"));
     }
 
+    #[test]
+    fn apply_model_template_uses_context_window_override() {
+        let result = apply_model_template("{context_window_note}", "gpt-5.5", Some(400_000));
+        assert!(result.contains("400000-token context window"));
+        assert!(!result.contains("1050000-token context window"));
+    }
+
     #[test]
     fn compose_prompt_injects_model_id() {
         let prompt =
@@ -1978,6 +2004,7 @@ mod tests {
                 locale_tag: "zh-Hans",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2048,6 +2075,7 @@ mod tests {
                 locale_tag: "zh-Hans",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2091,6 +2119,7 @@ mod tests {
                 locale_tag: "zh-Hans",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: false,
                 verbosity: None,
             },
@@ -2144,6 +2173,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2248,6 +2278,7 @@ mod tests {
                 locale_tag: "ja",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2285,6 +2316,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2314,6 +2346,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2372,6 +2405,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2401,6 +2435,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2608,6 +2643,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -2643,6 +2679,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: None,
             },
@@ -3186,6 +3223,7 @@ mod tests {
                 locale_tag: "en",
                 translation_enabled: false,
                 model_id: "codewhale",
+                context_window_override: None,
                 show_thinking: true,
                 verbosity: Some(" Concise "),
             },
diff --git a/crates/tui/src/tui/context_inspector.rs b/crates/tui/src/tui/context_inspector.rs
index 752674df..2ebc3488 100644
--- a/crates/tui/src/tui/context_inspector.rs
+++ b/crates/tui/src/tui/context_inspector.rs
@@ -4,10 +4,9 @@ use std::collections::HashSet;
 use std::fmt::Write;
 
 use crate::compaction::estimate_input_tokens_conservative;
+use crate::config::provider_capability;
 use crate::localization::{Locale, MessageId, tr};
-use crate::models::{
-    LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model,
-};
+use crate::models::SystemPrompt;
 use crate::session_manager::SessionContextReference;
 use crate::tui::app::{App, ToolDetailRecord};
 use crate::tui::file_mention::ContextReferenceSource;
@@ -154,8 +153,8 @@ pub fn build_context_inspector_text(app: &App, locale: Locale) -> String {
 }
 
 fn context_usage(app: &App) -> (usize, u32, f64) {
-    let max = context_window_for_model(app.effective_model_for_budget())
-        .unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
+    let max =
+        provider_capability(app.api_provider, app.effective_model_for_budget()).context_window;
     let estimated =
         estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
     let total_chars = estimate_message_chars(&app.api_messages);
diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs
index d542af97..8455a6db 100644
--- a/crates/tui/src/tui/sidebar.rs
+++ b/crates/tui/src/tui/sidebar.rs
@@ -31,6 +31,7 @@ use super::app::{
 use super::history::{GenericToolCell, HistoryCell, ToolCell, ToolStatus, summarize_tool_output};
 use super::subagent_routing::active_fanout_counts;
 use super::ui_text::{concise_shell_command_label, truncate_line_to_width};
+use crate::config::provider_capability;
 
 /// Tolerance for floating-point cost comparison in the sidebar breakdown.
 /// Must be large enough that accumulated f64 error across hundreds of turns
@@ -2407,7 +2408,7 @@ fn render_context_panel(f: &mut Frame, area: Rect, app: &mut App) {
 
     // ── Token usage ──────────────────────────────────────────────
     let total_tokens = app.session.total_conversation_tokens;
-    let window = crate::models::context_window_for_model(&app.model).unwrap_or(1_048_576);
+    let window = provider_capability(app.api_provider, &app.model).context_window;
     let pct = if window > 0 {
         ((total_tokens as f64 / window as f64) * 100.0).clamp(0.0, 100.0)
     } else {
diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs
index 38d2c8ed..b86b439f 100644
--- a/crates/tui/src/tui/ui.rs
+++ b/crates/tui/src/tui/ui.rs
@@ -43,7 +43,7 @@ use crate::commands;
 use crate::compaction::estimate_input_tokens_conservative;
 use crate::config::{
     ApiProvider, Config, DEFAULT_NVIDIA_NIM_BASE_URL, ProviderConfig, ProvidersConfig, StatusItem,
-    UpdateConfig, save_provider_auth_mode_for,
+    UpdateConfig, provider_capability, save_provider_auth_mode_for,
 };
 use crate::config_ui::{self, ConfigUiMode, WebConfigSession, WebConfigSessionEvent};
 use crate::core::engine::{EngineConfig, EngineHandle, spawn_engine};
@@ -52,9 +52,7 @@ use crate::core::ops::{Op, USER_SHELL_TOOL_ID_PREFIX};
 use crate::hooks::{HookEvent, HookExecutor, TurnEndPayloadInput, TurnEndTotals};
 use crate::llm_client::LlmClient;
 use crate::localization::{MessageId, tr};
-use crate::models::{
-    ContentBlock, Message, MessageRequest, SystemPrompt, Usage, context_window_for_model,
-};
+use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt, Usage};
 use crate::palette;
 use crate::prompts;
 use crate::session_manager::{
@@ -5659,6 +5657,9 @@ async fn dispatch_user_message(
                 locale_tag: app.ui_locale.tag(),
                 translation_enabled: app.translation_enabled,
                 model_id: &app.model,
+                context_window_override: Some(
+                    provider_capability(app.api_provider, &app.model).context_window,
+                ),
                 show_thinking: app.show_thinking,
                 verbosity: app.verbosity.as_deref(),
             },
@@ -9425,7 +9426,8 @@ fn estimated_context_tokens(app: &App) -> Option<i64> {
 }
 
 pub(crate) fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
-    let max = context_window_for_model(app.effective_model_for_budget())?;
+    let max =
+        provider_capability(app.api_provider, app.effective_model_for_budget()).context_window;
     let max_i64 = i64::from(max);
     let reported = app
         .session
diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md
index 36fb466b..b9a838f6 100644
--- a/docs/PROVIDERS.md
+++ b/docs/PROVIDERS.md
@@ -143,7 +143,7 @@ endpoint.
 | `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. |
 | `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`, `HF_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible router route. Accepted aliases: `huggingface`, `hugging-face`, `hugging_face`, `hf`. Org-prefixed model IDs pass through. `HUGGINGFACE_MODEL` and `HF_MODEL` are accepted. Hub browsing/export are separate future features. |
 | `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. |
-| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. |
+| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. CodeWhale budgets this route against the 400,000-token Codex-family effective context window, even though the public OpenAI API lists a larger native `gpt-5.5` window (1,050,000 tokens). |
 | `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. |
 
 ### Hugging Face Provider vs MCP vs Hub
@@ -262,7 +262,8 @@ Anthropic uses Messages, and `openai-codex` uses Responses.
 | OpenRouter Qwen 3.6 Flash / Plus | 1,000,000 | 65,536 | yes | no | not documented in code |
 | OpenRouter Qwen 3.6 35B / 27B | 262,144 | 262,140 | yes | no | not documented in code |
 | OpenRouter Qwen 3.6 Max Preview | 262,144 | 65,536 | yes | no | not documented in code |
-| OpenAI Codex / ChatGPT `gpt-5.5` | 1,050,000 | 128,000 | yes | no | not documented in code |
+| OpenAI API `gpt-5.5` | 1,050,000 | 128,000 | yes | no | not documented in code |
+| OpenAI Codex / ChatGPT route (`openai-codex`) | 400,000 effective | 128,000 | yes | no | route uses Responses payload at `/codex/responses` |
 | Wanjie Ark `reasoner` / `r1` model IDs | 128,000 | 4,096 | yes | no | not documented in code |
 | Direct Arcee API `trinity-large-thinking` | 262,144 | 262,144 | yes | no | not documented in code |
 | Direct Arcee API `trinity-large-preview` | 262,144 | 4,096 | no in doctor capability metadata | no | not documented in code |