fix(codex): budget oauth route at codex context
Separate model-native context metadata from provider-effective runtime capability so OpenAI API gpt-5.5 stays at its documented 1.05M window while the openai-codex OAuth route budgets preflight, recovery, capacity checks, prompt text, and TUI context indicators against the Codex-family 400K envelope.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -222,6 +222,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
CodeWhale aliases now use OpenAI's documented 1,050,000-token context window
|
||||
and 128,000 max-output metadata for context pressure, prompts, and doctor
|
||||
capability output.
|
||||
- **OpenAI Codex effective context budgeting.** The public OpenAI API metadata
|
||||
for `gpt-5.5` remains 1,050,000 tokens, but the `openai-codex` OAuth route now
|
||||
budgets prompts against the 400K Codex-family effective window so preflight
|
||||
compaction runs before the backend returns `context_length_exceeded`.
|
||||
- **OpenRouter Nemotron 3 Ultra preset.** The OpenRouter preset and model
|
||||
registry now emit `nvidia/nemotron-3-ultra-550b-a55b` while keeping the old
|
||||
Ultra aliases compatible.
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::path::Path;
|
||||
|
||||
use super::CommandResult;
|
||||
use crate::compaction::estimate_input_tokens_conservative;
|
||||
use crate::models::{LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, context_window_for_model};
|
||||
use crate::config::provider_capability;
|
||||
use crate::tui::app::App;
|
||||
use crate::utils::{display_path, estimate_message_chars};
|
||||
|
||||
@@ -166,7 +166,7 @@ fn footer_items(app: &App) -> String {
|
||||
}
|
||||
|
||||
fn context_usage(app: &App) -> (usize, u32, f64) {
|
||||
let max = context_window_for_model(&app.model).unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
|
||||
let max = provider_capability(app.api_provider, &app.model).context_window;
|
||||
let estimated =
|
||||
estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
|
||||
let total_chars = estimate_message_chars(&app.api_messages);
|
||||
|
||||
@@ -156,6 +156,7 @@ pub const DEFAULT_TOGETHER_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
|
||||
pub const DEFAULT_TOGETHER_BASE_URL: &str = "https://api.together.xyz/v1";
|
||||
pub const DEFAULT_OPENAI_CODEX_MODEL: &str = "gpt-5.5";
|
||||
pub const DEFAULT_OPENAI_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api";
|
||||
pub const OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS: u32 = 400_000;
|
||||
/// Legacy `deepseek-cn` provider alias.
|
||||
///
|
||||
/// DeepSeek's official API host is the same worldwide. Keep this alias for
|
||||
@@ -433,8 +434,7 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
|
||||
return ProviderCapability {
|
||||
provider,
|
||||
resolved_model: resolved_model.to_string(),
|
||||
context_window: crate::models::context_window_for_model(resolved_model)
|
||||
.unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS),
|
||||
context_window: OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS,
|
||||
max_output: crate::models::max_output_tokens_for_model(resolved_model).unwrap_or(4096),
|
||||
thinking_supported: true,
|
||||
cache_telemetry_supported: false,
|
||||
@@ -11442,7 +11442,10 @@ model = "deepseek-ai/deepseek-v4-pro"
|
||||
let cap = provider_capability(ApiProvider::OpenaiCodex, DEFAULT_OPENAI_CODEX_MODEL);
|
||||
assert_eq!(cap.provider, ApiProvider::OpenaiCodex);
|
||||
assert_eq!(cap.resolved_model, DEFAULT_OPENAI_CODEX_MODEL);
|
||||
assert_eq!(cap.context_window, 1_050_000);
|
||||
assert_eq!(
|
||||
cap.context_window,
|
||||
OPENAI_CODEX_EFFECTIVE_CONTEXT_WINDOW_TOKENS
|
||||
);
|
||||
assert_eq!(cap.max_output, 128_000);
|
||||
assert!(cap.thinking_supported);
|
||||
assert!(!cap.cache_telemetry_supported);
|
||||
|
||||
@@ -518,6 +518,7 @@ pub struct Engine {
|
||||
subagent_manager: SharedSubAgentManager,
|
||||
shell_manager: SharedShellManager,
|
||||
mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
|
||||
api_provider: ApiProvider,
|
||||
rx_op: mpsc::Receiver<Op>,
|
||||
rx_approval: mpsc::Receiver<ApprovalDecision>,
|
||||
rx_user_input: mpsc::Receiver<UserInputDecision>,
|
||||
@@ -688,6 +689,7 @@ impl Engine {
|
||||
Ok(client) => (Some(client), None),
|
||||
Err(err) => (None, Some(err.to_string())),
|
||||
};
|
||||
let api_provider = api_config.api_provider();
|
||||
let api_key_env_only_recovery = Self::env_only_api_key_recovery_hint(api_config);
|
||||
|
||||
let mut session = Session::new(
|
||||
@@ -718,6 +720,10 @@ impl Engine {
|
||||
locale_tag: &config.locale_tag,
|
||||
translation_enabled: config.translation_enabled,
|
||||
model_id: &config.model,
|
||||
context_window_override: Some(
|
||||
crate::config::provider_capability(api_provider, &config.model)
|
||||
.context_window,
|
||||
),
|
||||
show_thinking: config.show_thinking,
|
||||
verbosity: config.verbosity.as_deref(),
|
||||
},
|
||||
@@ -821,6 +827,7 @@ impl Engine {
|
||||
subagent_manager,
|
||||
shell_manager,
|
||||
mcp_pool: None,
|
||||
api_provider,
|
||||
rx_op,
|
||||
rx_approval,
|
||||
rx_user_input,
|
||||
@@ -2159,7 +2166,9 @@ impl Engine {
|
||||
}
|
||||
|
||||
async fn recover_context_overflow(&mut self, client: &DeepSeekClient, reason: &str) -> bool {
|
||||
let Some(target_budget) = context_input_budget(&self.session.model) else {
|
||||
let Some(target_budget) =
|
||||
context_input_budget_for_provider(self.api_provider, &self.session.model)
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
|
||||
@@ -2500,6 +2509,10 @@ impl Engine {
|
||||
locale_tag: &self.config.locale_tag,
|
||||
translation_enabled: self.config.translation_enabled,
|
||||
model_id: &self.config.model,
|
||||
context_window_override: Some(
|
||||
crate::config::provider_capability(self.api_provider, &self.config.model)
|
||||
.context_window,
|
||||
),
|
||||
show_thinking: self.config.show_thinking,
|
||||
verbosity: self.config.verbosity.as_deref(),
|
||||
},
|
||||
@@ -2828,8 +2841,8 @@ mod handle;
|
||||
pub(crate) use context::compact_tool_result_for_context;
|
||||
use context::{
|
||||
COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP,
|
||||
context_input_budget, effective_max_output_tokens, extract_compaction_summary_prompt,
|
||||
is_context_length_error_message, summarize_text,
|
||||
context_input_budget_for_provider, effective_max_output_tokens,
|
||||
extract_compaction_summary_prompt, is_context_length_error_message, summarize_text,
|
||||
};
|
||||
mod dispatch;
|
||||
mod loop_guard;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
use super::*;
|
||||
|
||||
use crate::models::context_window_for_model;
|
||||
use crate::config::provider_capability;
|
||||
|
||||
impl Engine {
|
||||
pub(super) async fn run_capacity_pre_request_checkpoint(
|
||||
@@ -156,8 +156,7 @@ impl Engine {
|
||||
let unique_reference_ids_recent_window =
|
||||
self.recent_unique_reference_count(message_window, turn);
|
||||
let context_window = usize::try_from(
|
||||
context_window_for_model(&self.session.model)
|
||||
.unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS),
|
||||
provider_capability(self.api_provider, &self.session.model).context_window,
|
||||
)
|
||||
.unwrap_or(usize::try_from(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS).unwrap_or(128_000))
|
||||
.max(1);
|
||||
@@ -432,8 +431,9 @@ impl Engine {
|
||||
}
|
||||
|
||||
if !refreshed {
|
||||
let target_budget = context_input_budget(&self.session.model)
|
||||
.unwrap_or(self.config.compaction.token_threshold.max(1));
|
||||
let target_budget =
|
||||
context_input_budget_for_provider(self.api_provider, &self.session.model)
|
||||
.unwrap_or(self.config.compaction.token_threshold.max(1));
|
||||
if self.estimated_input_tokens() > target_budget {
|
||||
let trimmed = self.trim_oldest_messages_to_budget(target_budget);
|
||||
refreshed = trimmed > 0;
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
//! engine module from accumulating unrelated context-policy details.
|
||||
|
||||
use crate::compaction::estimate_tokens;
|
||||
use crate::config::{ApiProvider, provider_capability};
|
||||
use crate::error_taxonomy::ErrorCategory;
|
||||
use crate::models::{Message, SystemPrompt, context_window_for_model};
|
||||
use crate::tools::spec::ToolResult;
|
||||
@@ -562,9 +563,12 @@ pub(super) fn estimate_input_tokens_conservative(
|
||||
/// window does not underflow to a negative budget.
|
||||
const INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD: u32 = 500_000;
|
||||
|
||||
/// Internal input-side token budget for a model: `window - reserved_output -
|
||||
/// headroom`. Used by the preflight check, emergency recovery, and capacity
|
||||
/// trimming to decide when to compact.
|
||||
/// Internal input-side token budget for a provider/model route:
|
||||
/// `window - reserved_output - headroom`. Used by the preflight check,
|
||||
/// emergency recovery, and capacity trimming to decide when to compact.
|
||||
/// Unknown model ids fall back to the provider's conservative default instead
|
||||
/// of disabling preflight; custom long-context deployments can still advertise
|
||||
/// their window with a `-256k`/`-1024k` model suffix.
|
||||
///
|
||||
/// The reserved-output term is window-dependent:
|
||||
/// * `window >= 500K` (V4-class large-context) -> [`TURN_MAX_OUTPUT_TOKENS`]
|
||||
@@ -575,8 +579,15 @@ const INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD: u32 = 500_000;
|
||||
/// `256K - 262K - 1K`, which underflows `checked_sub` to `None` and
|
||||
/// *silently disables every preflight and emergency recovery path* — the
|
||||
/// session then runs until the provider hard-rejects on context length.
|
||||
pub(super) fn context_input_budget(model: &str) -> Option<usize> {
|
||||
let window_tokens = context_window_for_model(model)?;
|
||||
pub(super) fn context_input_budget_for_provider(
|
||||
provider: ApiProvider,
|
||||
model: &str,
|
||||
) -> Option<usize> {
|
||||
let capability = provider_capability(provider, model);
|
||||
context_input_budget_for_window(model, capability.context_window)
|
||||
}
|
||||
|
||||
fn context_input_budget_for_window(model: &str, window_tokens: u32) -> Option<usize> {
|
||||
let window = usize::try_from(window_tokens).ok()?;
|
||||
let reserved_output = if window_tokens >= INTERNAL_BUDGET_LARGE_WINDOW_THRESHOLD {
|
||||
TURN_MAX_OUTPUT_TOKENS
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::*;
|
||||
|
||||
use super::context::TURN_MAX_OUTPUT_TOKENS;
|
||||
use crate::config::ApiProvider;
|
||||
use crate::models::SystemBlock;
|
||||
use crate::test_support::lock_test_env;
|
||||
use crate::tools::plan::{PlanItemArg, PlanSnapshot, StepStatus};
|
||||
@@ -2103,13 +2104,31 @@ fn context_budget_reserves_output_and_headroom() {
|
||||
let _lock = lock_test_env();
|
||||
// V4 has a 1M context window — the only family that comfortably hosts
|
||||
// a 256K output reservation without saturating the input budget to 0.
|
||||
let budget = context_input_budget("deepseek-v4-pro")
|
||||
let budget = context_input_budget_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro")
|
||||
.expect("deepseek-v4-pro should have a known context window");
|
||||
let v4_window: usize = 1_000_000;
|
||||
let expected = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
|
||||
assert_eq!(budget, expected);
|
||||
}
|
||||
|
||||
#[test]
fn context_budget_uses_conservative_fallback_for_unknown_models() {
    let _lock = lock_test_env();
    // "auto" is not a concrete model id; the budget must still be `Some` so
    // the hard preflight check stays active instead of being disabled.
    let budget = context_input_budget_for_provider(ApiProvider::Openai, "auto")
        .expect("unknown/auto model ids should still get a conservative hard preflight budget");
    // Expected value: a 128,000-token fallback window minus the reserved
    // output for this model and the 1,024-token headroom.
    let expected = 128_000usize - effective_max_output_tokens("auto") as usize - 1_024usize;
    assert_eq!(budget, expected);
}
|
||||
|
||||
#[test]
fn context_budget_uses_provider_effective_window_for_openai_codex() {
    let _lock = lock_test_env();
    // On the OpenAI Codex route the budget is derived from the 400,000-token
    // route-effective window, not from the larger window listed for the
    // `gpt-5.5` model id itself.
    let budget = context_input_budget_for_provider(ApiProvider::OpenaiCodex, "gpt-5.5")
        .expect("OpenAI Codex should use the route-effective context window");
    let expected = 400_000usize - effective_max_output_tokens("gpt-5.5") as usize - 1_024usize;
    assert_eq!(budget, expected);
}
|
||||
|
||||
#[test]
|
||||
fn effective_max_output_tokens_caps_api_request_for_large_window_models() {
|
||||
// Serialize with other tests that mutate DEEPSEEK_MAX_OUTPUT_TOKENS so
|
||||
@@ -2213,7 +2232,8 @@ fn internal_context_budget_tiers_reserved_output_by_window() {
|
||||
// Large-context (>=500K) models reserve the full TURN_MAX_OUTPUT_TOKENS
|
||||
// headroom so long V4 sessions don't compact prematurely.
|
||||
let internal_budget =
|
||||
context_input_budget("deepseek-v4-pro").expect("V4 should have a known context window");
|
||||
context_input_budget_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro")
|
||||
.expect("V4 should have a known context window");
|
||||
let v4_window: usize = 1_000_000;
|
||||
let expected_internal = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
|
||||
assert_eq!(internal_budget, expected_internal);
|
||||
@@ -2222,8 +2242,9 @@ fn internal_context_budget_tiers_reserved_output_by_window() {
|
||||
// deployment must yield a usable positive budget rather than None. The
|
||||
// previous formula reserved the full 262K and computed 256K - 262K - 1K,
|
||||
// which underflowed to None and silently disabled preflight/recovery.
|
||||
let small_window_budget = context_input_budget("qwen3-32b-256k")
|
||||
.expect("a 256K-suffix model must yield Some budget via the effective-cap branch");
|
||||
let small_window_budget =
|
||||
context_input_budget_for_provider(ApiProvider::Openai, "qwen3-32b-256k")
|
||||
.expect("a 256K-suffix model must yield Some budget via the effective-cap branch");
|
||||
let effective_output = effective_max_output_tokens("qwen3-32b-256k") as usize;
|
||||
let expected_small = 256_000 - effective_output - 1_024;
|
||||
assert_eq!(small_window_budget, expected_small);
|
||||
|
||||
@@ -205,7 +205,9 @@ impl Engine {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(input_budget) = context_input_budget(&self.session.model) {
|
||||
if let Some(input_budget) =
|
||||
context_input_budget_for_provider(self.api_provider, &self.session.model)
|
||||
{
|
||||
let estimated_input = self.estimated_input_tokens();
|
||||
if estimated_input > input_budget {
|
||||
if context_recovery_attempts >= MAX_CONTEXT_RECOVERY_ATTEMPTS {
|
||||
|
||||
+78
-10
@@ -246,6 +246,12 @@ pub fn context_window_for_model(model: &str) -> Option<u32> {
|
||||
}
|
||||
return Some(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
|
||||
}
|
||||
if is_openai_gpt_55_api_model(&lower) {
|
||||
return Some(1_050_000);
|
||||
}
|
||||
if is_openai_codex_model(&lower) {
|
||||
return Some(400_000);
|
||||
}
|
||||
if let Some(window) = known_context_window_for_model(&lower) {
|
||||
return Some(window);
|
||||
}
|
||||
@@ -259,7 +265,8 @@ fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
|
||||
match model_lower {
|
||||
// OpenAI API model docs, verified 2026-06-12:
|
||||
// https://developers.openai.com/api/docs/models/gpt-5.5
|
||||
"gpt-5.5" | "gpt-5.5-pro" | "codex-gpt-5.5" | "chatgpt-gpt-5.5" => Some(1_050_000),
|
||||
// Family aliases and snapshots are handled by
|
||||
// `is_openai_gpt_55_api_model` before this table.
|
||||
// OpenAI Codex model docs, verified 2026-06-12:
|
||||
// https://developers.openai.com/api/docs/models/gpt-5-codex
|
||||
// https://developers.openai.com/api/docs/models/gpt-5.3-codex
|
||||
@@ -320,9 +327,11 @@ pub fn max_output_tokens_for_model(model: &str) -> Option<u32> {
|
||||
if lower.contains("deepseek") && lower.contains("v4") {
|
||||
return Some(384_000);
|
||||
}
|
||||
if is_openai_gpt_55_api_model(&lower) || is_openai_codex_model(&lower) {
|
||||
return Some(128_000);
|
||||
}
|
||||
match lower.as_str() {
|
||||
"gpt-5.5" | "gpt-5.5-pro" | "codex-gpt-5.5" | "chatgpt-gpt-5.5" | "gpt-5-codex"
|
||||
| "gpt-5.3-codex" => Some(128_000),
|
||||
"gpt-5-codex" | "gpt-5.3-codex" => Some(128_000),
|
||||
"claude-opus-4-8" => Some(128_000),
|
||||
"claude-sonnet-4-6" | "claude-haiku-4-5" => Some(64_000),
|
||||
"arcee-ai/trinity-large-thinking"
|
||||
@@ -369,10 +378,6 @@ pub fn model_supports_reasoning(model: &str) -> bool {
|
||||
lower.as_str(),
|
||||
"claude-opus-4-8"
|
||||
| "claude-sonnet-4-6"
|
||||
| "gpt-5.5"
|
||||
| "gpt-5.5-pro"
|
||||
| "codex-gpt-5.5"
|
||||
| "chatgpt-gpt-5.5"
|
||||
| "gpt-5-codex"
|
||||
| "gpt-5.3-codex"
|
||||
| "arcee-ai/trinity-large-thinking"
|
||||
@@ -414,9 +419,48 @@ pub fn model_supports_reasoning(model: &str) -> bool {
|
||||
| "z-ai/glm-5.2"
|
||||
| "glm-5.1"
|
||||
| "glm-5.2"
|
||||
) || is_openai_gpt_55_api_model(&lower)
|
||||
|| is_openai_codex_model(&lower)
|
||||
}
|
||||
|
||||
fn is_openai_gpt_55_api_model(model_lower: &str) -> bool {
|
||||
matches!(model_lower, "gpt-5.5" | "gpt-5.5-pro")
|
||||
|| has_date_snapshot_suffix(model_lower, "gpt-5.5-")
|
||||
|| has_date_snapshot_suffix(model_lower, "gpt-5.5-pro-")
|
||||
}
|
||||
|
||||
/// Returns `true` when `model_lower` is one of the model ids treated as part
/// of the OpenAI Codex family.
///
/// Matching is an exact comparison against a fixed list: there is no prefix,
/// suffix, or dated-snapshot handling, and no case folding, so the id is
/// expected to be lowercased by the caller.
fn is_openai_codex_model(model_lower: &str) -> bool {
    matches!(
        model_lower,
        "gpt-5-codex"
            | "gpt-5.1-codex"
            | "gpt-5.1-codex-mini"
            | "gpt-5.1-codex-max"
            | "gpt-5.2-codex"
            | "gpt-5.3-codex"
            | "codex-gpt-5.5"
            | "chatgpt-gpt-5.5"
            | "gpt-5.5-codex"
            | "gpt-5.5-codex-preview"
            | "codex-gpt-5.5-preview"
            | "chatgpt-gpt-5.5-preview"
    )
}
|
||||
|
||||
/// Returns `true` when `model_lower` is `prefix` followed by exactly one
/// `YYYY-MM-DD`-shaped snapshot suffix (for example `gpt-5.5-2026-04-23`
/// with the prefix `gpt-5.5-`).
///
/// Only the shape is checked: the remainder must be ten bytes long, with `-`
/// at offsets 4 and 7 and ASCII digits everywhere else. The digits are not
/// validated as a real calendar date.
fn has_date_snapshot_suffix(model_lower: &str, prefix: &str) -> bool {
    let Some(rest) = model_lower.strip_prefix(prefix) else {
        return false;
    };
    let bytes = rest.as_bytes();
    // The length check runs first, so the fixed-offset indexing that follows
    // cannot go out of bounds.
    bytes.len() == 10
        && bytes[4] == b'-'
        && bytes[7] == b'-'
        && bytes
            .iter()
            .enumerate()
            .all(|(idx, byte)| idx == 4 || idx == 7 || byte.is_ascii_digit())
}
|
||||
|
||||
/// Parse an explicit `_Nk` context-window hint from a model name (vendor
|
||||
/// agnostic). Returns the window in tokens for `N` in `8..=1024`.
|
||||
fn explicit_context_window_hint(model_lower: &str) -> Option<u32> {
|
||||
@@ -632,8 +676,13 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn openai_codex_models_have_verified_context_metadata() {
|
||||
for model in ["gpt-5.5", "codex-gpt-5.5", "chatgpt-gpt-5.5"] {
|
||||
fn openai_api_and_codex_models_have_verified_context_metadata() {
|
||||
for model in [
|
||||
"gpt-5.5",
|
||||
"gpt-5.5-pro",
|
||||
"gpt-5.5-2026-04-23",
|
||||
"gpt-5.5-pro-2026-04-23",
|
||||
] {
|
||||
assert_eq!(context_window_for_model(model), Some(1_050_000));
|
||||
assert_eq!(max_output_tokens_for_model(model), Some(128_000));
|
||||
assert!(model_supports_reasoning(model));
|
||||
@@ -643,11 +692,30 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
for model in ["gpt-5-codex", "gpt-5.3-codex"] {
|
||||
for model in [
|
||||
"gpt-5-codex",
|
||||
"gpt-5.1-codex",
|
||||
"gpt-5.1-codex-mini",
|
||||
"gpt-5.1-codex-max",
|
||||
"gpt-5.2-codex",
|
||||
"gpt-5.3-codex",
|
||||
"codex-gpt-5.5",
|
||||
"chatgpt-gpt-5.5",
|
||||
"gpt-5.5-codex",
|
||||
"gpt-5.5-codex-preview",
|
||||
] {
|
||||
assert_eq!(context_window_for_model(model), Some(400_000));
|
||||
assert_eq!(max_output_tokens_for_model(model), Some(128_000));
|
||||
assert!(model_supports_reasoning(model));
|
||||
assert_eq!(
|
||||
compaction_threshold_for_model_at_percent(model, 80.0),
|
||||
320_000
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(context_window_for_model("gpt-5.5-nano"), None);
|
||||
assert_eq!(max_output_tokens_for_model("gpt-5.5-nano"), None);
|
||||
assert!(!model_supports_reasoning("gpt-5.5-nano"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -34,6 +34,9 @@ pub struct PromptSessionContext<'a> {
|
||||
/// preserving backward compatibility with existing call sites
|
||||
/// that predate dynamic model injection.
|
||||
pub model_id: &'a str,
|
||||
/// Route-effective context window, when known. This can differ from the
|
||||
/// model-family maximum when a provider wrapper exposes a smaller envelope.
|
||||
pub context_window_override: Option<u32>,
|
||||
/// Whether the user-visible transcript renders thinking blocks.
|
||||
/// When false, the prompt should not spend localization pressure on
|
||||
/// `reasoning_content` the user will never see.
|
||||
@@ -52,6 +55,7 @@ impl Default for PromptSessionContext<'_> {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
}
|
||||
@@ -838,12 +842,17 @@ pub(crate) fn render_runtime_policy_reference() -> String {
|
||||
/// constant; this function produces a per-session variant so the prompt
|
||||
/// says "You are deepseek-v4-pro" or "You are deepseek-v4-flash" instead
|
||||
/// of a static placeholder.
|
||||
fn apply_model_template(prompt: &str, model_id: &str) -> String {
|
||||
fn apply_model_template(
|
||||
prompt: &str,
|
||||
model_id: &str,
|
||||
context_window_override: Option<u32>,
|
||||
) -> String {
|
||||
let mut prompt = prompt.replace("{model_id}", model_id);
|
||||
|
||||
// #3025: Substitute model-specific facts so non-DeepSeek models don't
|
||||
// get V4 architecture claims, 1M-window assumptions, or Flash pricing.
|
||||
let ctx_window = crate::models::context_window_for_model(model_id);
|
||||
let ctx_window =
|
||||
context_window_override.or_else(|| crate::models::context_window_for_model(model_id));
|
||||
let window_note = if let Some(window) = ctx_window {
|
||||
format!(
|
||||
"You have a {}-token context window. Do not summarize or delete \
|
||||
@@ -999,7 +1008,7 @@ fn compose_default_static_layers(_personality: Personality, model_id: &str) -> S
|
||||
// Personality is now folded into the YAML constitution (constitution.yaml).
|
||||
// No separate overlay is appended — the base prompt already carries voice,
|
||||
// tone, and presentation guidance via the preamble and article text.
|
||||
apply_model_template(effective_base_prompt().trim(), model_id)
|
||||
apply_model_template(effective_base_prompt().trim(), model_id, None)
|
||||
}
|
||||
|
||||
fn apply_static_prompt_composer(
|
||||
@@ -1069,6 +1078,7 @@ pub fn system_prompt_for_mode_with_context_and_skills(
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -1098,8 +1108,17 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
|
||||
instructions: Option<&[InstructionSource]>,
|
||||
session_context: PromptSessionContext<'_>,
|
||||
) -> SystemPrompt {
|
||||
let mode_prompt =
|
||||
compose_prompt_with_approval_model_and_shell(Personality::Calm, session_context.model_id);
|
||||
let default_layers = apply_model_template(
|
||||
effective_base_prompt().trim(),
|
||||
session_context.model_id,
|
||||
session_context.context_window_override,
|
||||
);
|
||||
let mode_prompt = apply_static_prompt_composer(
|
||||
effective_static_prompt_composer(),
|
||||
Personality::Calm,
|
||||
session_context.model_id,
|
||||
&default_layers,
|
||||
);
|
||||
|
||||
// Load project context from workspace
|
||||
let project_context = load_project_context_with_parents(workspace);
|
||||
@@ -1548,7 +1567,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compose_prompt_for_openai_codex_uses_verified_context_window() {
|
||||
fn compose_prompt_for_openai_api_gpt_55_uses_verified_context_window() {
|
||||
let prompt = compose_prompt_with_approval_model_and_shell(Personality::Calm, "gpt-5.5");
|
||||
assert!(!prompt.contains("Your V4 Characteristics"));
|
||||
assert!(prompt.contains("1050000-token context window"));
|
||||
@@ -1577,11 +1596,18 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn apply_model_template_replaces_placeholder() {
|
||||
let result = apply_model_template("You are {model_id}", "deepseek-v4-pro");
|
||||
let result = apply_model_template("You are {model_id}", "deepseek-v4-pro", None);
|
||||
assert_eq!(result, "You are deepseek-v4-pro");
|
||||
assert!(!result.contains("{model_id}"));
|
||||
}
|
||||
|
||||
#[test]
fn apply_model_template_uses_context_window_override() {
    // An explicit override must win over the window looked up from the model
    // id: the rendered note should mention 400000 tokens and must not mention
    // the 1050000-token window associated with "gpt-5.5".
    let result = apply_model_template("{context_window_note}", "gpt-5.5", Some(400_000));
    assert!(result.contains("400000-token context window"));
    assert!(!result.contains("1050000-token context window"));
}
|
||||
|
||||
#[test]
|
||||
fn compose_prompt_injects_model_id() {
|
||||
let prompt =
|
||||
@@ -1978,6 +2004,7 @@ mod tests {
|
||||
locale_tag: "zh-Hans",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2048,6 +2075,7 @@ mod tests {
|
||||
locale_tag: "zh-Hans",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2091,6 +2119,7 @@ mod tests {
|
||||
locale_tag: "zh-Hans",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: false,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2144,6 +2173,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2248,6 +2278,7 @@ mod tests {
|
||||
locale_tag: "ja",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2285,6 +2316,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2314,6 +2346,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2372,6 +2405,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2401,6 +2435,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2608,6 +2643,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -2643,6 +2679,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: None,
|
||||
},
|
||||
@@ -3186,6 +3223,7 @@ mod tests {
|
||||
locale_tag: "en",
|
||||
translation_enabled: false,
|
||||
model_id: "codewhale",
|
||||
context_window_override: None,
|
||||
show_thinking: true,
|
||||
verbosity: Some(" Concise "),
|
||||
},
|
||||
|
||||
@@ -4,10 +4,9 @@ use std::collections::HashSet;
|
||||
use std::fmt::Write;
|
||||
|
||||
use crate::compaction::estimate_input_tokens_conservative;
|
||||
use crate::config::provider_capability;
|
||||
use crate::localization::{Locale, MessageId, tr};
|
||||
use crate::models::{
|
||||
LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, SystemPrompt, context_window_for_model,
|
||||
};
|
||||
use crate::models::SystemPrompt;
|
||||
use crate::session_manager::SessionContextReference;
|
||||
use crate::tui::app::{App, ToolDetailRecord};
|
||||
use crate::tui::file_mention::ContextReferenceSource;
|
||||
@@ -154,8 +153,8 @@ pub fn build_context_inspector_text(app: &App, locale: Locale) -> String {
|
||||
}
|
||||
|
||||
fn context_usage(app: &App) -> (usize, u32, f64) {
|
||||
let max = context_window_for_model(app.effective_model_for_budget())
|
||||
.unwrap_or(LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS);
|
||||
let max =
|
||||
provider_capability(app.api_provider, app.effective_model_for_budget()).context_window;
|
||||
let estimated =
|
||||
estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
|
||||
let total_chars = estimate_message_chars(&app.api_messages);
|
||||
|
||||
@@ -31,6 +31,7 @@ use super::app::{
|
||||
use super::history::{GenericToolCell, HistoryCell, ToolCell, ToolStatus, summarize_tool_output};
|
||||
use super::subagent_routing::active_fanout_counts;
|
||||
use super::ui_text::{concise_shell_command_label, truncate_line_to_width};
|
||||
use crate::config::provider_capability;
|
||||
|
||||
/// Tolerance for floating-point cost comparison in the sidebar breakdown.
|
||||
/// Must be large enough that accumulated f64 error across hundreds of turns
|
||||
@@ -2407,7 +2408,7 @@ fn render_context_panel(f: &mut Frame, area: Rect, app: &mut App) {
|
||||
|
||||
// ── Token usage ──────────────────────────────────────────────
|
||||
let total_tokens = app.session.total_conversation_tokens;
|
||||
let window = crate::models::context_window_for_model(&app.model).unwrap_or(1_048_576);
|
||||
let window = provider_capability(app.api_provider, &app.model).context_window;
|
||||
let pct = if window > 0 {
|
||||
((total_tokens as f64 / window as f64) * 100.0).clamp(0.0, 100.0)
|
||||
} else {
|
||||
|
||||
@@ -43,7 +43,7 @@ use crate::commands;
|
||||
use crate::compaction::estimate_input_tokens_conservative;
|
||||
use crate::config::{
|
||||
ApiProvider, Config, DEFAULT_NVIDIA_NIM_BASE_URL, ProviderConfig, ProvidersConfig, StatusItem,
|
||||
UpdateConfig, save_provider_auth_mode_for,
|
||||
UpdateConfig, provider_capability, save_provider_auth_mode_for,
|
||||
};
|
||||
use crate::config_ui::{self, ConfigUiMode, WebConfigSession, WebConfigSessionEvent};
|
||||
use crate::core::engine::{EngineConfig, EngineHandle, spawn_engine};
|
||||
@@ -52,9 +52,7 @@ use crate::core::ops::{Op, USER_SHELL_TOOL_ID_PREFIX};
|
||||
use crate::hooks::{HookEvent, HookExecutor, TurnEndPayloadInput, TurnEndTotals};
|
||||
use crate::llm_client::LlmClient;
|
||||
use crate::localization::{MessageId, tr};
|
||||
use crate::models::{
|
||||
ContentBlock, Message, MessageRequest, SystemPrompt, Usage, context_window_for_model,
|
||||
};
|
||||
use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt, Usage};
|
||||
use crate::palette;
|
||||
use crate::prompts;
|
||||
use crate::session_manager::{
|
||||
@@ -5659,6 +5657,9 @@ async fn dispatch_user_message(
|
||||
locale_tag: app.ui_locale.tag(),
|
||||
translation_enabled: app.translation_enabled,
|
||||
model_id: &app.model,
|
||||
context_window_override: Some(
|
||||
provider_capability(app.api_provider, &app.model).context_window,
|
||||
),
|
||||
show_thinking: app.show_thinking,
|
||||
verbosity: app.verbosity.as_deref(),
|
||||
},
|
||||
@@ -9425,7 +9426,8 @@ fn estimated_context_tokens(app: &App) -> Option<i64> {
|
||||
}
|
||||
|
||||
pub(crate) fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
|
||||
let max = context_window_for_model(app.effective_model_for_budget())?;
|
||||
let max =
|
||||
provider_capability(app.api_provider, app.effective_model_for_budget()).context_window;
|
||||
let max_i64 = i64::from(max);
|
||||
let reported = app
|
||||
.session
|
||||
|
||||
+3
-2
@@ -143,7 +143,7 @@ endpoint.
|
||||
| `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. |
|
||||
| `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`, `HF_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible router route. Accepted aliases: `huggingface`, `hugging-face`, `hugging_face`, `hf`. Org-prefixed model IDs pass through. `HUGGINGFACE_MODEL` and `HF_MODEL` are accepted. Hub browsing/export are separate future features. |
|
||||
| `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. |
|
||||
| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. |
|
||||
| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. CodeWhale budgets this route with the 400K Codex-family effective context window even when the public API model table lists a larger native `gpt-5.5` window. |
|
||||
| `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. |
|
||||
|
||||
### Hugging Face Provider vs MCP vs Hub
|
||||
@@ -262,7 +262,8 @@ Anthropic uses Messages, and `openai-codex` uses Responses.
|
||||
| OpenRouter Qwen 3.6 Flash / Plus | 1,000,000 | 65,536 | yes | no | not documented in code |
|
||||
| OpenRouter Qwen 3.6 35B / 27B | 262,144 | 262,140 | yes | no | not documented in code |
|
||||
| OpenRouter Qwen 3.6 Max Preview | 262,144 | 65,536 | yes | no | not documented in code |
|
||||
| OpenAI Codex / ChatGPT `gpt-5.5` | 1,050,000 | 128,000 | yes | no | not documented in code |
|
||||
| OpenAI API `gpt-5.5` | 1,050,000 | 128,000 | yes | no | not documented in code |
|
||||
| OpenAI Codex / ChatGPT route (`openai-codex`) | 400,000 effective | 128,000 | yes | no | route uses Responses payload at `/codex/responses` |
|
||||
| Wanjie Ark `reasoner` / `r1` model IDs | 128,000 | 4,096 | yes | no | not documented in code |
|
||||
| Direct Arcee API `trinity-large-thinking` | 262,144 | 262,144 | yes | no | not documented in code |
|
||||
| Direct Arcee API `trinity-large-preview` | 262,144 | 4,096 | no in doctor capability metadata | no | not documented in code |
|
||||
|
||||
Reference in New Issue
Block a user