diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index a64b2ebd..59d31285 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -387,22 +387,11 @@ pub enum RequestPayloadMode { /// in the API payload (after normalization / provider-specific mapping). #[must_use] pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability { - if matches!( - provider, - ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Moonshot - ) { - return ProviderCapability { - provider, - resolved_model: resolved_model.to_string(), - context_window: crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, - max_output: 4096, - thinking_supported: false, - cache_telemetry_supported: false, - request_payload_mode: RequestPayloadMode::ChatCompletions, - alias_deprecation: None, - }; - } - + // #3023: Delete the Openai/Atlascloud/Moonshot early-return so these + // providers use the generic model-based path below, which correctly + // resolves context windows, output limits, and thinking support from + // models.rs lookups. Ollama also falls through to model-based lookups + // with 8192 as the last-resort fallback instead of a hardcoded floor. if matches!(provider, ApiProvider::XiaomiMimo) { return ProviderCapability { provider, @@ -417,19 +406,6 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi }; } - if matches!(provider, ApiProvider::Ollama) { - return ProviderCapability { - provider, - resolved_model: resolved_model.to_string(), - context_window: 8192, - max_output: 4096, - thinking_supported: false, - cache_telemetry_supported: false, - request_payload_mode: RequestPayloadMode::ChatCompletions, - alias_deprecation: None, - }; - } - if matches!(provider, ApiProvider::Arcee) { return ProviderCapability { provider, @@ -459,12 +435,16 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi && (model_lower.contains("reasoner") || model_lower.contains("r1")); // Context window: V4-class models get 1M, everything else falls through - // to the model's own lookup or a default. + // to the model's own lookup or a default. Ollama defaults to 8192 + // (conservative for small local models) instead of 128K. let context_window = if is_v4_pro || is_v4_flash { crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS + } else if let Some(window) = crate::models::context_window_for_model(resolved_model) { + window + } else if matches!(provider, ApiProvider::Ollama) { + 8192 } else { - crate::models::context_window_for_model(resolved_model) - .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS) + crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS }; // Max output tokens: official DeepSeek V4 API metadata lists 384K; @@ -10808,14 +10788,30 @@ model = "deepseek-ai/deepseek-v4-pro" } #[test] - fn provider_capability_atlascloud_custom_model_is_chat_completions_without_thinking() { + fn provider_capability_atlascloud_v4_model_resolves_model_metadata() { + // #3023: Atlascloud uses the generic model-based path, so its default + // DeepSeek V4 model resolves the real V4 metadata instead of the old + // hardcoded legacy floor. let cap = provider_capability(ApiProvider::Atlascloud, "deepseek-ai/deepseek-v4-flash"); assert_eq!( cap.context_window, - crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS + crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 4096); - assert!(!cap.thinking_supported); + assert_eq!(cap.max_output, 384_000); + assert!(cap.thinking_supported); + assert!(!cap.cache_telemetry_supported); + assert_eq!( + cap.request_payload_mode, + RequestPayloadMode::ChatCompletions + ); + } + + #[test] + fn provider_capability_moonshot_default_model_resolves_kimi_metadata() { + let cap = provider_capability(ApiProvider::Moonshot, DEFAULT_MOONSHOT_MODEL); + assert_eq!(cap.context_window, 262_144); + assert_eq!(cap.max_output, 262_144); + assert!(cap.thinking_supported); assert!(!cap.cache_telemetry_supported); assert_eq!( cap.request_payload_mode, @@ -10840,8 +10836,26 @@ model = "deepseek-ai/deepseek-v4-pro" } #[test] - fn provider_capability_ollama_is_openai_compatible_without_thinking() { + fn provider_capability_ollama_deepseek_tag_uses_deepseek_heuristic() { + // #3023: known model families resolve through models.rs lookups even + // on Ollama — a legacy DeepSeek tag gets the 128K heuristic window. let cap = provider_capability(ApiProvider::Ollama, "deepseek-v3.1:671b"); + assert_eq!( + cap.context_window, + crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS + ); + assert_eq!(cap.max_output, 4096); + assert!(!cap.thinking_supported); + assert!(!cap.cache_telemetry_supported); + assert_eq!( + cap.request_payload_mode, + RequestPayloadMode::ChatCompletions + ); + } + + #[test] + fn provider_capability_ollama_unknown_model_falls_back_to_8192() { + let cap = provider_capability(ApiProvider::Ollama, "llama3.2:3b"); assert_eq!(cap.context_window, 8192); assert_eq!(cap.max_output, 4096); assert!(!cap.thinking_supported); diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 859c4525..700e096e 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -2930,6 +2930,33 @@ async fn pre_request_refresh_skips_compaction_below_normal_threshold() { assert_eq!(engine.session.messages.len(), before_len); } +#[test] +fn capacity_observation_uses_bare_kimi_context_window() { + // #3023: capacity math reads models::context_window_for_model directly, + // so bare Moonshot ids must resolve their real window, not the 128K + // legacy fallback. + let mut engine = build_engine_with_capacity(CapacityControllerConfig::default()); + engine.session.model = "kimi-k2.6".to_string(); + engine.session.messages.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "x".repeat(40_000), + cache_control: None, + }], + }); + + let estimated = engine.estimated_input_tokens() as f64; + let turn = TurnContext::new(1); + let observation = engine.capacity_observation(&turn); + + let expected = estimated / 262_144.0; + assert!( + (observation.context_used_ratio - expected).abs() < 1e-9, + "context_used_ratio must use kimi-k2.6's 262,144-token window (got {})", + observation.context_used_ratio + ); +} + #[tokio::test] async fn pre_request_refresh_invoked_when_medium_risk() { let capacity = CapacityControllerConfig { diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index ed0b505a..3159342d 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -263,9 +263,13 @@ fn known_context_window_for_model(model_lower: &str) -> Option { | "qwen/qwen3.6-27b" | "tencent/hy3-preview" | "moonshotai/kimi-k2.6" - | "moonshotai/kimi-k2.6:free" => Some(262_144), - "z-ai/glm-5.1" | "z-ai/glm-5v-turbo" => Some(202_752), - "minimax/minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => Some(1_000_000), + | "moonshotai/kimi-k2.6:free" + | "kimi-k2.6" + | "kimi-for-coding" => Some(262_144), + "z-ai/glm-5.1" | "z-ai/glm-5v-turbo" | "glm-5.1" | "glm-5v-turbo" => Some(202_752), + "minimax/minimax-m3" | "minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => { + Some(1_000_000) + } "xiaomi/mimo-v2.5-pro" | "xiaomi/mimo-v2.5" | "mimo-v2.5-pro" | "mimo-v2.5" => { Some(1_000_000) } @@ -285,10 +289,12 @@ pub fn max_output_tokens_for_model(model: &str) -> Option { return Some(384_000); } match lower.as_str() { - "arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "moonshotai/kimi-k2.6" => { - Some(262_144) - } - "minimax/minimax-m3" => Some(524_288), + "arcee-ai/trinity-large-thinking" + | "trinity-large-thinking" + | "moonshotai/kimi-k2.6" + | "kimi-k2.6" + | "kimi-for-coding" => Some(262_144), + "minimax/minimax-m3" | "minimax-m3" => Some(524_288), "qwen/qwen3.6-35b-a3b" | "qwen/qwen3.6-27b" => Some(262_140), "qwen/qwen3.6-flash" | "qwen/qwen3.6-max-preview" | "qwen/qwen3.6-plus" => Some(65_536), "xiaomi/mimo-v2.5-pro" | "xiaomi/mimo-v2.5" | "mimo-v2.5-pro" | "mimo-v2.5" => { @@ -328,7 +334,9 @@ pub fn model_supports_reasoning(model: &str) -> bool { | "google/gemma-4-26b-a4b-it:free" | "moonshotai/kimi-k2.6" | "moonshotai/kimi-k2.6:free" + | "kimi-k2.6" | "minimax/minimax-m3" + | "minimax-m3" | "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-35b-a3b" @@ -341,6 +349,7 @@ pub fn model_supports_reasoning(model: &str) -> bool { | "mimo-v2.5-pro" | "mimo-v2.5" | "z-ai/glm-5.1" + | "glm-5.1" ) } @@ -602,6 +611,31 @@ mod tests { ); } + #[test] + fn bare_provider_model_ids_mirror_vendor_prefixed_rows() { + // Direct-provider routes (Moonshot, MiniMax, Z.ai) serve bare model + // ids without the OpenRouter vendor prefix; both spellings must + // resolve identical metadata (#1310 ride-along on #3023). + for (model, expected_window) in [ + ("kimi-k2.6", 262_144), + ("minimax-m3", 1_000_000), + ("glm-5.1", 202_752), + ] { + assert_eq!(context_window_for_model(model), Some(expected_window)); + assert!(model_supports_reasoning(model)); + } + assert_eq!(context_window_for_model("kimi-for-coding"), Some(262_144)); + assert!(!model_supports_reasoning("kimi-for-coding")); + assert_eq!(context_window_for_model("glm-5v-turbo"), Some(202_752)); + assert!(!model_supports_reasoning("glm-5v-turbo")); + assert_eq!(max_output_tokens_for_model("kimi-k2.6"), Some(262_144)); + assert_eq!( + max_output_tokens_for_model("kimi-for-coding"), + Some(262_144) + ); + assert_eq!(max_output_tokens_for_model("minimax-m3"), Some(524_288)); + } + #[test] fn deepseek_models_with_k_suffix_use_hint() { assert_eq!(context_window_for_model("deepseek-v3.2-32k"), Some(32_000));