From 139fe07d71d796febec43d3634c2897e11df3246 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Wed, 10 Jun 2026 16:27:49 -0700 Subject: [PATCH] fix(providers): use model-based lookups for Moonshot/OpenAI/Atlascloud/Ollama capability (#3023) Three changes to fix provider capability reporting: 1. Delete the Openai/Atlascloud/Moonshot early-return arm in provider_capability() so these providers use the generic model-based path. Moonshot models now correctly report 262,144 context window and thinking_supported: true (via models.rs lookups). 2. Delete the Ollama hardcoded arm so Ollama also uses model-based lookups. The generic fallback now uses 8192 for Ollama (conservative for small local models) instead of the 128K default. 3. Ollama fallback: when context_window_for_model returns None and the provider is Ollama, default to 8192 instead of LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS (128K). --- crates/tui/src/config.rs | 44 +++++++++++----------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index d8f17de4..4c15aa6a 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -387,22 +387,11 @@ pub enum RequestPayloadMode { /// in the API payload (after normalization / provider-specific mapping). 
#[must_use] pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability { - if matches!( - provider, - ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Moonshot - ) { - return ProviderCapability { - provider, - resolved_model: resolved_model.to_string(), - context_window: crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, - max_output: 4096, - thinking_supported: false, - cache_telemetry_supported: false, - request_payload_mode: RequestPayloadMode::ChatCompletions, - alias_deprecation: None, - }; - } - + // #3023: Openai/Atlascloud/Moonshot no longer take an early return here; + // they use the generic model-based path below, which resolves context + // windows, output limits, and thinking support from models.rs lookups. + // Ollama also goes through the model-based lookups, with 8192 as the + // last-resort context window instead of a value hardcoded for every model. if matches!(provider, ApiProvider::XiaomiMimo) { return ProviderCapability { provider, @@ -417,19 +406,6 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi }; } - if matches!(provider, ApiProvider::Ollama) { - return ProviderCapability { - provider, - resolved_model: resolved_model.to_string(), - context_window: 8192, - max_output: 4096, - thinking_supported: false, - cache_telemetry_supported: false, - request_payload_mode: RequestPayloadMode::ChatCompletions, - alias_deprecation: None, - }; - } - if matches!(provider, ApiProvider::Arcee) { return ProviderCapability { provider, @@ -459,12 +435,16 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi && (model_lower.contains("reasoner") || model_lower.contains("r1")); // Context window: V4-class models get 1M, everything else falls through - // to the model's own lookup or a default. + // to the model's own lookup or a default. Ollama defaults to 8192 + // (conservative for small local models) instead of 128K. 
let context_window = if is_v4_pro || is_v4_flash { crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS + } else if let Some(window) = crate::models::context_window_for_model(resolved_model) { + window + } else if matches!(provider, ApiProvider::Ollama) { + 8192 } else { - crate::models::context_window_for_model(resolved_model) - .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS) + crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS }; // Max output tokens: official DeepSeek V4 API metadata lists 384K;