fix(providers): use model-based lookups for Moonshot/OpenAI/Atlascloud/Ollama capability (#3023)

Three changes to fix provider capability reporting: 1. Delete the Openai/Atlascloud/Moonshot early-return arm in provider_capability() so these providers use the generic model-based path. Moonshot models now correctly report a 262,144-token context window and thinking_supported: true (via models.rs lookups). 2. Delete the hardcoded Ollama arm so Ollama also uses model-based lookups. 3. Add an Ollama-specific fallback to the generic path: when context_window_for_model returns None and the provider is Ollama, default to 8192 (conservative for small local models) instead of LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS (128K).
2026-06-10 16:27:49 -07:00
parent b23067bacd
commit 139fe07d71
1 changed file with 12 additions and 32 deletions
@@ -387,22 +387,11 @@ pub enum RequestPayloadMode {
 /// in the API payload (after normalization / provider-specific mapping).
 #[must_use]
 pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability {
-    if matches!(
-        provider,
-        ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Moonshot
-    ) {
-        return ProviderCapability {
-            provider,
-            resolved_model: resolved_model.to_string(),
-            context_window: crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS,
-            max_output: 4096,
-            thinking_supported: false,
-            cache_telemetry_supported: false,
-            request_payload_mode: RequestPayloadMode::ChatCompletions,
-            alias_deprecation: None,
-        };
-    }
-
+    // #3023: Openai/Atlascloud/Moonshot no longer take an early return;
+    // they use the generic model-based path below, which resolves context
+    // windows, output limits, and thinking support from models.rs lookups.
+    // Ollama also uses model-based lookups, with 8192 as the last-resort
+    // context-window fallback instead of a hardcoded value for every model.
    if matches!(provider, ApiProvider::XiaomiMimo) {
        return ProviderCapability {
            provider,
@@ -417,19 +406,6 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
        };
    }

-    if matches!(provider, ApiProvider::Ollama) {
-        return ProviderCapability {
-            provider,
-            resolved_model: resolved_model.to_string(),
-            context_window: 8192,
-            max_output: 4096,
-            thinking_supported: false,
-            cache_telemetry_supported: false,
-            request_payload_mode: RequestPayloadMode::ChatCompletions,
-            alias_deprecation: None,
-        };
-    }
-
    if matches!(provider, ApiProvider::Arcee) {
        return ProviderCapability {
            provider,
@@ -459,12 +435,16 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
        && (model_lower.contains("reasoner") || model_lower.contains("r1"));

    // Context window: V4-class models get 1M, everything else falls through
-    // to the model's own lookup or a default.
+    // to the model's own lookup or a default.  Ollama defaults to 8192
+    // (conservative for small local models) instead of 128K.
    let context_window = if is_v4_pro || is_v4_flash {
        crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
+    } else if let Some(window) = crate::models::context_window_for_model(resolved_model) {
+        window
+    } else if matches!(provider, ApiProvider::Ollama) {
+        8192
    } else {
-        crate::models::context_window_for_model(resolved_model)
-            .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
+        crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
    };

    // Max output tokens: official DeepSeek V4 API metadata lists 384K;