fix(providers): use model-based lookups for Moonshot/OpenAI/Atlascloud/Ollama capability (#3023)

Three changes to fix provider capability reporting:

1. Delete the Openai/Atlascloud/Moonshot early-return arm in
   provider_capability() so these providers use the generic model-based
   path. Moonshot models now correctly report a 262,144-token context
   window and thinking_supported: true (via models.rs lookups).

2. Delete the Ollama hardcoded arm so Ollama also uses model-based
   lookups.  The generic fallback now uses 8192 for Ollama (conservative
   for small local models) instead of the 128K default.

3. Ollama fallback: when context_window_for_model returns None and
   the provider is Ollama, default to 8192 tokens instead of
   LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS (128K). This is the code change
   that implements the fallback described in item 2.
This commit is contained in:
Hunter Bown
2026-06-10 16:27:49 -07:00
parent b23067bacd
commit 139fe07d71
+12 -32
View File
@@ -387,22 +387,11 @@ pub enum RequestPayloadMode {
/// in the API payload (after normalization / provider-specific mapping).
#[must_use]
pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability {
if matches!(
provider,
ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Moonshot
) {
return ProviderCapability {
provider,
resolved_model: resolved_model.to_string(),
context_window: crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS,
max_output: 4096,
thinking_supported: false,
cache_telemetry_supported: false,
request_payload_mode: RequestPayloadMode::ChatCompletions,
alias_deprecation: None,
};
}
// #3023: Openai, Atlascloud, and Moonshot no longer take an early return
// here; they use the generic model-based path below, which resolves
// context windows, output limits, and thinking support from models.rs
// lookups. Ollama also falls through to the model-based lookups, with
// 8192 as the last-resort context window instead of a hardcoded value.
if matches!(provider, ApiProvider::XiaomiMimo) {
return ProviderCapability {
provider,
@@ -417,19 +406,6 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
};
}
if matches!(provider, ApiProvider::Ollama) {
return ProviderCapability {
provider,
resolved_model: resolved_model.to_string(),
context_window: 8192,
max_output: 4096,
thinking_supported: false,
cache_telemetry_supported: false,
request_payload_mode: RequestPayloadMode::ChatCompletions,
alias_deprecation: None,
};
}
if matches!(provider, ApiProvider::Arcee) {
return ProviderCapability {
provider,
@@ -459,12 +435,16 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
&& (model_lower.contains("reasoner") || model_lower.contains("r1"));
// Context window: V4-class models get 1M, everything else falls through
// to the model's own lookup or a default.
// to the model's own lookup or a default. Ollama defaults to 8192
// (conservative for small local models) instead of 128K.
let context_window = if is_v4_pro || is_v4_flash {
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
} else if let Some(window) = crate::models::context_window_for_model(resolved_model) {
window
} else if matches!(provider, ApiProvider::Ollama) {
8192
} else {
crate::models::context_window_for_model(resolved_model)
.unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
};
// Max output tokens: official DeepSeek V4 API metadata lists 384K;