fix(providers): use model-based lookups for Moonshot/OpenAI/Atlascloud/Ollama capability (#3023)
Three changes to fix provider capability reporting: 1. Delete the Openai/Atlascloud/Moonshot early-return arm in provider_capability() so these providers use the generic model-based path. Moonshot models now correctly report a 262,144-token context window and thinking_supported: true (via models.rs lookups). 2. Delete the hardcoded Ollama arm so Ollama also uses model-based lookups. The generic path's fallback for Ollama is now 8192 tokens (conservative for small local models) instead of the 128K-token default. 3. Ollama fallback: when context_window_for_model returns None and the provider is Ollama, default to 8192 tokens instead of LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS (128K).
This commit is contained in:
+12
-32
@@ -387,22 +387,11 @@ pub enum RequestPayloadMode {
|
||||
/// in the API payload (after normalization / provider-specific mapping).
|
||||
#[must_use]
|
||||
pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability {
|
||||
if matches!(
|
||||
provider,
|
||||
ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Moonshot
|
||||
) {
|
||||
return ProviderCapability {
|
||||
provider,
|
||||
resolved_model: resolved_model.to_string(),
|
||||
context_window: crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS,
|
||||
max_output: 4096,
|
||||
thinking_supported: false,
|
||||
cache_telemetry_supported: false,
|
||||
request_payload_mode: RequestPayloadMode::ChatCompletions,
|
||||
alias_deprecation: None,
|
||||
};
|
||||
}
|
||||
|
||||
// #3023: Removed the Openai/Atlascloud/Moonshot early-return so these
|
||||
// providers use the generic model-based path below, which correctly
|
||||
// resolves context windows, output limits, and thinking support from
|
||||
// models.rs lookups. Ollama also falls through to model-based lookups
|
||||
// with 8192 as the last-resort fallback instead of a hardcoded floor.
|
||||
if matches!(provider, ApiProvider::XiaomiMimo) {
|
||||
return ProviderCapability {
|
||||
provider,
|
||||
@@ -417,19 +406,6 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
|
||||
};
|
||||
}
|
||||
|
||||
if matches!(provider, ApiProvider::Ollama) {
|
||||
return ProviderCapability {
|
||||
provider,
|
||||
resolved_model: resolved_model.to_string(),
|
||||
context_window: 8192,
|
||||
max_output: 4096,
|
||||
thinking_supported: false,
|
||||
cache_telemetry_supported: false,
|
||||
request_payload_mode: RequestPayloadMode::ChatCompletions,
|
||||
alias_deprecation: None,
|
||||
};
|
||||
}
|
||||
|
||||
if matches!(provider, ApiProvider::Arcee) {
|
||||
return ProviderCapability {
|
||||
provider,
|
||||
@@ -459,12 +435,16 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
|
||||
&& (model_lower.contains("reasoner") || model_lower.contains("r1"));
|
||||
|
||||
// Context window: V4-class models get 1M, everything else falls through
|
||||
// to the model's own lookup or a default.
|
||||
// to the model's own lookup or a default. Ollama defaults to 8192
|
||||
// (conservative for small local models) instead of 128K.
|
||||
let context_window = if is_v4_pro || is_v4_flash {
|
||||
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
|
||||
} else if let Some(window) = crate::models::context_window_for_model(resolved_model) {
|
||||
window
|
||||
} else if matches!(provider, ApiProvider::Ollama) {
|
||||
8192
|
||||
} else {
|
||||
crate::models::context_window_for_model(resolved_model)
|
||||
.unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
|
||||
crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
|
||||
};
|
||||
|
||||
// Max output tokens: official DeepSeek V4 API metadata lists 384K;
|
||||
|
||||
Reference in New Issue
Block a user