Merge PR #3047 from Hmbown: model-based capability lookups for Moonshot/OpenAI/Atlascloud + bare-id registry rows

fix(providers): use model-based lookups for Moonshot/OpenAI/Atlascloud/Ollama capability
This commit is contained in:
Hunter Bown
2026-06-10 22:29:52 -07:00
committed by GitHub
3 changed files with 119 additions and 44 deletions
+51 -37
View File
@@ -387,22 +387,11 @@ pub enum RequestPayloadMode {
/// in the API payload (after normalization / provider-specific mapping).
#[must_use]
pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability {
if matches!(
provider,
ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Moonshot
) {
return ProviderCapability {
provider,
resolved_model: resolved_model.to_string(),
context_window: crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS,
max_output: 4096,
thinking_supported: false,
cache_telemetry_supported: false,
request_payload_mode: RequestPayloadMode::ChatCompletions,
alias_deprecation: None,
};
}
// #3023: Openai/Atlascloud/Moonshot intentionally have no early-return
// here; they use the generic model-based path below, which resolves
// context windows, output limits, and thinking support from models.rs
// lookups. Ollama also falls through to the model-based lookups, with
// 8192 as the last-resort context-window fallback instead of a
// hardcoded floor.
if matches!(provider, ApiProvider::XiaomiMimo) {
return ProviderCapability {
provider,
@@ -417,19 +406,6 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
};
}
if matches!(provider, ApiProvider::Ollama) {
return ProviderCapability {
provider,
resolved_model: resolved_model.to_string(),
context_window: 8192,
max_output: 4096,
thinking_supported: false,
cache_telemetry_supported: false,
request_payload_mode: RequestPayloadMode::ChatCompletions,
alias_deprecation: None,
};
}
if matches!(provider, ApiProvider::Arcee) {
return ProviderCapability {
provider,
@@ -459,12 +435,16 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
&& (model_lower.contains("reasoner") || model_lower.contains("r1"));
// Context window: V4-class models get 1M, everything else falls through
// to the model's own lookup or a default.
// to the model's own lookup or a default. Ollama defaults to 8192
// (conservative for small local models) instead of 128K.
let context_window = if is_v4_pro || is_v4_flash {
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
} else if let Some(window) = crate::models::context_window_for_model(resolved_model) {
window
} else if matches!(provider, ApiProvider::Ollama) {
8192
} else {
crate::models::context_window_for_model(resolved_model)
.unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
};
// Max output tokens: official DeepSeek V4 API metadata lists 384K;
@@ -10808,14 +10788,30 @@ model = "deepseek-ai/deepseek-v4-pro"
}
#[test]
fn provider_capability_atlascloud_custom_model_is_chat_completions_without_thinking() {
fn provider_capability_atlascloud_v4_model_resolves_model_metadata() {
// #3023: Atlascloud uses the generic model-based path, so its default
// DeepSeek V4 model resolves the real V4 metadata instead of the old
// hardcoded legacy floor.
let cap = provider_capability(ApiProvider::Atlascloud, "deepseek-ai/deepseek-v4-flash");
assert_eq!(
cap.context_window,
crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 4096);
assert!(!cap.thinking_supported);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(!cap.cache_telemetry_supported);
assert_eq!(
cap.request_payload_mode,
RequestPayloadMode::ChatCompletions
);
}
#[test]
fn provider_capability_moonshot_default_model_resolves_kimi_metadata() {
let cap = provider_capability(ApiProvider::Moonshot, DEFAULT_MOONSHOT_MODEL);
assert_eq!(cap.context_window, 262_144);
assert_eq!(cap.max_output, 262_144);
assert!(cap.thinking_supported);
assert!(!cap.cache_telemetry_supported);
assert_eq!(
cap.request_payload_mode,
@@ -10840,8 +10836,26 @@ model = "deepseek-ai/deepseek-v4-pro"
}
#[test]
fn provider_capability_ollama_is_openai_compatible_without_thinking() {
fn provider_capability_ollama_deepseek_tag_uses_deepseek_heuristic() {
    // #3023: a model tag from a known family is resolved through the
    // models.rs lookups even when it is served by Ollama, so this legacy
    // DeepSeek tag is expected to report the 128K heuristic window rather
    // than Ollama's 8192 last-resort fallback.
    let expected_window = crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS;
    let expected_mode = RequestPayloadMode::ChatCompletions;

    let capability = provider_capability(ApiProvider::Ollama, "deepseek-v3.1:671b");

    // Limits: legacy window with the small default output cap.
    assert_eq!(capability.context_window, expected_window);
    assert_eq!(capability.max_output, 4096);

    // Feature flags: neither thinking nor cache telemetry is reported.
    assert!(!capability.thinking_supported);
    assert!(!capability.cache_telemetry_supported);

    // Wire format: plain chat-completions payloads.
    assert_eq!(capability.request_payload_mode, expected_mode);
}
#[test]
fn provider_capability_ollama_unknown_model_falls_back_to_8192() {
let cap = provider_capability(ApiProvider::Ollama, "llama3.2:3b");
assert_eq!(cap.context_window, 8192);
assert_eq!(cap.max_output, 4096);
assert!(!cap.thinking_supported);
+27
View File
@@ -2930,6 +2930,33 @@ async fn pre_request_refresh_skips_compaction_below_normal_threshold() {
assert_eq!(engine.session.messages.len(), before_len);
}
#[test]
fn capacity_observation_uses_bare_kimi_context_window() {
    // #3023: per the original note, capacity math reads
    // models::context_window_for_model directly, so a bare Moonshot id has
    // to resolve its real window instead of the 128K legacy fallback.
    let mut engine = build_engine_with_capacity(CapacityControllerConfig::default());
    engine.session.model = String::from("kimi-k2.6");

    // One large user message so the estimated input is comfortably non-zero.
    let filler = ContentBlock::Text {
        text: "x".repeat(40_000),
        cache_control: None,
    };
    engine.session.messages.push(Message {
        role: String::from("user"),
        content: vec![filler],
    });

    // Expected ratio = estimated input tokens / kimi-k2.6's 262,144 window.
    let expected_ratio = engine.estimated_input_tokens() as f64 / 262_144.0;

    let turn = TurnContext::new(1);
    let observation = engine.capacity_observation(&turn);

    let drift = (observation.context_used_ratio - expected_ratio).abs();
    assert!(
        drift < 1e-9,
        "context_used_ratio must use kimi-k2.6's 262,144-token window (got {})",
        observation.context_used_ratio
    );
}
#[tokio::test]
async fn pre_request_refresh_invoked_when_medium_risk() {
let capacity = CapacityControllerConfig {
+41 -7
View File
@@ -263,9 +263,13 @@ fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
| "qwen/qwen3.6-27b"
| "tencent/hy3-preview"
| "moonshotai/kimi-k2.6"
| "moonshotai/kimi-k2.6:free" => Some(262_144),
"z-ai/glm-5.1" | "z-ai/glm-5v-turbo" => Some(202_752),
"minimax/minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => Some(1_000_000),
| "moonshotai/kimi-k2.6:free"
| "kimi-k2.6"
| "kimi-for-coding" => Some(262_144),
"z-ai/glm-5.1" | "z-ai/glm-5v-turbo" | "glm-5.1" | "glm-5v-turbo" => Some(202_752),
"minimax/minimax-m3" | "minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => {
Some(1_000_000)
}
"xiaomi/mimo-v2.5-pro" | "xiaomi/mimo-v2.5" | "mimo-v2.5-pro" | "mimo-v2.5" => {
Some(1_000_000)
}
@@ -285,10 +289,12 @@ pub fn max_output_tokens_for_model(model: &str) -> Option<u32> {
return Some(384_000);
}
match lower.as_str() {
"arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "moonshotai/kimi-k2.6" => {
Some(262_144)
}
"minimax/minimax-m3" => Some(524_288),
"arcee-ai/trinity-large-thinking"
| "trinity-large-thinking"
| "moonshotai/kimi-k2.6"
| "kimi-k2.6"
| "kimi-for-coding" => Some(262_144),
"minimax/minimax-m3" | "minimax-m3" => Some(524_288),
"qwen/qwen3.6-35b-a3b" | "qwen/qwen3.6-27b" => Some(262_140),
"qwen/qwen3.6-flash" | "qwen/qwen3.6-max-preview" | "qwen/qwen3.6-plus" => Some(65_536),
"xiaomi/mimo-v2.5-pro" | "xiaomi/mimo-v2.5" | "mimo-v2.5-pro" | "mimo-v2.5" => {
@@ -328,7 +334,9 @@ pub fn model_supports_reasoning(model: &str) -> bool {
| "google/gemma-4-26b-a4b-it:free"
| "moonshotai/kimi-k2.6"
| "moonshotai/kimi-k2.6:free"
| "kimi-k2.6"
| "minimax/minimax-m3"
| "minimax-m3"
| "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"
| "qwen/qwen3.6-flash"
| "qwen/qwen3.6-35b-a3b"
@@ -341,6 +349,7 @@ pub fn model_supports_reasoning(model: &str) -> bool {
| "mimo-v2.5-pro"
| "mimo-v2.5"
| "z-ai/glm-5.1"
| "glm-5.1"
)
}
@@ -602,6 +611,31 @@ mod tests {
);
}
#[test]
fn bare_provider_model_ids_mirror_vendor_prefixed_rows() {
    // Direct-provider routes (Moonshot, MiniMax, Z.ai) serve bare model ids
    // without the OpenRouter vendor prefix; both spellings must resolve
    // identical metadata (#1310 ride-along on #3023).

    // Bare ids that carry a context window AND are flagged as reasoning models.
    let reasoning_rows = [
        ("kimi-k2.6", 262_144),
        ("minimax-m3", 1_000_000),
        ("glm-5.1", 202_752),
    ];
    for (model, expected_window) in reasoning_rows {
        assert_eq!(context_window_for_model(model), Some(expected_window));
        assert!(model_supports_reasoning(model));
    }

    // Bare ids that carry a context window but are NOT flagged as reasoning.
    let non_reasoning_rows = [("kimi-for-coding", 262_144), ("glm-5v-turbo", 202_752)];
    for (model, expected_window) in non_reasoning_rows {
        assert_eq!(context_window_for_model(model), Some(expected_window));
        assert!(!model_supports_reasoning(model));
    }

    // Output limits registered for the bare ids.
    let output_rows = [
        ("kimi-k2.6", 262_144),
        ("kimi-for-coding", 262_144),
        ("minimax-m3", 524_288),
    ];
    for (model, expected_output) in output_rows {
        assert_eq!(max_output_tokens_for_model(model), Some(expected_output));
    }
}
#[test]
fn deepseek_models_with_k_suffix_use_hint() {
assert_eq!(context_window_for_model("deepseek-v3.2-32k"), Some(32_000));