From 652d3925ae05d48752faed31a3e5ef401c396109 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Sat, 13 Jun 2026 16:04:13 -0700 Subject: [PATCH] fix(model): align explicit provider routing Fixes #3202. Add first-party Z.ai, StepFun, and MiniMax rows to the shared model registry so provider-filtered list/resolve does not fall through to OpenRouter or DeepSeek defaults. Teach the dispatcher model helper to honor top-level --provider as a fallback when the subcommand does not provide one, matching user-facing command intent. Route explicit exec models through authenticated inventory when the configured route is unique, and fail early with a provider hint when the model alias is available from multiple configured providers. Verification: cargo fmt --all --check; git diff --check; cargo test -p codewhale-agent -p codewhale-cli --locked; cargo test -p codewhale-tui explicit_exec_model --locked; cargo build -p codewhale-cli -p codewhale-tui --locked; debug model list/resolve smoke; debug Z.ai GLM-5.2 PONG smoke. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/agent/src/lib.rs | 191 ++++++++++++++++++++++++++++++++ crates/cli/src/lib.rs | 43 ++++++- crates/tui/src/main.rs | 67 +++++++++++ crates/tui/src/model_routing.rs | 75 ++++++++++++- 4 files changed, 371 insertions(+), 5 deletions(-) diff --git a/crates/agent/src/lib.rs b/crates/agent/src/lib.rs index d79371b2..5f908f92 100644 --- a/crates/agent/src/lib.rs +++ b/crates/agent/src/lib.rs @@ -328,6 +328,30 @@ impl Default for ModelRegistry { supports_tools: true, supports_reasoning: true, }, + ModelInfo { + id: "GLM-5.1".to_string(), + provider: ProviderKind::Zai, + aliases: vec![ + "glm-5.1".to_string(), + "glm-5-1".to_string(), + "zai-glm-5.1".to_string(), + "zai-glm-5-1".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "GLM-5.2".to_string(), + provider: ProviderKind::Zai, + aliases: vec![ + "glm-5.2".to_string(), + "glm-5-2".to_string(), + "zai-glm-5.2".to_string(), + "zai-glm-5-2".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, ModelInfo { id: "tencent/hy3-preview".to_string(), provider: ProviderKind::Openrouter, @@ -667,6 +691,103 @@ impl Default for ModelRegistry { supports_tools: true, supports_reasoning: true, }, + ModelInfo { + id: "step-3.7-flash".to_string(), + provider: ProviderKind::Stepfun, + aliases: vec!["stepfun".to_string(), "stepflash".to_string()], + supports_tools: true, + supports_reasoning: false, + }, + ModelInfo { + id: "MiniMax-M3".to_string(), + provider: ProviderKind::Minimax, + aliases: vec![ + "minimax".to_string(), + "minimax-m3".to_string(), + "minimax-m-3".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "MiniMax-M2.7".to_string(), + provider: ProviderKind::Minimax, + aliases: vec![ + "minimax-m2.7".to_string(), + "minimax-m2-7".to_string(), + "minimax-m-2.7".to_string(), + "minimax-m-2-7".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + 
ModelInfo { + id: "MiniMax-M2.7-highspeed".to_string(), + provider: ProviderKind::Minimax, + aliases: vec![ + "minimax-m2.7-highspeed".to_string(), + "minimax-m2-7-highspeed".to_string(), + "minimax-m-2.7-highspeed".to_string(), + "minimax-m-2-7-highspeed".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "MiniMax-M2.5".to_string(), + provider: ProviderKind::Minimax, + aliases: vec![ + "minimax-m2.5".to_string(), + "minimax-m2-5".to_string(), + "minimax-m-2.5".to_string(), + "minimax-m-2-5".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "MiniMax-M2.5-highspeed".to_string(), + provider: ProviderKind::Minimax, + aliases: vec![ + "minimax-m2.5-highspeed".to_string(), + "minimax-m2-5-highspeed".to_string(), + "minimax-m-2.5-highspeed".to_string(), + "minimax-m-2-5-highspeed".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "MiniMax-M2.1".to_string(), + provider: ProviderKind::Minimax, + aliases: vec![ + "minimax-m2.1".to_string(), + "minimax-m2-1".to_string(), + "minimax-m-2.1".to_string(), + "minimax-m-2-1".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "MiniMax-M2.1-highspeed".to_string(), + provider: ProviderKind::Minimax, + aliases: vec![ + "minimax-m2.1-highspeed".to_string(), + "minimax-m2-1-highspeed".to_string(), + "minimax-m-2.1-highspeed".to_string(), + "minimax-m-2-1-highspeed".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "MiniMax-M2".to_string(), + provider: ProviderKind::Minimax, + aliases: vec!["minimax-m2".to_string(), "minimax-m-2".to_string()], + supports_tools: true, + supports_reasoning: true, + }, // NVIDIA Nemotron 3 Ultra (OpenRouter) ModelInfo { id: "nvidia/nemotron-3-ultra-550b-a55b".to_string(), @@ -1249,6 +1370,76 @@ mod tests { assert_eq!(resolved.resolved.id, "deepseek-ai/DeepSeek-V4-Pro"); } + #[test] + fn 
zai_direct_models_resolve_when_provider_hinted() { + let registry = ModelRegistry::default(); + + let default = registry.resolve(None, Some(ProviderKind::Zai)); + assert_eq!(default.resolved.provider, ProviderKind::Zai); + assert_eq!(default.resolved.id, "GLM-5.1"); + + for (alias, expected) in [ + ("GLM-5.1", "GLM-5.1"), + ("glm-5-1", "GLM-5.1"), + ("GLM-5.2", "GLM-5.2"), + ("glm-5.2", "GLM-5.2"), + ("zai-glm-5-2", "GLM-5.2"), + ] { + let resolved = registry.resolve(Some(alias), Some(ProviderKind::Zai)); + + assert_eq!(resolved.resolved.provider, ProviderKind::Zai); + assert_eq!(resolved.resolved.id, expected); + assert!(!resolved.used_fallback); + assert!(resolved.resolved.supports_tools); + assert!(resolved.resolved.supports_reasoning); + } + } + + #[test] + fn first_party_recent_provider_models_are_listed() { + let registry = ModelRegistry::default(); + let models = registry.list(); + + for (provider, id) in [ + (ProviderKind::Zai, "GLM-5.2"), + (ProviderKind::Stepfun, "step-3.7-flash"), + (ProviderKind::Minimax, "MiniMax-M2.1"), + ] { + assert!( + models + .iter() + .any(|model| model.provider == provider && model.id == id), + "expected {provider:?} model {id} in registry" + ); + } + } + + #[test] + fn stepfun_and_minimax_direct_models_resolve_when_provider_hinted() { + let registry = ModelRegistry::default(); + + let stepfun = registry.resolve(None, Some(ProviderKind::Stepfun)); + assert_eq!(stepfun.resolved.provider, ProviderKind::Stepfun); + assert_eq!(stepfun.resolved.id, "step-3.7-flash"); + + for (alias, expected) in [ + ("minimax", "MiniMax-M3"), + ("minimax-m3", "MiniMax-M3"), + ("minimax-m2.7", "MiniMax-M2.7"), + ("minimax-m2-7-highspeed", "MiniMax-M2.7-highspeed"), + ("minimax-m2.1", "MiniMax-M2.1"), + ("minimax-m2", "MiniMax-M2"), + ] { + let resolved = registry.resolve(Some(alias), Some(ProviderKind::Minimax)); + + assert_eq!(resolved.resolved.provider, ProviderKind::Minimax); + assert_eq!(resolved.resolved.id, expected); + 
assert!(!resolved.used_fallback); + assert!(resolved.resolved.supports_tools); + assert!(resolved.resolved.supports_reasoning); + } + } + #[test] fn deepseek_v4_flash_alias_resolves_to_openrouter_when_provider_hinted() { let registry = ModelRegistry::default(); diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index a62f71ca..5a16d61c 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -620,7 +620,7 @@ fn run() -> Result<()> { Some(Commands::Auth(args)) => run_auth_command(&mut store, args.command), Some(Commands::McpServer) => run_mcp_server_command(&mut store), Some(Commands::Config(args)) => run_config_command(&mut store, args.command), - Some(Commands::Model(args)) => run_model_command(args.command), + Some(Commands::Model(args)) => run_model_command(args.command, runtime_overrides.provider), Some(Commands::Thread(args)) => run_thread_command(args.command), Some(Commands::Sandbox(args)) => run_sandbox_command(args.command), Some(Commands::AppServer(args)) => run_app_server_command(args), @@ -1467,11 +1467,23 @@ fn run_config_command(store: &mut ConfigStore, command: ConfigCommand) -> Result } } -fn run_model_command(command: ModelCommand) -> Result<()> { +fn model_command_provider_hint( + command_provider: Option<ProviderArg>, + top_level_provider: Option<ProviderKind>, +) -> Option<ProviderKind> { + command_provider + .map(ProviderKind::from) + .or(top_level_provider) +} + +fn run_model_command( + command: ModelCommand, + top_level_provider: Option<ProviderKind>, +) -> Result<()> { let registry = ModelRegistry::default(); match command { ModelCommand::List { provider } => { - let filter = provider.map(ProviderKind::from); + let filter = model_command_provider_hint(provider, top_level_provider); for model in registry.list().into_iter().filter(|m| match filter { Some(p) => m.provider == p, None => true, @@ -1481,7 +1493,8 @@ fn run_model_command(command: ModelCommand) -> Result<()> { Ok(()) } ModelCommand::Resolve { model, provider } => { - let resolved = registry.resolve(model.as_deref(), 
provider.map(ProviderKind::from)); + let provider = model_command_provider_hint(provider, top_level_provider); + let resolved = registry.resolve(model.as_deref(), provider); println!("requested: {}", resolved.requested.unwrap_or_default()); println!("resolved: {}", resolved.resolved.id); println!("provider: {}", resolved.resolved.provider.as_str()); @@ -2270,6 +2283,28 @@ mod tests { )); } + #[test] + fn model_command_provider_hint_uses_subcommand_then_top_level_provider() { + assert_eq!( + model_command_provider_hint(None, Some(ProviderKind::Zai)), + Some(ProviderKind::Zai) + ); + assert_eq!( + model_command_provider_hint(Some(ProviderArg::Minimax), Some(ProviderKind::Zai)), + Some(ProviderKind::Minimax) + ); + assert_eq!(model_command_provider_hint(None, None), None); + + let cli = parse_ok(&["codewhale", "--provider", "zai", "model", "list"]); + assert_eq!(cli.provider, Some(ProviderArg::Zai)); + assert!(matches!( + cli.command, + Some(Commands::Model(ModelArgs { + command: ModelCommand::List { provider: None } + })) + )); + } + #[test] fn parses_thread_command_matrix() { let cli = parse_ok(&["deepseek", "thread", "list", "--all", "--limit", "50"]); diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 808ea121..a5c0c35e 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -6097,6 +6097,7 @@ async fn run_interactive( .await } +#[derive(Debug)] struct CliAutoRoute { provider: crate::config::ApiProvider, model: String, @@ -6144,6 +6145,30 @@ async fn resolve_cli_auto_route( auto_model: true, }) } else { + if let Some(selection) = model_routing::resolve_explicit_route_with_inventory(config, model) + { + return Ok(CliAutoRoute { + provider: selection.provider, + model: selection.model, + reasoning_effort: selection.reasoning_effort, + auto_model: false, + }); + } + + let candidate_providers = model_routing::explicit_route_candidate_providers(config, model); + if !candidate_providers.is_empty() && 
!candidate_providers.contains(&config.api_provider()) + { + let providers = candidate_providers + .iter() + .map(|provider| provider.as_str()) + .collect::<Vec<_>>() + .join(", "); + bail!( + "model `{model}` is available from configured provider route(s): {providers}. \ + Pass `--provider <provider>` with `--model {model}` to choose one explicitly." + ); + } + // When --model is not `auto`, fall back to the reasoning_effort // declared in the user's config.toml. The previous hard-coded `None` // silently dropped the user's setting on every non-auto-route exec @@ -7303,6 +7328,48 @@ mod terminal_mode_tests { ); } + #[tokio::test] + async fn explicit_exec_model_routes_to_unique_authenticated_provider_candidate() { + let _env_lock = crate::test_support::lock_test_env(); + let _zai = crate::test_support::EnvVarGuard::set("ZAI_API_KEY", "zai-key"); + let _openrouter = crate::test_support::EnvVarGuard::remove("OPENROUTER_API_KEY"); + let config = Config { + provider: Some("deepseek".to_string()), + default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_string()), + ..Default::default() + }; + + let route = resolve_cli_auto_route(&config, crate::config::ZAI_GLM_5_2_MODEL, "pong") + .await + .expect("explicit GLM should route to the configured Z.ai provider"); + + assert_eq!(route.provider, crate::config::ApiProvider::Zai); + assert_eq!(route.model, crate::config::ZAI_GLM_5_2_MODEL); + assert!(!route.auto_model); + } + + #[tokio::test] + async fn explicit_exec_model_reports_ambiguous_authenticated_provider_candidates() { + let _env_lock = crate::test_support::lock_test_env(); + let _zai = crate::test_support::EnvVarGuard::set("ZAI_API_KEY", "zai-key"); + let _openrouter = crate::test_support::EnvVarGuard::set("OPENROUTER_API_KEY", "or-key"); + let config = Config { + provider: Some("deepseek".to_string()), + default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_string()), + ..Default::default() + }; + + let err = resolve_cli_auto_route(&config, 
crate::config::ZAI_GLM_5_2_MODEL, "pong") + .await + .expect_err("ambiguous GLM route should ask for an explicit provider"); + let message = err.to_string(); + + assert!(message.contains("model `GLM-5.2` is available")); + assert!(message.contains("openrouter")); + assert!(message.contains("zai")); + assert!(message.contains("--provider")); + } + #[test] fn cli_route_execution_config_stamps_routed_model_into_provider_slot() { let mut providers = crate::config::ProvidersConfig::default(); diff --git a/crates/tui/src/model_routing.rs b/crates/tui/src/model_routing.rs index 56408c0c..a1e4fef1 100644 --- a/crates/tui/src/model_routing.rs +++ b/crates/tui/src/model_routing.rs @@ -8,7 +8,7 @@ use std::time::Duration; use anyhow::{Result, bail}; use crate::client::DeepSeekClient; -use crate::config::{ApiProvider, Config}; +use crate::config::{ApiProvider, Config, normalize_model_name_for_provider}; use crate::llm_client::LlmClient; use crate::model_inventory::ModelInventory; use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt}; @@ -453,6 +453,79 @@ pub(crate) async fn resolve_auto_route_with_inventory( } } +pub(crate) fn resolve_explicit_route_with_inventory( + config: &Config, + requested_model: &str, +) -> Option<AutoRouteSelection> { + let requested_model = requested_model.trim(); + if requested_model.is_empty() || requested_model.eq_ignore_ascii_case("auto") { + return None; + } + + let inventory = ModelInventory::from_config(config); + let active_provider = config.api_provider(); + + if let Some(candidate) = inventory.candidates.iter().find(|candidate| { + candidate.provider == active_provider + && explicit_model_matches_candidate(candidate, requested_model) + }) { + return Some(AutoRouteSelection { + provider: candidate.provider, + model: candidate.model.clone(), + reasoning_effort: config.reasoning_effort().map(ReasoningEffort::from_setting), + source: AutoRouteSource::Heuristic, + }); + } + + let mut matches = inventory + .candidates + .iter() + 
.filter(|candidate| explicit_model_matches_candidate(candidate, requested_model)); + let candidate = matches.next()?; + if matches.next().is_some() { + return None; + } + + Some(AutoRouteSelection { + provider: candidate.provider, + model: candidate.model.clone(), + reasoning_effort: config.reasoning_effort().map(ReasoningEffort::from_setting), + source: AutoRouteSource::Heuristic, + }) +} + +pub(crate) fn explicit_route_candidate_providers( + config: &Config, + requested_model: &str, +) -> Vec<ApiProvider> { + let requested_model = requested_model.trim(); + if requested_model.is_empty() || requested_model.eq_ignore_ascii_case("auto") { + return Vec::new(); + } + + let inventory = ModelInventory::from_config(config); + let mut providers = Vec::new(); + for candidate in inventory + .candidates + .iter() + .filter(|candidate| explicit_model_matches_candidate(candidate, requested_model)) + { + if !providers.contains(&candidate.provider) { + providers.push(candidate.provider); + } + } + providers +} + +fn explicit_model_matches_candidate( + candidate: &crate::model_inventory::ModelRouteCandidate, + requested_model: &str, +) -> bool { + candidate.model.eq_ignore_ascii_case(requested_model) + || normalize_model_name_for_provider(candidate.provider, requested_model) + .is_some_and(|model| candidate.model.eq_ignore_ascii_case(&model)) +} + fn auto_route_from_inventory_heuristic( config: &Config, latest_request: &str,