fix(model): align explicit provider routing
Fixes #3202. Add first-party Z.ai, StepFun, and MiniMax rows to the shared model registry so provider-filtered list/resolve does not fall through to OpenRouter or DeepSeek defaults. Teach the dispatcher model helper to honor top-level --provider as a fallback when the subcommand does not provide one, matching user-facing command intent. Route explicit exec models through authenticated inventory when the configured route is unique, and fail early with a provider hint when the model alias is available from multiple configured providers. Verification: cargo fmt --all --check; git diff --check; cargo test -p codewhale-agent -p codewhale-cli --locked; cargo test -p codewhale-tui explicit_exec_model --locked; cargo build -p codewhale-cli -p codewhale-tui --locked; debug model list/resolve smoke; debug Z.ai GLM-5.2 PONG smoke. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -328,6 +328,30 @@ impl Default for ModelRegistry {
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "GLM-5.1".to_string(),
|
||||
provider: ProviderKind::Zai,
|
||||
aliases: vec![
|
||||
"glm-5.1".to_string(),
|
||||
"glm-5-1".to_string(),
|
||||
"zai-glm-5.1".to_string(),
|
||||
"zai-glm-5-1".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "GLM-5.2".to_string(),
|
||||
provider: ProviderKind::Zai,
|
||||
aliases: vec![
|
||||
"glm-5.2".to_string(),
|
||||
"glm-5-2".to_string(),
|
||||
"zai-glm-5.2".to_string(),
|
||||
"zai-glm-5-2".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "tencent/hy3-preview".to_string(),
|
||||
provider: ProviderKind::Openrouter,
|
||||
@@ -667,6 +691,103 @@ impl Default for ModelRegistry {
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "step-3.7-flash".to_string(),
|
||||
provider: ProviderKind::Stepfun,
|
||||
aliases: vec!["stepfun".to_string(), "stepflash".to_string()],
|
||||
supports_tools: true,
|
||||
supports_reasoning: false,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M3".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec![
|
||||
"minimax".to_string(),
|
||||
"minimax-m3".to_string(),
|
||||
"minimax-m-3".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M2.7".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec![
|
||||
"minimax-m2.7".to_string(),
|
||||
"minimax-m2-7".to_string(),
|
||||
"minimax-m-2.7".to_string(),
|
||||
"minimax-m-2-7".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M2.7-highspeed".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec![
|
||||
"minimax-m2.7-highspeed".to_string(),
|
||||
"minimax-m2-7-highspeed".to_string(),
|
||||
"minimax-m-2.7-highspeed".to_string(),
|
||||
"minimax-m-2-7-highspeed".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M2.5".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec![
|
||||
"minimax-m2.5".to_string(),
|
||||
"minimax-m2-5".to_string(),
|
||||
"minimax-m-2.5".to_string(),
|
||||
"minimax-m-2-5".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M2.5-highspeed".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec![
|
||||
"minimax-m2.5-highspeed".to_string(),
|
||||
"minimax-m2-5-highspeed".to_string(),
|
||||
"minimax-m-2.5-highspeed".to_string(),
|
||||
"minimax-m-2-5-highspeed".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M2.1".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec![
|
||||
"minimax-m2.1".to_string(),
|
||||
"minimax-m2-1".to_string(),
|
||||
"minimax-m-2.1".to_string(),
|
||||
"minimax-m-2-1".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M2.1-highspeed".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec![
|
||||
"minimax-m2.1-highspeed".to_string(),
|
||||
"minimax-m2-1-highspeed".to_string(),
|
||||
"minimax-m-2.1-highspeed".to_string(),
|
||||
"minimax-m-2-1-highspeed".to_string(),
|
||||
],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "MiniMax-M2".to_string(),
|
||||
provider: ProviderKind::Minimax,
|
||||
aliases: vec!["minimax-m2".to_string(), "minimax-m-2".to_string()],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
// NVIDIA Nemotron 3 Ultra (OpenRouter)
|
||||
ModelInfo {
|
||||
id: "nvidia/nemotron-3-ultra-550b-a55b".to_string(),
|
||||
@@ -1249,6 +1370,76 @@ mod tests {
|
||||
assert_eq!(resolved.resolved.id, "deepseek-ai/DeepSeek-V4-Pro");
|
||||
}
|
||||
|
||||
/// With a `Zai` provider hint, the registry resolves to first-party GLM rows:
/// `GLM-5.1` when no model is requested, and the matching row for each
/// canonical id or alias listed below.
#[test]
fn zai_direct_models_resolve_when_provider_hinted() {
    let registry = ModelRegistry::default();
    let hint = Some(ProviderKind::Zai);

    // No model requested: the provider hint alone selects the Z.ai default.
    let fallback = registry.resolve(None, hint);
    assert_eq!(fallback.resolved.provider, ProviderKind::Zai);
    assert_eq!(fallback.resolved.id, "GLM-5.1");

    // (requested name, expected canonical id)
    let cases = [
        ("GLM-5.1", "GLM-5.1"),
        ("glm-5-1", "GLM-5.1"),
        ("GLM-5.2", "GLM-5.2"),
        ("glm-5.2", "GLM-5.2"),
        ("zai-glm-5-2", "GLM-5.2"),
    ];
    for (alias, expected) in cases {
        let outcome = registry.resolve(Some(alias), hint);
        let model = &outcome.resolved;

        assert_eq!(model.provider, ProviderKind::Zai);
        assert_eq!(model.id, expected);
        assert!(!outcome.used_fallback);
        assert!(model.supports_tools);
        assert!(model.supports_reasoning);
    }
}
|
||||
|
||||
/// The default registry listing contains at least one first-party row for
/// each of the Z.ai, StepFun, and MiniMax providers checked below.
#[test]
fn first_party_recent_provider_models_are_listed() {
    let registry = ModelRegistry::default();
    let models = registry.list();

    // (provider, canonical model id) pairs that must appear in the listing.
    let required = [
        (ProviderKind::Zai, "GLM-5.2"),
        (ProviderKind::Stepfun, "step-3.7-flash"),
        (ProviderKind::Minimax, "MiniMax-M2.1"),
    ];
    for (provider, id) in required {
        let listed = models
            .iter()
            .any(|model| model.provider == provider && model.id == id);
        assert!(listed, "expected {provider:?} model {id} in registry");
    }
}
|
||||
|
||||
/// A `Stepfun` hint with no model resolves to `step-3.7-flash`, and each
/// MiniMax alias below resolves to its first-party MiniMax row without
/// falling back.
#[test]
fn stepfun_and_minimax_direct_models_resolve_when_provider_hinted() {
    let registry = ModelRegistry::default();

    // StepFun: the provider hint alone selects the StepFun default.
    let stepfun_default = registry.resolve(None, Some(ProviderKind::Stepfun));
    assert_eq!(stepfun_default.resolved.provider, ProviderKind::Stepfun);
    assert_eq!(stepfun_default.resolved.id, "step-3.7-flash");

    // MiniMax: (requested alias, expected canonical id)
    let minimax_cases = [
        ("minimax", "MiniMax-M3"),
        ("minimax-m3", "MiniMax-M3"),
        ("minimax-m2.7", "MiniMax-M2.7"),
        ("minimax-m2-7-highspeed", "MiniMax-M2.7-highspeed"),
        ("minimax-m2.1", "MiniMax-M2.1"),
        ("minimax-m2", "MiniMax-M2"),
    ];
    for (alias, expected) in minimax_cases {
        let outcome = registry.resolve(Some(alias), Some(ProviderKind::Minimax));
        let model = &outcome.resolved;

        assert_eq!(model.provider, ProviderKind::Minimax);
        assert_eq!(model.id, expected);
        assert!(!outcome.used_fallback);
        assert!(model.supports_tools);
        assert!(model.supports_reasoning);
    }
}
|
||||
|
||||
#[test]
|
||||
fn deepseek_v4_flash_alias_resolves_to_openrouter_when_provider_hinted() {
|
||||
let registry = ModelRegistry::default();
|
||||
|
||||
+39
-4
@@ -620,7 +620,7 @@ fn run() -> Result<()> {
|
||||
Some(Commands::Auth(args)) => run_auth_command(&mut store, args.command),
|
||||
Some(Commands::McpServer) => run_mcp_server_command(&mut store),
|
||||
Some(Commands::Config(args)) => run_config_command(&mut store, args.command),
|
||||
Some(Commands::Model(args)) => run_model_command(args.command),
|
||||
Some(Commands::Model(args)) => run_model_command(args.command, runtime_overrides.provider),
|
||||
Some(Commands::Thread(args)) => run_thread_command(args.command),
|
||||
Some(Commands::Sandbox(args)) => run_sandbox_command(args.command),
|
||||
Some(Commands::AppServer(args)) => run_app_server_command(args),
|
||||
@@ -1467,11 +1467,23 @@ fn run_config_command(store: &mut ConfigStore, command: ConfigCommand) -> Result
|
||||
}
|
||||
}
|
||||
|
||||
fn run_model_command(command: ModelCommand) -> Result<()> {
|
||||
fn model_command_provider_hint(
|
||||
command_provider: Option<ProviderArg>,
|
||||
top_level_provider: Option<ProviderKind>,
|
||||
) -> Option<ProviderKind> {
|
||||
command_provider
|
||||
.map(ProviderKind::from)
|
||||
.or(top_level_provider)
|
||||
}
|
||||
|
||||
fn run_model_command(
|
||||
command: ModelCommand,
|
||||
top_level_provider: Option<ProviderKind>,
|
||||
) -> Result<()> {
|
||||
let registry = ModelRegistry::default();
|
||||
match command {
|
||||
ModelCommand::List { provider } => {
|
||||
let filter = provider.map(ProviderKind::from);
|
||||
let filter = model_command_provider_hint(provider, top_level_provider);
|
||||
for model in registry.list().into_iter().filter(|m| match filter {
|
||||
Some(p) => m.provider == p,
|
||||
None => true,
|
||||
@@ -1481,7 +1493,8 @@ fn run_model_command(command: ModelCommand) -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
ModelCommand::Resolve { model, provider } => {
|
||||
let resolved = registry.resolve(model.as_deref(), provider.map(ProviderKind::from));
|
||||
let provider = model_command_provider_hint(provider, top_level_provider);
|
||||
let resolved = registry.resolve(model.as_deref(), provider);
|
||||
println!("requested: {}", resolved.requested.unwrap_or_default());
|
||||
println!("resolved: {}", resolved.resolved.id);
|
||||
println!("provider: {}", resolved.resolved.provider.as_str());
|
||||
@@ -2270,6 +2283,28 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
/// The provider hint prefers the subcommand's provider, falls back to the
/// top-level one, and is `None` when neither is given. Also checks that a
/// top-level `--provider` placed before `model list` parses onto the CLI
/// struct and leaves the subcommand's own provider unset.
#[test]
fn model_command_provider_hint_uses_subcommand_then_top_level_provider() {
    // (subcommand provider, top-level provider, expected hint)
    let cases = [
        (None, Some(ProviderKind::Zai), Some(ProviderKind::Zai)),
        (
            Some(ProviderArg::Minimax),
            Some(ProviderKind::Zai),
            Some(ProviderKind::Minimax),
        ),
        (None, None, None),
    ];
    for (command_provider, top_level_provider, expected) in cases {
        assert_eq!(
            model_command_provider_hint(command_provider, top_level_provider),
            expected
        );
    }

    let cli = parse_ok(&["codewhale", "--provider", "zai", "model", "list"]);
    assert_eq!(cli.provider, Some(ProviderArg::Zai));

    let parsed_as_bare_list = matches!(
        cli.command,
        Some(Commands::Model(ModelArgs {
            command: ModelCommand::List { provider: None }
        }))
    );
    assert!(parsed_as_bare_list);
}
|
||||
|
||||
#[test]
|
||||
fn parses_thread_command_matrix() {
|
||||
let cli = parse_ok(&["deepseek", "thread", "list", "--all", "--limit", "50"]);
|
||||
|
||||
@@ -6097,6 +6097,7 @@ async fn run_interactive(
|
||||
.await
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct CliAutoRoute {
|
||||
provider: crate::config::ApiProvider,
|
||||
model: String,
|
||||
@@ -6144,6 +6145,30 @@ async fn resolve_cli_auto_route(
|
||||
auto_model: true,
|
||||
})
|
||||
} else {
|
||||
if let Some(selection) = model_routing::resolve_explicit_route_with_inventory(config, model)
|
||||
{
|
||||
return Ok(CliAutoRoute {
|
||||
provider: selection.provider,
|
||||
model: selection.model,
|
||||
reasoning_effort: selection.reasoning_effort,
|
||||
auto_model: false,
|
||||
});
|
||||
}
|
||||
|
||||
let candidate_providers = model_routing::explicit_route_candidate_providers(config, model);
|
||||
if !candidate_providers.is_empty() && !candidate_providers.contains(&config.api_provider())
|
||||
{
|
||||
let providers = candidate_providers
|
||||
.iter()
|
||||
.map(|provider| provider.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ");
|
||||
bail!(
|
||||
"model `{model}` is available from configured provider route(s): {providers}. \
|
||||
Pass `--provider <provider>` with `--model {model}` to choose one explicitly."
|
||||
);
|
||||
}
|
||||
|
||||
// When --model is not `auto`, fall back to the reasoning_effort
|
||||
// declared in the user's config.toml. The previous hard-coded `None`
|
||||
// silently dropped the user's setting on every non-auto-route exec
|
||||
@@ -7303,6 +7328,48 @@ mod terminal_mode_tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn explicit_exec_model_routes_to_unique_authenticated_provider_candidate() {
|
||||
let _env_lock = crate::test_support::lock_test_env();
|
||||
let _zai = crate::test_support::EnvVarGuard::set("ZAI_API_KEY", "zai-key");
|
||||
let _openrouter = crate::test_support::EnvVarGuard::remove("OPENROUTER_API_KEY");
|
||||
let config = Config {
|
||||
provider: Some("deepseek".to_string()),
|
||||
default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let route = resolve_cli_auto_route(&config, crate::config::ZAI_GLM_5_2_MODEL, "pong")
|
||||
.await
|
||||
.expect("explicit GLM should route to the configured Z.ai provider");
|
||||
|
||||
assert_eq!(route.provider, crate::config::ApiProvider::Zai);
|
||||
assert_eq!(route.model, crate::config::ZAI_GLM_5_2_MODEL);
|
||||
assert!(!route.auto_model);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn explicit_exec_model_reports_ambiguous_authenticated_provider_candidates() {
|
||||
let _env_lock = crate::test_support::lock_test_env();
|
||||
let _zai = crate::test_support::EnvVarGuard::set("ZAI_API_KEY", "zai-key");
|
||||
let _openrouter = crate::test_support::EnvVarGuard::set("OPENROUTER_API_KEY", "or-key");
|
||||
let config = Config {
|
||||
provider: Some("deepseek".to_string()),
|
||||
default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let err = resolve_cli_auto_route(&config, crate::config::ZAI_GLM_5_2_MODEL, "pong")
|
||||
.await
|
||||
.expect_err("ambiguous GLM route should ask for an explicit provider");
|
||||
let message = err.to_string();
|
||||
|
||||
assert!(message.contains("model `GLM-5.2` is available"));
|
||||
assert!(message.contains("openrouter"));
|
||||
assert!(message.contains("zai"));
|
||||
assert!(message.contains("--provider"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cli_route_execution_config_stamps_routed_model_into_provider_slot() {
|
||||
let mut providers = crate::config::ProvidersConfig::default();
|
||||
|
||||
@@ -8,7 +8,7 @@ use std::time::Duration;
|
||||
use anyhow::{Result, bail};
|
||||
|
||||
use crate::client::DeepSeekClient;
|
||||
use crate::config::{ApiProvider, Config};
|
||||
use crate::config::{ApiProvider, Config, normalize_model_name_for_provider};
|
||||
use crate::llm_client::LlmClient;
|
||||
use crate::model_inventory::ModelInventory;
|
||||
use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
|
||||
@@ -453,6 +453,79 @@ pub(crate) async fn resolve_auto_route_with_inventory(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn resolve_explicit_route_with_inventory(
|
||||
config: &Config,
|
||||
requested_model: &str,
|
||||
) -> Option<AutoRouteSelection> {
|
||||
let requested_model = requested_model.trim();
|
||||
if requested_model.is_empty() || requested_model.eq_ignore_ascii_case("auto") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let inventory = ModelInventory::from_config(config);
|
||||
let active_provider = config.api_provider();
|
||||
|
||||
if let Some(candidate) = inventory.candidates.iter().find(|candidate| {
|
||||
candidate.provider == active_provider
|
||||
&& explicit_model_matches_candidate(candidate, requested_model)
|
||||
}) {
|
||||
return Some(AutoRouteSelection {
|
||||
provider: candidate.provider,
|
||||
model: candidate.model.clone(),
|
||||
reasoning_effort: config.reasoning_effort().map(ReasoningEffort::from_setting),
|
||||
source: AutoRouteSource::Heuristic,
|
||||
});
|
||||
}
|
||||
|
||||
let mut matches = inventory
|
||||
.candidates
|
||||
.iter()
|
||||
.filter(|candidate| explicit_model_matches_candidate(candidate, requested_model));
|
||||
let candidate = matches.next()?;
|
||||
if matches.next().is_some() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(AutoRouteSelection {
|
||||
provider: candidate.provider,
|
||||
model: candidate.model.clone(),
|
||||
reasoning_effort: config.reasoning_effort().map(ReasoningEffort::from_setting),
|
||||
source: AutoRouteSource::Heuristic,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn explicit_route_candidate_providers(
|
||||
config: &Config,
|
||||
requested_model: &str,
|
||||
) -> Vec<ApiProvider> {
|
||||
let requested_model = requested_model.trim();
|
||||
if requested_model.is_empty() || requested_model.eq_ignore_ascii_case("auto") {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let inventory = ModelInventory::from_config(config);
|
||||
let mut providers = Vec::new();
|
||||
for candidate in inventory
|
||||
.candidates
|
||||
.iter()
|
||||
.filter(|candidate| explicit_model_matches_candidate(candidate, requested_model))
|
||||
{
|
||||
if !providers.contains(&candidate.provider) {
|
||||
providers.push(candidate.provider);
|
||||
}
|
||||
}
|
||||
providers
|
||||
}
|
||||
|
||||
fn explicit_model_matches_candidate(
|
||||
candidate: &crate::model_inventory::ModelRouteCandidate,
|
||||
requested_model: &str,
|
||||
) -> bool {
|
||||
candidate.model.eq_ignore_ascii_case(requested_model)
|
||||
|| normalize_model_name_for_provider(candidate.provider, requested_model)
|
||||
.is_some_and(|model| candidate.model.eq_ignore_ascii_case(&model))
|
||||
}
|
||||
|
||||
fn auto_route_from_inventory_heuristic(
|
||||
config: &Config,
|
||||
latest_request: &str,
|
||||
|
||||
Reference in New Issue
Block a user