diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index d8f17de4..a64b2ebd 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -575,6 +575,19 @@ pub(crate) fn normalize_custom_model_id(model: &str) -> Option { } } +/// Validate a user-requested model id against the active provider (#3018). +/// +/// DeepSeek providers use the strict `normalize_model_name` gate (official +/// API only accepts DeepSeek IDs). All other providers pass any non-empty, +/// non-control-character string through — the provider API is the authority. +#[must_use] +pub fn requested_model_for_provider(provider: ApiProvider, model: &str) -> Option { + match provider { + ApiProvider::Deepseek | ApiProvider::DeepseekCN => normalize_model_name(model), + _ => normalize_custom_model_id(model), + } +} + fn canonical_official_deepseek_model_id(model: &str) -> Option<&'static str> { match model.trim().to_ascii_lowercase().as_str() { "deepseek-v4-pro" @@ -7891,6 +7904,25 @@ api_key = "old-openrouter-key" assert_eq!(config.default_model(), DEFAULT_TEXT_MODEL); } + #[test] + fn requested_model_for_provider_is_permissive_off_deepseek() { + // #3018: the provider API is the authority for non-DeepSeek routes. + assert_eq!( + requested_model_for_provider(ApiProvider::Moonshot, "kimi-k2.5").as_deref(), + Some("kimi-k2.5") + ); + assert_eq!( + requested_model_for_provider(ApiProvider::Ollama, "qwen3:32b").as_deref(), + Some("qwen3:32b") + ); + // The official DeepSeek API stays strict. + assert!(requested_model_for_provider(ApiProvider::Deepseek, "kimi-k2.5").is_none()); + assert_eq!( + requested_model_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro").as_deref(), + Some("deepseek-v4-pro") + ); + } + #[test] fn wire_model_for_provider_matches_active_provider_shape() { assert_eq!( diff --git a/crates/tui/src/model_routing.rs b/crates/tui/src/model_routing.rs index 9e9b483e..fc32e3e5 100644 --- a/crates/tui/src/model_routing.rs +++ b/crates/tui/src/model_routing.rs @@ -13,16 +13,95 @@ use crate::llm_client::LlmClient; use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt}; use crate::tui::app::ReasoningEffort; -/// Auto-select a model based on request complexity. +/// Big/cheap model pair the auto-router may choose between for the active +/// provider (#3018). /// -/// Short messages (<100 chars) go to Flash. Long messages and requests with -/// complex keywords go to Pro. The fallback is Flash. -pub(crate) fn auto_model_heuristic(input: &str, current_model: &str) -> String { - auto_model_heuristic_with_bias(input, current_model, false) +/// `cheap == None` means the provider has no known cheap tier: heuristics +/// stay on the current model (only thinking effort varies) and the network +/// router is skipped entirely (#1549). +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct RouterCandidates { + pub(crate) big: String, + pub(crate) cheap: Option, } +impl RouterCandidates { + pub(crate) fn deepseek() -> Self { + Self { + big: "deepseek-v4-pro".to_string(), + cheap: Some("deepseek-v4-flash".to_string()), + } + } + + /// The cheap-tier id, falling back to `big` when no cheap tier exists. + pub(crate) fn cheap_or_big(&self) -> &str { + self.cheap.as_deref().unwrap_or(&self.big) + } +} + +/// Derive the auto-router's candidate pair for the active provider (#3018). +/// +/// DeepSeek providers route between the canonical pro/flash pair. Hosted +/// routes with known wire ids for that pair (NVIDIA NIM, OpenRouter, Novita, +/// SiliconFlow, SGLang, vLLM) use their provider-prefixed spellings. Every +/// other provider has no known cheap tier: `big` is the session model and +/// `cheap` is `None`, so auto mode never fabricates a DeepSeek id for a +/// provider that cannot serve it. +pub(crate) fn provider_router_candidates( + provider: crate::config::ApiProvider, + current_model: &str, +) -> RouterCandidates { + use crate::config::ApiProvider; + match provider { + ApiProvider::Deepseek | ApiProvider::DeepseekCN => RouterCandidates::deepseek(), + ApiProvider::NvidiaNim + | ApiProvider::Openrouter + | ApiProvider::Novita + | ApiProvider::Siliconflow + | ApiProvider::SiliconflowCn + | ApiProvider::Sglang + | ApiProvider::Vllm => RouterCandidates { + big: crate::config::wire_model_for_provider(provider, "deepseek-v4-pro"), + cheap: Some(crate::config::wire_model_for_provider( + provider, + "deepseek-v4-flash", + )), + }, + _ => RouterCandidates { + big: current_model.to_string(), + cheap: None, + }, + } +} + +/// Auto-select a model based on request complexity. +/// +/// Short messages (<100 chars) go to the cheap tier. Long messages and +/// requests with complex keywords go to the big tier. The fallback is cheap. +/// This DeepSeek-candidate wrapper keeps legacy callers and tests intact; +/// provider-aware callers use [`auto_model_heuristic_for_candidates`]. +pub(crate) fn auto_model_heuristic(input: &str, current_model: &str) -> String { + auto_model_heuristic_for_candidates(input, current_model, &RouterCandidates::deepseek()) +} + +/// Candidate-aware variant of [`auto_model_heuristic`] (#3018). +pub(crate) fn auto_model_heuristic_for_candidates( + input: &str, + current_model: &str, + candidates: &RouterCandidates, +) -> String { + auto_model_heuristic_selection_with_bias(input, current_model, false, candidates).model +} + +#[cfg(test)] fn auto_model_heuristic_with_bias(input: &str, current_model: &str, cost_saving: bool) -> String { - auto_model_heuristic_selection_with_bias(input, current_model, cost_saving).model + auto_model_heuristic_selection_with_bias( + input, + current_model, + cost_saving, + &RouterCandidates::deepseek(), + ) + .model } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -41,6 +120,7 @@ fn auto_model_heuristic_selection_with_bias( input: &str, _current_model: &str, cost_saving: bool, + candidates: &RouterCandidates, ) -> AutoModelHeuristicSelection { let len = input.chars().count(); let lower = input.to_lowercase(); @@ -58,26 +138,26 @@ fn auto_model_heuristic_selection_with_bias( let pro_match = strong_match || (!cost_saving && borderline_match); if pro_match { return AutoModelHeuristicSelection { - model: "deepseek-v4-pro".to_string(), + model: candidates.big.clone(), confidence: AutoModelHeuristicConfidence::Decisive, }; } if len < 100 { return AutoModelHeuristicSelection { - model: "deepseek-v4-flash".to_string(), + model: candidates.cheap_or_big().to_string(), confidence: AutoModelHeuristicConfidence::Decisive, }; } let long_threshold = if cost_saving { 1_000 } else { 500 }; if len > long_threshold { return AutoModelHeuristicSelection { - model: "deepseek-v4-pro".to_string(), + model: candidates.big.clone(), confidence: AutoModelHeuristicConfidence::Decisive, }; } AutoModelHeuristicSelection { - model: "deepseek-v4-flash".to_string(), + model: candidates.cheap_or_big().to_string(), confidence: AutoModelHeuristicConfidence::Ambiguous, } } @@ -148,26 +228,50 @@ pub(crate) struct AutoRouteSelection { pub(crate) source: AutoRouteSource, } -const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = "\ -You are the codewhale auto-routing classifier. Return only compact JSON: \ -{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \ -Use deepseek-v4-flash for trivial, conversational, status, or single-step work. \ -Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, \ +/// Render the auto-router system prompt with the actual candidate ids +/// (#3018): the classifier must answer with ids the active provider can +/// serve, not hardcoded DeepSeek spellings. +pub(crate) fn auto_router_system_prompt( + candidates: &RouterCandidates, + cost_saving: bool, +) -> String { + let cheap = candidates.cheap_or_big(); + let big = &candidates.big; + let mut prompt = format!( + "You are the codewhale auto-routing classifier. Return only compact JSON: \ +{{\"model\":\"{cheap}|{big}\",\"thinking\":\"off|high|max\"}}. \ +Use {cheap} for trivial, conversational, status, or single-step work. \ +Use {big} for coding, debugging, release work, multi-step tasks, high-risk decisions, \ tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. \ Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \ -agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work."; - -const AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM: &str = "\ -\n\nCost-saving mode is ON. Prefer deepseek-v4-flash for any request that is \ +agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work." + ); + if cost_saving { + prompt.push_str(&format!( + "\n\nCost-saving mode is ON. Prefer {cheap} for any request that is \ not unmistakably agentic, multi-step, architecture/design, security review, \ -debugging, or otherwise clearly out of Flash's capability. Resolve ambiguous \ -cases in favour of deepseek-v4-flash, not deepseek-v4-pro."; +debugging, or otherwise clearly out of the cheap tier's capability. Resolve \ +ambiguous cases in favour of {cheap}, not {big}." + )); + } + prompt +} +/// DeepSeek-candidate wrapper kept for the legacy parser tests; the +/// network router parses with [`parse_auto_route_recommendation_for_candidates`]. +#[cfg(test)] pub(crate) fn parse_auto_route_recommendation(raw: &str) -> Option { + parse_auto_route_recommendation_for_candidates(raw, &RouterCandidates::deepseek()) +} + +pub(crate) fn parse_auto_route_recommendation_for_candidates( + raw: &str, + candidates: &RouterCandidates, +) -> Option { let json = extract_first_json_object(raw)?; let value: serde_json::Value = serde_json::from_str(json).ok()?; let model = value.get("model").and_then(serde_json::Value::as_str)?; - let model = normalize_auto_route_model(model)?; + let model = normalize_auto_route_model(model, candidates)?; let reasoning_effort = value .get("thinking") .or_else(|| value.get("reasoning_effort")) @@ -176,7 +280,7 @@ pub(crate) fn parse_auto_route_recommendation(raw: &str) -> Option Option<&str> { (end >= start).then_some(&raw[start..=end]) } -fn normalize_auto_route_model(model: &str) -> Option<&'static str> { - match model.trim().to_ascii_lowercase().as_str() { - "deepseek-v4-pro" | "v4-pro" | "pro" => Some("deepseek-v4-pro"), - "deepseek-v4-flash" | "v4-flash" | "flash" => Some("deepseek-v4-flash"), +fn normalize_auto_route_model(model: &str, candidates: &RouterCandidates) -> Option { + let normalized = model.trim().to_ascii_lowercase(); + // Exact candidate ids, case-insensitively (#3018). + if normalized == candidates.big.to_ascii_lowercase() { + return Some(candidates.big.clone()); + } + if let Some(cheap) = candidates.cheap.as_deref() + && normalized == cheap.to_ascii_lowercase() + { + return Some(cheap.to_string()); + } + // Legacy pro/flash shorthand maps onto the big/cheap tiers. + match normalized.as_str() { + "deepseek-v4-pro" | "v4-pro" | "pro" => Some(candidates.big.clone()), + "deepseek-v4-flash" | "v4-flash" | "flash" => Some(candidates.cheap_or_big().to_string()), _ => None, } } @@ -221,14 +336,29 @@ pub(crate) async fn resolve_auto_route_with_flash( selected_thinking_mode: &str, ) -> AutoRouteSelection { let cost_saving = config.auto_cost_saving(); - let heuristic = - auto_model_heuristic_selection_with_bias(latest_request, selected_model_mode, cost_saving); + // #3018: derive the candidate pair from the active provider. The + // config-resolved default model stands in for the session model — with + // auto mode on, that is the canonical id the provider serves. + let candidates = provider_router_candidates(config.api_provider(), &config.default_model()); + let heuristic = auto_model_heuristic_selection_with_bias( + latest_request, + selected_model_mode, + cost_saving, + &candidates, + ); if heuristic.confidence == AutoModelHeuristicConfidence::Decisive { return auto_route_from_heuristic(latest_request, heuristic); } + // #1549/#3018: no cheap tier → no network round-trip. The heuristic is + // the only signal and the routed model stays on the session model. + if candidates.cheap.is_none() { + return auto_route_from_heuristic(latest_request, heuristic); + } + match auto_route_flash_recommendation( config, + &candidates, latest_request, recent_context, selected_model_mode, @@ -261,6 +391,7 @@ fn auto_route_from_heuristic( async fn auto_route_flash_recommendation( config: &Config, + candidates: &RouterCandidates, latest_request: &str, recent_context: &str, selected_model_mode: &str, @@ -269,14 +400,16 @@ async fn auto_route_flash_recommendation( if cfg!(test) { return Ok(None); } + let Some(cheap_model) = candidates.cheap.clone() else { + // Callers skip the router when there is no cheap tier; this is a + // defensive second gate so a future caller cannot 400 the provider. + return Ok(None); + }; let client = DeepSeekClient::new(config)?; - let mut router_system = AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string(); - if config.auto_cost_saving() { - router_system.push_str(AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM); - } + let router_system = auto_router_system_prompt(candidates, config.auto_cost_saving()); let request = MessageRequest { - model: "deepseek-v4-flash".to_string(), + model: cheap_model, messages: vec![Message { role: "user".to_string(), content: vec![ContentBlock::Text { @@ -303,9 +436,10 @@ async fn auto_route_flash_recommendation( let response = tokio::time::timeout(Duration::from_secs(4), client.create_message(request)).await??; - Ok(parse_auto_route_recommendation(&message_response_text( - &response, - ))) + Ok(parse_auto_route_recommendation_for_candidates( + &message_response_text(&response), + candidates, + )) } fn auto_route_prompt( @@ -416,7 +550,12 @@ mod tests { #[test] fn auto_heuristic_selection_marks_short_and_complex_routes_decisive() { - let short = auto_model_heuristic_selection_with_bias("yes", "auto", false); + let short = auto_model_heuristic_selection_with_bias( + "yes", + "auto", + false, + &RouterCandidates::deepseek(), + ); assert_eq!(short.model, "deepseek-v4-flash"); assert_eq!( short.confidence, @@ -428,6 +567,7 @@ mod tests { "Please review the auth migration", "auto", false, + &RouterCandidates::deepseek(), ); assert_eq!(complex.model, "deepseek-v4-pro"); assert_eq!( @@ -446,7 +586,12 @@ mod tests { "test request must stay in the default grey zone" ); - let selection = auto_model_heuristic_selection_with_bias(&request, "auto", false); + let selection = auto_model_heuristic_selection_with_bias( + &request, + "auto", + false, + &RouterCandidates::deepseek(), + ); assert_eq!(selection.model, "deepseek-v4-flash"); assert_eq!( selection.confidence, @@ -550,6 +695,91 @@ mod tests { ); } + #[test] + fn provider_router_candidates_cover_known_provider_classes() { + use crate::config::ApiProvider; + + let deepseek = provider_router_candidates(ApiProvider::Deepseek, "deepseek-v4-pro"); + assert_eq!(deepseek.big, "deepseek-v4-pro"); + assert_eq!(deepseek.cheap.as_deref(), Some("deepseek-v4-flash")); + + let openrouter = + provider_router_candidates(ApiProvider::Openrouter, "deepseek/deepseek-v4-pro"); + assert_eq!(openrouter.big, "deepseek/deepseek-v4-pro"); + assert_eq!( + openrouter.cheap.as_deref(), + Some("deepseek/deepseek-v4-flash") + ); + + // Providers without a known cheap tier: big = session model, no cheap. + let ollama = provider_router_candidates(ApiProvider::Ollama, "qwen3:32b"); + assert_eq!(ollama.big, "qwen3:32b"); + assert_eq!(ollama.cheap, None); + + let moonshot = provider_router_candidates(ApiProvider::Moonshot, "kimi-k2.6"); + assert_eq!(moonshot.big, "kimi-k2.6"); + assert_eq!(moonshot.cheap, None); + } + + #[test] + fn heuristic_without_cheap_tier_always_returns_current_model() { + // #3018 AC: Ollama + auto must never fabricate a DeepSeek id. + let candidates = RouterCandidates { + big: "qwen3:32b".to_string(), + cheap: None, + }; + for prompt in [ + "hi", + "please refactor the auth module for security", + &"long filler sentence. ".repeat(60), + ] { + let model = auto_model_heuristic_for_candidates(prompt, "qwen3:32b", &candidates); + assert_eq!(model, "qwen3:32b", "prompt {prompt:?}"); + } + } + + #[test] + fn auto_route_parser_accepts_candidate_ids_and_legacy_shorthand() { + let candidates = RouterCandidates { + big: "deepseek/deepseek-v4-pro".to_string(), + cheap: Some("deepseek/deepseek-v4-flash".to_string()), + }; + + let rec = parse_auto_route_recommendation_for_candidates( + r#"{"model":"DeepSeek/DeepSeek-V4-Pro","thinking":"max"}"#, + &candidates, + ) + .expect("exact candidate id parses case-insensitively"); + assert_eq!(rec.model, "deepseek/deepseek-v4-pro"); + + let rec = parse_auto_route_recommendation_for_candidates( + r#"{"model":"flash","thinking":"off"}"#, + &candidates, + ) + .expect("legacy shorthand maps onto the cheap tier"); + assert_eq!(rec.model, "deepseek/deepseek-v4-flash"); + + assert!( + parse_auto_route_recommendation_for_candidates( + r#"{"model":"gpt-4o","thinking":"high"}"#, + &candidates, + ) + .is_none(), + "non-candidate ids are rejected" + ); + } + + #[test] + fn auto_router_system_prompt_names_candidate_ids() { + let candidates = RouterCandidates { + big: "deepseek/deepseek-v4-pro".to_string(), + cheap: Some("deepseek/deepseek-v4-flash".to_string()), + }; + let prompt = auto_router_system_prompt(&candidates, true); + assert!(prompt.contains("deepseek/deepseek-v4-flash|deepseek/deepseek-v4-pro")); + assert!(prompt.contains("Cost-saving mode is ON")); + } + #[test] fn config_auto_cost_saving_defaults_to_false() { let cfg = Config::default(); diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index c9166a1d..df8592d9 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -2648,7 +2648,14 @@ impl ToolSpec for AgentSpawnTool { child_runtime.context.workspace = cwd; } let configured_model = match spawn_request.model.clone() { - Some(model) => Some(model), + // #3018 audit fix: spawn-time `model` must be validated and + // canonicalized here — passing it through raw turns a typo into + // an opaque provider 400 instead of a clear spawn-time error. + Some(model) => Some(normalize_requested_subagent_model( + &model, + "model", + self.runtime.client.api_provider(), + )?), None => configured_model_for_role_or_type( &self.runtime, spawn_request.assignment.role.as_deref(), @@ -5081,18 +5088,39 @@ fn with_default_fork_context(mut input: Value, default: bool) -> Value { pub(crate) fn normalize_requested_subagent_model( value: &str, field: &str, + provider: crate::config::ApiProvider, ) -> Result { let trimmed = value.trim(); if trimmed.is_empty() { return Err(ToolError::invalid_input(format!("{field} cannot be blank"))); } - crate::config::normalize_model_name(trimmed).ok_or_else(|| { + // #3018: Use provider-aware validation so non-DeepSeek providers can + // accept their own model IDs instead of failing with "Expected a + // DeepSeek model id". + crate::config::requested_model_for_provider(provider, trimmed).ok_or_else(|| { + let valid_names = crate::config::model_completion_names_for_provider(provider); + let valid_hint = if valid_names.is_empty() { + String::new() + } else { + format!(" (accepted: {})", valid_names.join(", ")) + }; ToolError::invalid_input(format!( - "Invalid {field} '{trimmed}'. Expected a DeepSeek model id such as deepseek-v4-pro or deepseek-v4-flash" + "Invalid {field} '{trimmed}' for provider {}{valid_hint}", + provider_name_for_error(provider) )) }) } +fn provider_name_for_error(provider: crate::config::ApiProvider) -> &'static str { + match provider { + crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN => "DeepSeek", + crate::config::ApiProvider::Openai | crate::config::ApiProvider::OpenaiCodex => "OpenAI", + crate::config::ApiProvider::Moonshot => "Moonshot", + crate::config::ApiProvider::Ollama => "Ollama", + _ => "this provider", + } +} + pub(crate) fn configured_model_for_role_or_type( runtime: &SubAgentRuntime, role: Option<&str>, @@ -5107,8 +5135,12 @@ pub(crate) fn configured_model_for_role_or_type( for key in keys { if let Some(model) = runtime.role_models.get(&key) { - return normalize_requested_subagent_model(model, &format!("subagents.{key}.model")) - .map(Some); + return normalize_requested_subagent_model( + model, + &format!("subagents.{key}.model"), + runtime.client.api_provider(), + ) + .map(Some); } } Ok(None) @@ -5161,7 +5193,15 @@ fn tool_agent_route(parent_model: &str, configured_model: Option) -> Sub } fn should_use_subagent_flash_router(runtime: &SubAgentRuntime) -> bool { - runtime.auto_model + // #3018: providers without a known cheap tier skip the network router + // entirely — there is no alternative model worth a round-trip. + runtime.auto_model && subagent_router_candidates(runtime).cheap.is_some() +} + +/// Candidate pair for the sub-agent router, derived from the active +/// provider and the (already provider-resolved) parent model (#3018). +fn subagent_router_candidates(runtime: &SubAgentRuntime) -> crate::model_routing::RouterCandidates { + crate::model_routing::provider_router_candidates(runtime.client.api_provider(), &runtime.model) } fn fallback_subagent_assignment_route( @@ -5172,7 +5212,13 @@ fn fallback_subagent_assignment_route( let model = if let Some(model) = configured_model { model } else if runtime.auto_model { - crate::model_routing::auto_model_heuristic(prompt, &runtime.model) + // #3018: candidate-aware — on providers without a cheap tier this + // always resolves to the parent model instead of a DeepSeek id. + crate::model_routing::auto_model_heuristic_for_candidates( + prompt, + &runtime.model, + &subagent_router_candidates(runtime), + ) } else { runtime.model.clone() }; @@ -5203,8 +5249,15 @@ async fn subagent_flash_router( return Ok(None); } + let candidates = subagent_router_candidates(runtime); + let Some(cheap_model) = candidates.cheap.clone() else { + // should_use_subagent_flash_router already gates on this; defensive + // second gate so the router can never 400 a cheap-tier-less provider. + return Ok(None); + }; + let request = MessageRequest { - model: "deepseek-v4-flash".to_string(), + model: cheap_model, messages: vec![Message { role: "user".to_string(), content: vec![ContentBlock::Text { @@ -5213,9 +5266,9 @@ async fn subagent_flash_router( }], }], max_tokens: 96, - system: Some(SystemPrompt::Text( - SUBAGENT_ROUTER_SYSTEM_PROMPT.to_string(), - )), + system: Some(SystemPrompt::Text(subagent_router_system_prompt( + &candidates, + ))), tools: None, tool_choice: None, metadata: None, @@ -5231,21 +5284,31 @@ async fn subagent_flash_router( runtime.client.create_message(request), ) .await??; - Ok(crate::model_routing::parse_auto_route_recommendation( - &message_response_text(&response.content), - )) + Ok( + crate::model_routing::parse_auto_route_recommendation_for_candidates( + &message_response_text(&response.content), + &candidates, + ), + ) } -const SUBAGENT_ROUTER_SYSTEM_PROMPT: &str = "\ -You are the codewhale sub-agent routing manager. Return only compact JSON: \ -{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \ +/// Render the sub-agent router system prompt from the actual candidate ids +/// (#3018) so the classifier answers with ids the active provider serves. +fn subagent_router_system_prompt(candidates: &crate::model_routing::RouterCandidates) -> String { + let cheap = candidates.cheap_or_big(); + let big = &candidates.big; + format!( + "You are the codewhale sub-agent routing manager. Return only compact JSON: \ +{{\"model\":\"{cheap}|{big}\",\"thinking\":\"off|high|max\"}}. \ Treat each child assignment like a customer request entering a team queue: decide the least \ sufficient worker and thinking budget for that assignment. Do not treat being a sub-agent as \ -important by itself. Use Flash for trivial, read-only, status, lookup, or single-step work. \ -Use Pro for coding, debugging, release work, multi-file changes, security, architecture, \ +important by itself. Use {cheap} for trivial, read-only, status, lookup, or single-step work. \ +Use {big} for coding, debugging, release work, multi-file changes, security, architecture, \ high-risk decisions, ambiguous requests, or work likely to need tool-call judgment. Use thinking \ off for trivial no-tool work, high for ordinary reasoning, and max only for hard, risky, \ -multi-step, uncertain, or tool-heavy work."; +multi-step, uncertain, or tool-heavy work." + ) +} fn subagent_router_prompt(runtime: &SubAgentRuntime, prompt: &str) -> String { format!( @@ -5297,7 +5360,15 @@ fn message_response_text(blocks: &[ContentBlock]) -> String { fn parse_optional_subagent_model(input: &Value, key: &str) -> Result, ToolError> { match input.get(key) { None | Some(Value::Null) => Ok(None), - Some(Value::String(value)) => normalize_requested_subagent_model(value, key).map(Some), + Some(Value::String(value)) => { + let trimmed = value.trim(); + if trimmed.is_empty() { + return Err(ToolError::invalid_input(format!("{key} cannot be blank"))); + } + // #3018: Basic parsing only — provider-aware validation is deferred + // to the spawn path where the runtime's ApiProvider is available. + Ok(Some(trimmed.to_string())) + } Some(_) => Err(ToolError::invalid_input(format!("{key} must be a string"))), } } diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 26702df1..4b8e02d2 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -2665,6 +2665,35 @@ fn stub_runtime() -> SubAgentRuntime { /// *some* `DeepSeekClient` because `SubAgentRuntime.client` isn't /// `Option<...>`. `Config::default()` is enough — `DeepSeekClient::new` /// only validates that an API key field exists, not that the key works. +fn stub_runtime_for_provider(provider: &str) -> SubAgentRuntime { + let mut runtime = stub_runtime(); + runtime.client = stub_client_for_provider(provider); + runtime +} + +fn stub_client_for_provider(provider: &str) -> DeepSeekClient { + let _ = rustls::crypto::ring::default_provider().install_default(); + let mut providers = crate::config::ProvidersConfig::default(); + match provider { + "moonshot" => { + providers.moonshot = crate::config::ProviderConfig { + api_key: Some("test-key".to_string()), + ..Default::default() + }; + } + // Ollama is keyless (local runtime); extend per-provider as needed. + "ollama" => {} + other => panic!("extend stub_client_for_provider for provider {other}"), + } + let config = crate::config::Config { + api_key: Some("test-key".to_string()), + provider: Some(provider.to_string()), + providers: Some(providers), + ..crate::config::Config::default() + }; + DeepSeekClient::new(&config).expect("stub client should construct") +} + fn stub_client() -> DeepSeekClient { let _ = rustls::crypto::ring::default_provider().install_default(); let config = crate::config::Config { @@ -3197,6 +3226,104 @@ fn model_catalog_only_advertises_canonical_subagent_tools() { } } +// ── #3018: provider-aware auto routing and model validation ───────────────── + +#[tokio::test] +async fn auto_route_on_provider_without_cheap_tier_stays_on_parent_model() { + // AC: Ollama + auto-model must never build a request with a DeepSeek id; + // the routed model equals the session model for any prompt class. + let mut runtime = stub_runtime_for_provider("ollama").with_auto_model(true); + runtime.model = "qwen3:32b".to_string(); + + for prompt in ["hi", "please refactor the whole auth module for security"] { + let route = + resolve_subagent_assignment_route(&runtime, None, prompt, &SubAgentType::General).await; + assert_eq!(route.model, "qwen3:32b", "prompt {prompt:?}"); + assert!( + !route.model.contains("deepseek"), + "no DeepSeek id may be fabricated: {route:?}" + ); + } +} + +#[test] +fn flash_router_gate_requires_cheap_tier() { + let deepseek = stub_runtime().with_auto_model(true); + assert!( + should_use_subagent_flash_router(&deepseek), + "DeepSeek keeps the network router" + ); + + let mut moonshot = stub_runtime_for_provider("moonshot").with_auto_model(true); + moonshot.model = "kimi-k2.6".to_string(); + assert!( + !should_use_subagent_flash_router(&moonshot), + "providers without a cheap tier skip the network router" + ); +} + +#[test] +fn role_model_validation_accepts_provider_native_ids() { + // AC: [subagents] worker_model = "kimi-k2.5" on Moonshot must not fail + // with "Expected a DeepSeek model id". + let mut runtime = stub_runtime_for_provider("moonshot"); + runtime + .role_models + .insert("worker".to_string(), "kimi-k2.5".to_string()); + + let model = configured_model_for_role_or_type(&runtime, Some("worker"), &SubAgentType::General) + .expect("provider-native id is accepted"); + assert_eq!(model.as_deref(), Some("kimi-k2.5")); +} + +#[test] +fn role_model_validation_stays_strict_on_official_deepseek() { + let mut runtime = stub_runtime(); + runtime + .role_models + .insert("worker".to_string(), "kimi-k2.5".to_string()); + + let err = configured_model_for_role_or_type(&runtime, Some("worker"), &SubAgentType::General) + .expect_err("non-DeepSeek id is rejected on the official API"); + let msg = err.to_string(); + assert!(msg.contains("kimi-k2.5"), "names the bad id: {msg}"); + assert!( + msg.contains("deepseek-v4-pro"), + "lists accepted ids from model_completion_names_for_provider: {msg}" + ); +} + +#[test] +fn normalize_requested_subagent_model_is_provider_aware() { + assert_eq!( + normalize_requested_subagent_model( + "kimi-k2.5", + "model", + crate::config::ApiProvider::Moonshot + ) + .expect("Moonshot accepts its own ids"), + "kimi-k2.5" + ); + assert_eq!( + normalize_requested_subagent_model( + "qwen3:32b", + "model", + crate::config::ApiProvider::Ollama + ) + .expect("Ollama tags pass through"), + "qwen3:32b" + ); + assert!( + normalize_requested_subagent_model( + "kimi-k2.5", + "model", + crate::config::ApiProvider::Deepseek + ) + .is_err(), + "official DeepSeek API rejects foreign ids" + ); +} + // ── #3030: step-counter formatting ────────────────────────────────────────── #[test] diff --git a/docs/SUBAGENTS.md b/docs/SUBAGENTS.md index 71aba596..730753ae 100644 --- a/docs/SUBAGENTS.md +++ b/docs/SUBAGENTS.md @@ -124,6 +124,50 @@ cancelled records persist for inspection but don't occupy a slot. Agents that lost their `task_handle` (e.g. across a process restart) also don't count against the cap. +## Per-role models (#3018) + +Children can run on a different model than the parent. Two config surfaces +feed the same override map (`[subagents.models]` keys win on conflict, keys +are case-insensitive): + +```toml +[subagents] +default_model = "deepseek-v4-flash" # fallback for every role +worker_model = "deepseek-v4-pro" # worker / general +explorer_model = "deepseek-v4-flash" # explorer / explore +awaiter_model = "deepseek-v4-flash" # awaiter / plan +review_model = "deepseek-v4-pro" # review +custom_model = "deepseek-v4-pro" # custom + +[subagents.models] +# Free-form role → model map; any role alias accepted by agent_open works. +implementation = "deepseek-v4-pro" +``` + +Model ids may be **any model the active provider accepts** — validation is +provider-aware and happens at spawn time, not load time. On the official +DeepSeek API only DeepSeek ids are accepted; every other provider passes the +id through to the provider API, which is the authority. A non-DeepSeek +example: + +```toml +provider = "moonshot" +model = "kimi-k2.6" + +[subagents] +worker_model = "kimi-k2.5" +``` + +Spawn-time `model` arguments on `agent_open` are validated the same way; an +invalid id on the official DeepSeek API fails the spawn with the accepted-id +list instead of an opaque provider 400. + +With `/model auto`, sub-agent routing is provider-aware too: providers with a +known big/cheap pair (DeepSeek, and the hosted DeepSeek routes on NVIDIA NIM, +OpenRouter, Novita, SiliconFlow, SGLang, vLLM) route between that pair; +providers without a known cheap tier (e.g. Ollama, Moonshot) skip the +network router and keep children on the session model. + ## Per-step API timeout (#1806, #1808) Each sub-agent step wraps its DeepSeek `create_message` call in a