Merge PR #3045 from Hmbown: un-hardcode DeepSeek from subagent model validation — provider-aware router candidates

fix(subagent): un-hardcode DeepSeek from model validation — accept any provider id
2026-06-10 22:29:35 -07:00
parent e0b6441a26 3f2f154ad2
commit 18d5302da1
5 changed files with 564 additions and 60 deletions
@@ -575,6 +575,19 @@ pub(crate) fn normalize_custom_model_id(model: &str) -> Option<String> {
    }
 }

+/// Validate a user-requested model id against the active provider (#3018).
+///
+/// DeepSeek providers use the strict `normalize_model_name` gate (official
+/// API only accepts DeepSeek IDs).  All other providers pass any non-empty,
+/// non-control-character string through — the provider API is the authority.
+#[must_use]
+pub fn requested_model_for_provider(provider: ApiProvider, model: &str) -> Option<String> {
+    match provider {
+        ApiProvider::Deepseek | ApiProvider::DeepseekCN => normalize_model_name(model),
+        _ => normalize_custom_model_id(model),
+    }
+}
+
 fn canonical_official_deepseek_model_id(model: &str) -> Option<&'static str> {
    match model.trim().to_ascii_lowercase().as_str() {
        "deepseek-v4-pro"
@@ -7891,6 +7904,25 @@ api_key = "old-openrouter-key"
        assert_eq!(config.default_model(), DEFAULT_TEXT_MODEL);
    }

+    #[test]
+    fn requested_model_for_provider_is_permissive_off_deepseek() {
+        // #3018: the provider API is the authority for non-DeepSeek routes.
+        assert_eq!(
+            requested_model_for_provider(ApiProvider::Moonshot, "kimi-k2.5").as_deref(),
+            Some("kimi-k2.5")
+        );
+        assert_eq!(
+            requested_model_for_provider(ApiProvider::Ollama, "qwen3:32b").as_deref(),
+            Some("qwen3:32b")
+        );
+        // The official DeepSeek API stays strict.
+        assert!(requested_model_for_provider(ApiProvider::Deepseek, "kimi-k2.5").is_none());
+        assert_eq!(
+            requested_model_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro").as_deref(),
+            Some("deepseek-v4-pro")
+        );
+    }
+
    #[test]
    fn wire_model_for_provider_matches_active_provider_shape() {
        assert_eq!(
@@ -13,16 +13,95 @@ use crate::llm_client::LlmClient;
 use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
 use crate::tui::app::ReasoningEffort;

-/// Auto-select a model based on request complexity.
+/// Big/cheap model pair the auto-router may choose between for the active
+/// provider (#3018).
 ///
-/// Short messages (<100 chars) go to Flash. Long messages and requests with
-/// complex keywords go to Pro. The fallback is Flash.
-pub(crate) fn auto_model_heuristic(input: &str, current_model: &str) -> String {
-    auto_model_heuristic_with_bias(input, current_model, false)
+/// `cheap == None` means the provider has no known cheap tier: heuristics
+/// stay on the current model (only thinking effort varies) and the network
+/// router is skipped entirely (#1549).
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct RouterCandidates {
+    pub(crate) big: String,
+    pub(crate) cheap: Option<String>,
 }

+impl RouterCandidates {
+    pub(crate) fn deepseek() -> Self {
+        Self {
+            big: "deepseek-v4-pro".to_string(),
+            cheap: Some("deepseek-v4-flash".to_string()),
+        }
+    }
+
+    /// The cheap-tier id, falling back to `big` when no cheap tier exists.
+    pub(crate) fn cheap_or_big(&self) -> &str {
+        self.cheap.as_deref().unwrap_or(&self.big)
+    }
+}
+
+/// Derive the auto-router's candidate pair for the active provider (#3018).
+///
+/// DeepSeek providers route between the canonical pro/flash pair. Hosted
+/// routes with known wire ids for that pair (NVIDIA NIM, OpenRouter, Novita,
+/// SiliconFlow, SGLang, vLLM) use their provider-prefixed spellings. Every
+/// other provider has no known cheap tier: `big` is the session model and
+/// `cheap` is `None`, so auto mode never fabricates a DeepSeek id for a
+/// provider that cannot serve it.
+pub(crate) fn provider_router_candidates(
+    provider: crate::config::ApiProvider,
+    current_model: &str,
+) -> RouterCandidates {
+    use crate::config::ApiProvider;
+    match provider {
+        ApiProvider::Deepseek | ApiProvider::DeepseekCN => RouterCandidates::deepseek(),
+        ApiProvider::NvidiaNim
+        | ApiProvider::Openrouter
+        | ApiProvider::Novita
+        | ApiProvider::Siliconflow
+        | ApiProvider::SiliconflowCn
+        | ApiProvider::Sglang
+        | ApiProvider::Vllm => RouterCandidates {
+            big: crate::config::wire_model_for_provider(provider, "deepseek-v4-pro"),
+            cheap: Some(crate::config::wire_model_for_provider(
+                provider,
+                "deepseek-v4-flash",
+            )),
+        },
+        _ => RouterCandidates {
+            big: current_model.to_string(),
+            cheap: None,
+        },
+    }
+}
+
+/// Auto-select a model based on request complexity.
+///
+/// Short messages (<100 chars) go to the cheap tier. Long messages and
+/// requests with complex keywords go to the big tier. The fallback is cheap.
+/// This DeepSeek-candidate wrapper keeps legacy callers and tests intact;
+/// provider-aware callers use [`auto_model_heuristic_for_candidates`].
+pub(crate) fn auto_model_heuristic(input: &str, current_model: &str) -> String {
+    auto_model_heuristic_for_candidates(input, current_model, &RouterCandidates::deepseek())
+}
+
+/// Candidate-aware variant of [`auto_model_heuristic`] (#3018).
+pub(crate) fn auto_model_heuristic_for_candidates(
+    input: &str,
+    current_model: &str,
+    candidates: &RouterCandidates,
+) -> String {
+    auto_model_heuristic_selection_with_bias(input, current_model, false, candidates).model
+}
+
+#[cfg(test)]
 fn auto_model_heuristic_with_bias(input: &str, current_model: &str, cost_saving: bool) -> String {
-    auto_model_heuristic_selection_with_bias(input, current_model, cost_saving).model
+    auto_model_heuristic_selection_with_bias(
+        input,
+        current_model,
+        cost_saving,
+        &RouterCandidates::deepseek(),
+    )
+    .model
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -41,6 +120,7 @@ fn auto_model_heuristic_selection_with_bias(
    input: &str,
    _current_model: &str,
    cost_saving: bool,
+    candidates: &RouterCandidates,
 ) -> AutoModelHeuristicSelection {
    let len = input.chars().count();
    let lower = input.to_lowercase();
@@ -58,26 +138,26 @@ fn auto_model_heuristic_selection_with_bias(
    let pro_match = strong_match || (!cost_saving && borderline_match);
    if pro_match {
        return AutoModelHeuristicSelection {
-            model: "deepseek-v4-pro".to_string(),
+            model: candidates.big.clone(),
            confidence: AutoModelHeuristicConfidence::Decisive,
        };
    }
    if len < 100 {
        return AutoModelHeuristicSelection {
-            model: "deepseek-v4-flash".to_string(),
+            model: candidates.cheap_or_big().to_string(),
            confidence: AutoModelHeuristicConfidence::Decisive,
        };
    }
    let long_threshold = if cost_saving { 1_000 } else { 500 };
    if len > long_threshold {
        return AutoModelHeuristicSelection {
-            model: "deepseek-v4-pro".to_string(),
+            model: candidates.big.clone(),
            confidence: AutoModelHeuristicConfidence::Decisive,
        };
    }

    AutoModelHeuristicSelection {
-        model: "deepseek-v4-flash".to_string(),
+        model: candidates.cheap_or_big().to_string(),
        confidence: AutoModelHeuristicConfidence::Ambiguous,
    }
 }
@@ -148,26 +228,50 @@ pub(crate) struct AutoRouteSelection {
    pub(crate) source: AutoRouteSource,
 }

-const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = "\
-You are the codewhale auto-routing classifier. Return only compact JSON: \
-{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
-Use deepseek-v4-flash for trivial, conversational, status, or single-step work. \
-Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, \
+/// Render the auto-router system prompt with the actual candidate ids
+/// (#3018): the classifier must answer with ids the active provider can
+/// serve, not hardcoded DeepSeek spellings.
+pub(crate) fn auto_router_system_prompt(
+    candidates: &RouterCandidates,
+    cost_saving: bool,
+) -> String {
+    let cheap = candidates.cheap_or_big();
+    let big = &candidates.big;
+    let mut prompt = format!(
+        "You are the codewhale auto-routing classifier. Return only compact JSON: \
+{{\"model\":\"{cheap}|{big}\",\"thinking\":\"off|high|max\"}}. \
+Use {cheap} for trivial, conversational, status, or single-step work. \
+Use {big} for coding, debugging, release work, multi-step tasks, high-risk decisions, \
 tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. \
 Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \
-agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.";
-
-const AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM: &str = "\
-\n\nCost-saving mode is ON. Prefer deepseek-v4-flash for any request that is \
+agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work."
+    );
+    if cost_saving {
+        prompt.push_str(&format!(
+            "\n\nCost-saving mode is ON. Prefer {cheap} for any request that is \
 not unmistakably agentic, multi-step, architecture/design, security review, \
-debugging, or otherwise clearly out of Flash's capability. Resolve ambiguous \
-cases in favour of deepseek-v4-flash, not deepseek-v4-pro.";
+debugging, or otherwise clearly out of the cheap tier's capability. Resolve \
+ambiguous cases in favour of {cheap}, not {big}."
+        ));
+    }
+    prompt
+}

+/// DeepSeek-candidate wrapper kept for the legacy parser tests; the
+/// network router parses with [`parse_auto_route_recommendation_for_candidates`].
+#[cfg(test)]
 pub(crate) fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteRecommendation> {
+    parse_auto_route_recommendation_for_candidates(raw, &RouterCandidates::deepseek())
+}
+
+pub(crate) fn parse_auto_route_recommendation_for_candidates(
+    raw: &str,
+    candidates: &RouterCandidates,
+) -> Option<AutoRouteRecommendation> {
    let json = extract_first_json_object(raw)?;
    let value: serde_json::Value = serde_json::from_str(json).ok()?;
    let model = value.get("model").and_then(serde_json::Value::as_str)?;
-    let model = normalize_auto_route_model(model)?;
+    let model = normalize_auto_route_model(model, candidates)?;
    let reasoning_effort = value
        .get("thinking")
        .or_else(|| value.get("reasoning_effort"))
@@ -176,7 +280,7 @@ pub(crate) fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteReco
        .and_then(parse_auto_route_reasoning_effort);

    Some(AutoRouteRecommendation {
-        model: model.to_string(),
+        model,
        reasoning_effort,
    })
 }
@@ -187,10 +291,21 @@ fn extract_first_json_object(raw: &str) -> Option<&str> {
    (end >= start).then_some(&raw[start..=end])
 }

-fn normalize_auto_route_model(model: &str) -> Option<&'static str> {
-    match model.trim().to_ascii_lowercase().as_str() {
-        "deepseek-v4-pro" | "v4-pro" | "pro" => Some("deepseek-v4-pro"),
-        "deepseek-v4-flash" | "v4-flash" | "flash" => Some("deepseek-v4-flash"),
+fn normalize_auto_route_model(model: &str, candidates: &RouterCandidates) -> Option<String> {
+    let normalized = model.trim().to_ascii_lowercase();
+    // Exact candidate ids, case-insensitively (#3018).
+    if normalized == candidates.big.to_ascii_lowercase() {
+        return Some(candidates.big.clone());
+    }
+    if let Some(cheap) = candidates.cheap.as_deref()
+        && normalized == cheap.to_ascii_lowercase()
+    {
+        return Some(cheap.to_string());
+    }
+    // Legacy pro/flash shorthand maps onto the big/cheap tiers.
+    match normalized.as_str() {
+        "deepseek-v4-pro" | "v4-pro" | "pro" => Some(candidates.big.clone()),
+        "deepseek-v4-flash" | "v4-flash" | "flash" => Some(candidates.cheap_or_big().to_string()),
        _ => None,
    }
 }
@@ -221,14 +336,29 @@ pub(crate) async fn resolve_auto_route_with_flash(
    selected_thinking_mode: &str,
 ) -> AutoRouteSelection {
    let cost_saving = config.auto_cost_saving();
-    let heuristic =
-        auto_model_heuristic_selection_with_bias(latest_request, selected_model_mode, cost_saving);
+    // #3018: derive the candidate pair from the active provider. The
+    // config-resolved default model stands in for the session model — with
+    // auto mode on, that is the canonical id the provider serves.
+    let candidates = provider_router_candidates(config.api_provider(), &config.default_model());
+    let heuristic = auto_model_heuristic_selection_with_bias(
+        latest_request,
+        selected_model_mode,
+        cost_saving,
+        &candidates,
+    );
    if heuristic.confidence == AutoModelHeuristicConfidence::Decisive {
        return auto_route_from_heuristic(latest_request, heuristic);
    }

+    // #1549/#3018: no cheap tier → no network round-trip. The heuristic is
+    // the only signal and the routed model stays on the session model.
+    if candidates.cheap.is_none() {
+        return auto_route_from_heuristic(latest_request, heuristic);
+    }
+
    match auto_route_flash_recommendation(
        config,
+        &candidates,
        latest_request,
        recent_context,
        selected_model_mode,
@@ -261,6 +391,7 @@ fn auto_route_from_heuristic(

 async fn auto_route_flash_recommendation(
    config: &Config,
+    candidates: &RouterCandidates,
    latest_request: &str,
    recent_context: &str,
    selected_model_mode: &str,
@@ -269,14 +400,16 @@ async fn auto_route_flash_recommendation(
    if cfg!(test) {
        return Ok(None);
    }
+    let Some(cheap_model) = candidates.cheap.clone() else {
+        // Callers skip the router when there is no cheap tier; this is a
+        // defensive second gate so a future caller cannot 400 the provider.
+        return Ok(None);
+    };

    let client = DeepSeekClient::new(config)?;
-    let mut router_system = AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string();
-    if config.auto_cost_saving() {
-        router_system.push_str(AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM);
-    }
+    let router_system = auto_router_system_prompt(candidates, config.auto_cost_saving());
    let request = MessageRequest {
-        model: "deepseek-v4-flash".to_string(),
+        model: cheap_model,
        messages: vec![Message {
            role: "user".to_string(),
            content: vec![ContentBlock::Text {
@@ -303,9 +436,10 @@ async fn auto_route_flash_recommendation(

    let response =
        tokio::time::timeout(Duration::from_secs(4), client.create_message(request)).await??;
-    Ok(parse_auto_route_recommendation(&message_response_text(
-        &response,
-    )))
+    Ok(parse_auto_route_recommendation_for_candidates(
+        &message_response_text(&response),
+        candidates,
+    ))
 }

 fn auto_route_prompt(
@@ -416,7 +550,12 @@ mod tests {

    #[test]
    fn auto_heuristic_selection_marks_short_and_complex_routes_decisive() {
-        let short = auto_model_heuristic_selection_with_bias("yes", "auto", false);
+        let short = auto_model_heuristic_selection_with_bias(
+            "yes",
+            "auto",
+            false,
+            &RouterCandidates::deepseek(),
+        );
        assert_eq!(short.model, "deepseek-v4-flash");
        assert_eq!(
            short.confidence,
@@ -428,6 +567,7 @@ mod tests {
            "Please review the auth migration",
            "auto",
            false,
+            &RouterCandidates::deepseek(),
        );
        assert_eq!(complex.model, "deepseek-v4-pro");
        assert_eq!(
@@ -446,7 +586,12 @@ mod tests {
            "test request must stay in the default grey zone"
        );

-        let selection = auto_model_heuristic_selection_with_bias(&request, "auto", false);
+        let selection = auto_model_heuristic_selection_with_bias(
+            &request,
+            "auto",
+            false,
+            &RouterCandidates::deepseek(),
+        );
        assert_eq!(selection.model, "deepseek-v4-flash");
        assert_eq!(
            selection.confidence,
@@ -550,6 +695,91 @@ mod tests {
        );
    }

+    #[test]
+    fn provider_router_candidates_cover_known_provider_classes() {
+        use crate::config::ApiProvider;
+
+        let deepseek = provider_router_candidates(ApiProvider::Deepseek, "deepseek-v4-pro");
+        assert_eq!(deepseek.big, "deepseek-v4-pro");
+        assert_eq!(deepseek.cheap.as_deref(), Some("deepseek-v4-flash"));
+
+        let openrouter =
+            provider_router_candidates(ApiProvider::Openrouter, "deepseek/deepseek-v4-pro");
+        assert_eq!(openrouter.big, "deepseek/deepseek-v4-pro");
+        assert_eq!(
+            openrouter.cheap.as_deref(),
+            Some("deepseek/deepseek-v4-flash")
+        );
+
+        // Providers without a known cheap tier: big = session model, no cheap.
+        let ollama = provider_router_candidates(ApiProvider::Ollama, "qwen3:32b");
+        assert_eq!(ollama.big, "qwen3:32b");
+        assert_eq!(ollama.cheap, None);
+
+        let moonshot = provider_router_candidates(ApiProvider::Moonshot, "kimi-k2.6");
+        assert_eq!(moonshot.big, "kimi-k2.6");
+        assert_eq!(moonshot.cheap, None);
+    }
+
+    #[test]
+    fn heuristic_without_cheap_tier_always_returns_current_model() {
+        // #3018 AC: Ollama + auto must never fabricate a DeepSeek id.
+        let candidates = RouterCandidates {
+            big: "qwen3:32b".to_string(),
+            cheap: None,
+        };
+        for prompt in [
+            "hi",
+            "please refactor the auth module for security",
+            &"long filler sentence. ".repeat(60),
+        ] {
+            let model = auto_model_heuristic_for_candidates(prompt, "qwen3:32b", &candidates);
+            assert_eq!(model, "qwen3:32b", "prompt {prompt:?}");
+        }
+    }
+
+    #[test]
+    fn auto_route_parser_accepts_candidate_ids_and_legacy_shorthand() {
+        let candidates = RouterCandidates {
+            big: "deepseek/deepseek-v4-pro".to_string(),
+            cheap: Some("deepseek/deepseek-v4-flash".to_string()),
+        };
+
+        let rec = parse_auto_route_recommendation_for_candidates(
+            r#"{"model":"DeepSeek/DeepSeek-V4-Pro","thinking":"max"}"#,
+            &candidates,
+        )
+        .expect("exact candidate id parses case-insensitively");
+        assert_eq!(rec.model, "deepseek/deepseek-v4-pro");
+
+        let rec = parse_auto_route_recommendation_for_candidates(
+            r#"{"model":"flash","thinking":"off"}"#,
+            &candidates,
+        )
+        .expect("legacy shorthand maps onto the cheap tier");
+        assert_eq!(rec.model, "deepseek/deepseek-v4-flash");
+
+        assert!(
+            parse_auto_route_recommendation_for_candidates(
+                r#"{"model":"gpt-4o","thinking":"high"}"#,
+                &candidates,
+            )
+            .is_none(),
+            "non-candidate ids are rejected"
+        );
+    }
+
+    #[test]
+    fn auto_router_system_prompt_names_candidate_ids() {
+        let candidates = RouterCandidates {
+            big: "deepseek/deepseek-v4-pro".to_string(),
+            cheap: Some("deepseek/deepseek-v4-flash".to_string()),
+        };
+        let prompt = auto_router_system_prompt(&candidates, true);
+        assert!(prompt.contains("deepseek/deepseek-v4-flash|deepseek/deepseek-v4-pro"));
+        assert!(prompt.contains("Cost-saving mode is ON"));
+    }
+
    #[test]
    fn config_auto_cost_saving_defaults_to_false() {
        let cfg = Config::default();
@@ -2648,7 +2648,14 @@ impl ToolSpec for AgentSpawnTool {
            child_runtime.context.workspace = cwd;
        }
        let configured_model = match spawn_request.model.clone() {
-            Some(model) => Some(model),
+            // #3018 audit fix: spawn-time `model` must be validated and
+            // canonicalized here — passing it through raw turns a typo into
+            // an opaque provider 400 instead of a clear spawn-time error.
+            Some(model) => Some(normalize_requested_subagent_model(
+                &model,
+                "model",
+                self.runtime.client.api_provider(),
+            )?),
            None => configured_model_for_role_or_type(
                &self.runtime,
                spawn_request.assignment.role.as_deref(),
@@ -5081,18 +5088,39 @@ fn with_default_fork_context(mut input: Value, default: bool) -> Value {
 pub(crate) fn normalize_requested_subagent_model(
    value: &str,
    field: &str,
+    provider: crate::config::ApiProvider,
 ) -> Result<String, ToolError> {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        return Err(ToolError::invalid_input(format!("{field} cannot be blank")));
    }
-    crate::config::normalize_model_name(trimmed).ok_or_else(|| {
+    // #3018: Use provider-aware validation so non-DeepSeek providers can
+    // accept their own model IDs instead of failing with "Expected a
+    // DeepSeek model id".
+    crate::config::requested_model_for_provider(provider, trimmed).ok_or_else(|| {
+        let valid_names = crate::config::model_completion_names_for_provider(provider);
+        let valid_hint = if valid_names.is_empty() {
+            String::new()
+        } else {
+            format!(" (accepted: {})", valid_names.join(", "))
+        };
        ToolError::invalid_input(format!(
-            "Invalid {field} '{trimmed}'. Expected a DeepSeek model id such as deepseek-v4-pro or deepseek-v4-flash"
+            "Invalid {field} '{trimmed}' for provider {}{valid_hint}",
+            provider_name_for_error(provider)
        ))
    })
 }

+fn provider_name_for_error(provider: crate::config::ApiProvider) -> &'static str {
+    match provider {
+        crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN => "DeepSeek",
+        crate::config::ApiProvider::Openai | crate::config::ApiProvider::OpenaiCodex => "OpenAI",
+        crate::config::ApiProvider::Moonshot => "Moonshot",
+        crate::config::ApiProvider::Ollama => "Ollama",
+        _ => "this provider",
+    }
+}
+
 pub(crate) fn configured_model_for_role_or_type(
    runtime: &SubAgentRuntime,
    role: Option<&str>,
@@ -5107,8 +5135,12 @@ pub(crate) fn configured_model_for_role_or_type(

    for key in keys {
        if let Some(model) = runtime.role_models.get(&key) {
-            return normalize_requested_subagent_model(model, &format!("subagents.{key}.model"))
-                .map(Some);
+            return normalize_requested_subagent_model(
+                model,
+                &format!("subagents.{key}.model"),
+                runtime.client.api_provider(),
+            )
+            .map(Some);
        }
    }
    Ok(None)
@@ -5161,7 +5193,15 @@ fn tool_agent_route(parent_model: &str, configured_model: Option<String>) -> Sub
 }

 fn should_use_subagent_flash_router(runtime: &SubAgentRuntime) -> bool {
-    runtime.auto_model
+    // #3018: providers without a known cheap tier skip the network router
+    // entirely — there is no alternative model worth a round-trip.
+    runtime.auto_model && subagent_router_candidates(runtime).cheap.is_some()
+}
+
+/// Candidate pair for the sub-agent router, derived from the active
+/// provider and the (already provider-resolved) parent model (#3018).
+fn subagent_router_candidates(runtime: &SubAgentRuntime) -> crate::model_routing::RouterCandidates {
+    crate::model_routing::provider_router_candidates(runtime.client.api_provider(), &runtime.model)
 }

 fn fallback_subagent_assignment_route(
@@ -5172,7 +5212,13 @@ fn fallback_subagent_assignment_route(
    let model = if let Some(model) = configured_model {
        model
    } else if runtime.auto_model {
-        crate::model_routing::auto_model_heuristic(prompt, &runtime.model)
+        // #3018: candidate-aware — on providers without a cheap tier this
+        // always resolves to the parent model instead of a DeepSeek id.
+        crate::model_routing::auto_model_heuristic_for_candidates(
+            prompt,
+            &runtime.model,
+            &subagent_router_candidates(runtime),
+        )
    } else {
        runtime.model.clone()
    };
@@ -5203,8 +5249,15 @@ async fn subagent_flash_router(
        return Ok(None);
    }

+    let candidates = subagent_router_candidates(runtime);
+    let Some(cheap_model) = candidates.cheap.clone() else {
+        // should_use_subagent_flash_router already gates on this; defensive
+        // second gate so the router can never 400 a cheap-tier-less provider.
+        return Ok(None);
+    };
+
    let request = MessageRequest {
-        model: "deepseek-v4-flash".to_string(),
+        model: cheap_model,
        messages: vec![Message {
            role: "user".to_string(),
            content: vec![ContentBlock::Text {
@@ -5213,9 +5266,9 @@ async fn subagent_flash_router(
            }],
        }],
        max_tokens: 96,
-        system: Some(SystemPrompt::Text(
-            SUBAGENT_ROUTER_SYSTEM_PROMPT.to_string(),
-        )),
+        system: Some(SystemPrompt::Text(subagent_router_system_prompt(
+            &candidates,
+        ))),
        tools: None,
        tool_choice: None,
        metadata: None,
@@ -5231,21 +5284,31 @@ async fn subagent_flash_router(
        runtime.client.create_message(request),
    )
    .await??;
-    Ok(crate::model_routing::parse_auto_route_recommendation(
-        &message_response_text(&response.content),
-    ))
+    Ok(
+        crate::model_routing::parse_auto_route_recommendation_for_candidates(
+            &message_response_text(&response.content),
+            &candidates,
+        ),
+    )
 }

-const SUBAGENT_ROUTER_SYSTEM_PROMPT: &str = "\
-You are the codewhale sub-agent routing manager. Return only compact JSON: \
-{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
+/// Render the sub-agent router system prompt from the actual candidate ids
+/// (#3018) so the classifier answers with ids the active provider serves.
+fn subagent_router_system_prompt(candidates: &crate::model_routing::RouterCandidates) -> String {
+    let cheap = candidates.cheap_or_big();
+    let big = &candidates.big;
+    format!(
+        "You are the codewhale sub-agent routing manager. Return only compact JSON: \
+{{\"model\":\"{cheap}|{big}\",\"thinking\":\"off|high|max\"}}. \
 Treat each child assignment like a customer request entering a team queue: decide the least \
 sufficient worker and thinking budget for that assignment. Do not treat being a sub-agent as \
-important by itself. Use Flash for trivial, read-only, status, lookup, or single-step work. \
-Use Pro for coding, debugging, release work, multi-file changes, security, architecture, \
+important by itself. Use {cheap} for trivial, read-only, status, lookup, or single-step work. \
+Use {big} for coding, debugging, release work, multi-file changes, security, architecture, \
 high-risk decisions, ambiguous requests, or work likely to need tool-call judgment. Use thinking \
 off for trivial no-tool work, high for ordinary reasoning, and max only for hard, risky, \
-multi-step, uncertain, or tool-heavy work.";
+multi-step, uncertain, or tool-heavy work."
+    )
+}

 fn subagent_router_prompt(runtime: &SubAgentRuntime, prompt: &str) -> String {
    format!(
@@ -5297,7 +5360,15 @@ fn message_response_text(blocks: &[ContentBlock]) -> String {
 fn parse_optional_subagent_model(input: &Value, key: &str) -> Result<Option<String>, ToolError> {
    match input.get(key) {
        None | Some(Value::Null) => Ok(None),
-        Some(Value::String(value)) => normalize_requested_subagent_model(value, key).map(Some),
+        Some(Value::String(value)) => {
+            let trimmed = value.trim();
+            if trimmed.is_empty() {
+                return Err(ToolError::invalid_input(format!("{key} cannot be blank")));
+            }
+            // #3018: Basic parsing only — provider-aware validation is deferred
+            // to the spawn path where the runtime's ApiProvider is available.
+            Ok(Some(trimmed.to_string()))
+        }
        Some(_) => Err(ToolError::invalid_input(format!("{key} must be a string"))),
    }
 }
@@ -2665,6 +2665,35 @@ fn stub_runtime() -> SubAgentRuntime {
 /// *some* `DeepSeekClient` because `SubAgentRuntime.client` isn't
 /// `Option<...>`. `Config::default()` is enough — `DeepSeekClient::new`
 /// only validates that an API key field exists, not that the key works.
+fn stub_runtime_for_provider(provider: &str) -> SubAgentRuntime {
+    let mut runtime = stub_runtime();
+    runtime.client = stub_client_for_provider(provider);
+    runtime
+}
+
+fn stub_client_for_provider(provider: &str) -> DeepSeekClient {
+    let _ = rustls::crypto::ring::default_provider().install_default();
+    let mut providers = crate::config::ProvidersConfig::default();
+    match provider {
+        "moonshot" => {
+            providers.moonshot = crate::config::ProviderConfig {
+                api_key: Some("test-key".to_string()),
+                ..Default::default()
+            };
+        }
+        // Ollama is keyless (local runtime); extend per-provider as needed.
+        "ollama" => {}
+        other => panic!("extend stub_client_for_provider for provider {other}"),
+    }
+    let config = crate::config::Config {
+        api_key: Some("test-key".to_string()),
+        provider: Some(provider.to_string()),
+        providers: Some(providers),
+        ..crate::config::Config::default()
+    };
+    DeepSeekClient::new(&config).expect("stub client should construct")
+}
+
 fn stub_client() -> DeepSeekClient {
    let _ = rustls::crypto::ring::default_provider().install_default();
    let config = crate::config::Config {
@@ -3197,6 +3226,104 @@ fn model_catalog_only_advertises_canonical_subagent_tools() {
    }
 }

+// ── #3018: provider-aware auto routing and model validation ─────────────────
+
+#[tokio::test]
+async fn auto_route_on_provider_without_cheap_tier_stays_on_parent_model() {
+    // AC: Ollama + auto-model must never build a request with a DeepSeek id;
+    // the routed model equals the session model for any prompt class.
+    let mut runtime = stub_runtime_for_provider("ollama").with_auto_model(true);
+    runtime.model = "qwen3:32b".to_string();
+
+    for prompt in ["hi", "please refactor the whole auth module for security"] {
+        let route =
+            resolve_subagent_assignment_route(&runtime, None, prompt, &SubAgentType::General).await;
+        assert_eq!(route.model, "qwen3:32b", "prompt {prompt:?}");
+        assert!(
+            !route.model.contains("deepseek"),
+            "no DeepSeek id may be fabricated: {route:?}"
+        );
+    }
+}
+
+#[test]
+fn flash_router_gate_requires_cheap_tier() {
+    let deepseek = stub_runtime().with_auto_model(true);
+    assert!(
+        should_use_subagent_flash_router(&deepseek),
+        "DeepSeek keeps the network router"
+    );
+
+    let mut moonshot = stub_runtime_for_provider("moonshot").with_auto_model(true);
+    moonshot.model = "kimi-k2.6".to_string();
+    assert!(
+        !should_use_subagent_flash_router(&moonshot),
+        "providers without a cheap tier skip the network router"
+    );
+}
+
+#[test]
+fn role_model_validation_accepts_provider_native_ids() {
+    // AC: [subagents] worker_model = "kimi-k2.5" on Moonshot must not fail
+    // with "Expected a DeepSeek model id".
+    let mut runtime = stub_runtime_for_provider("moonshot");
+    runtime
+        .role_models
+        .insert("worker".to_string(), "kimi-k2.5".to_string());
+
+    let model = configured_model_for_role_or_type(&runtime, Some("worker"), &SubAgentType::General)
+        .expect("provider-native id is accepted");
+    assert_eq!(model.as_deref(), Some("kimi-k2.5"));
+}
+
+#[test]
+fn role_model_validation_stays_strict_on_official_deepseek() {
+    let mut runtime = stub_runtime();
+    runtime
+        .role_models
+        .insert("worker".to_string(), "kimi-k2.5".to_string());
+
+    let err = configured_model_for_role_or_type(&runtime, Some("worker"), &SubAgentType::General)
+        .expect_err("non-DeepSeek id is rejected on the official API");
+    let msg = err.to_string();
+    assert!(msg.contains("kimi-k2.5"), "names the bad id: {msg}");
+    assert!(
+        msg.contains("deepseek-v4-pro"),
+        "lists accepted ids from model_completion_names_for_provider: {msg}"
+    );
+}
+
+#[test]
+fn normalize_requested_subagent_model_is_provider_aware() {
+    assert_eq!(
+        normalize_requested_subagent_model(
+            "kimi-k2.5",
+            "model",
+            crate::config::ApiProvider::Moonshot
+        )
+        .expect("Moonshot accepts its own ids"),
+        "kimi-k2.5"
+    );
+    assert_eq!(
+        normalize_requested_subagent_model(
+            "qwen3:32b",
+            "model",
+            crate::config::ApiProvider::Ollama
+        )
+        .expect("Ollama tags pass through"),
+        "qwen3:32b"
+    );
+    assert!(
+        normalize_requested_subagent_model(
+            "kimi-k2.5",
+            "model",
+            crate::config::ApiProvider::Deepseek
+        )
+        .is_err(),
+        "official DeepSeek API rejects foreign ids"
+    );
+}
+
 // ── #3030: step-counter formatting ──────────────────────────────────────────

 #[test]
@@ -124,6 +124,50 @@ cancelled records persist for inspection but don't occupy a slot.
 Agents that lost their `task_handle` (e.g. across a process
 restart) also don't count against the cap.

+## Per-role models (#3018)
+
+Children can run on a different model than the parent. Two config surfaces
+feed the same override map (`[subagents.models]` keys win on conflict, keys
+are case-insensitive):
+
+```toml
+[subagents]
+default_model  = "deepseek-v4-flash"   # fallback for every role
+worker_model   = "deepseek-v4-pro"     # worker / general
+explorer_model = "deepseek-v4-flash"   # explorer / explore
+awaiter_model  = "deepseek-v4-flash"   # awaiter / plan
+review_model   = "deepseek-v4-pro"     # review
+custom_model   = "deepseek-v4-pro"     # custom
+
+[subagents.models]
+# Free-form role → model map; any role alias accepted by agent_open works.
+implementation = "deepseek-v4-pro"
+```
+
+Model ids may be **any model the active provider accepts** — validation is
+provider-aware and happens at spawn time, not load time. On the official
+DeepSeek API only DeepSeek ids are accepted; every other provider passes the
+id through to the provider API, which is the authority. A non-DeepSeek
+example:
+
+```toml
+provider = "moonshot"
+model = "kimi-k2.6"
+
+[subagents]
+worker_model = "kimi-k2.5"
+```
+
+Spawn-time `model` arguments on `agent_open` are validated the same way; an
+invalid id on the official DeepSeek API fails the spawn with the accepted-id
+list instead of an opaque provider 400.
+
+With `/model auto`, sub-agent routing is provider-aware too: providers with a
+known big/cheap pair (DeepSeek, and the hosted DeepSeek routes on NVIDIA NIM,
+OpenRouter, Novita, SiliconFlow, SGLang, vLLM) route between that pair;
+providers without a known cheap tier (e.g. Ollama, Moonshot) skip the
+network router and keep children on the session model.
+
 ## Per-step API timeout (#1806, #1808)

 Each sub-agent step wraps its DeepSeek `create_message` call in a