feat(routing): complete #3018 — RouterCandidates + provider_router_candidates; candidate-aware heuristics, router prompts and recommendation parsing; skip the network router when no cheap tier exists; validate spawn-time model against the active provider; tests + SUBAGENTS.md per-role models docs
Co-Authored-By: Claude <noreply@anthropic.com> https://claude.ai/code/session_018zaP8vUfTAsrE38L6h6fw5
This commit is contained in:
@@ -7904,6 +7904,25 @@ api_key = "old-openrouter-key"
|
||||
assert_eq!(config.default_model(), DEFAULT_TEXT_MODEL);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn requested_model_for_provider_is_permissive_off_deepseek() {
|
||||
// #3018: the provider API is the authority for non-DeepSeek routes.
|
||||
assert_eq!(
|
||||
requested_model_for_provider(ApiProvider::Moonshot, "kimi-k2.5").as_deref(),
|
||||
Some("kimi-k2.5")
|
||||
);
|
||||
assert_eq!(
|
||||
requested_model_for_provider(ApiProvider::Ollama, "qwen3:32b").as_deref(),
|
||||
Some("qwen3:32b")
|
||||
);
|
||||
// The official DeepSeek API stays strict.
|
||||
assert!(requested_model_for_provider(ApiProvider::Deepseek, "kimi-k2.5").is_none());
|
||||
assert_eq!(
|
||||
requested_model_for_provider(ApiProvider::Deepseek, "deepseek-v4-pro").as_deref(),
|
||||
Some("deepseek-v4-pro")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wire_model_for_provider_matches_active_provider_shape() {
|
||||
assert_eq!(
|
||||
|
||||
+269
-39
@@ -13,16 +13,95 @@ use crate::llm_client::LlmClient;
|
||||
use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
|
||||
use crate::tui::app::ReasoningEffort;
|
||||
|
||||
/// Auto-select a model based on request complexity.
|
||||
/// Big/cheap model pair the auto-router may choose between for the active
|
||||
/// provider (#3018).
|
||||
///
|
||||
/// Short messages (<100 chars) go to Flash. Long messages and requests with
|
||||
/// complex keywords go to Pro. The fallback is Flash.
|
||||
pub(crate) fn auto_model_heuristic(input: &str, current_model: &str) -> String {
|
||||
auto_model_heuristic_with_bias(input, current_model, false)
|
||||
/// `cheap == None` means the provider has no known cheap tier: heuristics
|
||||
/// stay on the current model (only thinking effort varies) and the network
|
||||
/// router is skipped entirely (#1549).
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct RouterCandidates {
|
||||
pub(crate) big: String,
|
||||
pub(crate) cheap: Option<String>,
|
||||
}
|
||||
|
||||
impl RouterCandidates {
|
||||
pub(crate) fn deepseek() -> Self {
|
||||
Self {
|
||||
big: "deepseek-v4-pro".to_string(),
|
||||
cheap: Some("deepseek-v4-flash".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
/// The cheap-tier id, falling back to `big` when no cheap tier exists.
|
||||
pub(crate) fn cheap_or_big(&self) -> &str {
|
||||
self.cheap.as_deref().unwrap_or(&self.big)
|
||||
}
|
||||
}
|
||||
|
||||
/// Derive the auto-router's candidate pair for the active provider (#3018).
|
||||
///
|
||||
/// DeepSeek providers route between the canonical pro/flash pair. Hosted
|
||||
/// routes with known wire ids for that pair (NVIDIA NIM, OpenRouter, Novita,
|
||||
/// SiliconFlow, SGLang, vLLM) use their provider-prefixed spellings. Every
|
||||
/// other provider has no known cheap tier: `big` is the session model and
|
||||
/// `cheap` is `None`, so auto mode never fabricates a DeepSeek id for a
|
||||
/// provider that cannot serve it.
|
||||
pub(crate) fn provider_router_candidates(
|
||||
provider: crate::config::ApiProvider,
|
||||
current_model: &str,
|
||||
) -> RouterCandidates {
|
||||
use crate::config::ApiProvider;
|
||||
match provider {
|
||||
ApiProvider::Deepseek | ApiProvider::DeepseekCN => RouterCandidates::deepseek(),
|
||||
ApiProvider::NvidiaNim
|
||||
| ApiProvider::Openrouter
|
||||
| ApiProvider::Novita
|
||||
| ApiProvider::Siliconflow
|
||||
| ApiProvider::SiliconflowCn
|
||||
| ApiProvider::Sglang
|
||||
| ApiProvider::Vllm => RouterCandidates {
|
||||
big: crate::config::wire_model_for_provider(provider, "deepseek-v4-pro"),
|
||||
cheap: Some(crate::config::wire_model_for_provider(
|
||||
provider,
|
||||
"deepseek-v4-flash",
|
||||
)),
|
||||
},
|
||||
_ => RouterCandidates {
|
||||
big: current_model.to_string(),
|
||||
cheap: None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Auto-select a model based on request complexity.
|
||||
///
|
||||
/// Short messages (<100 chars) go to the cheap tier. Long messages and
|
||||
/// requests with complex keywords go to the big tier. The fallback is cheap.
|
||||
/// This DeepSeek-candidate wrapper keeps legacy callers and tests intact;
|
||||
/// provider-aware callers use [`auto_model_heuristic_for_candidates`].
|
||||
pub(crate) fn auto_model_heuristic(input: &str, current_model: &str) -> String {
|
||||
auto_model_heuristic_for_candidates(input, current_model, &RouterCandidates::deepseek())
|
||||
}
|
||||
|
||||
/// Candidate-aware variant of [`auto_model_heuristic`] (#3018).
|
||||
pub(crate) fn auto_model_heuristic_for_candidates(
|
||||
input: &str,
|
||||
current_model: &str,
|
||||
candidates: &RouterCandidates,
|
||||
) -> String {
|
||||
auto_model_heuristic_selection_with_bias(input, current_model, false, candidates).model
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn auto_model_heuristic_with_bias(input: &str, current_model: &str, cost_saving: bool) -> String {
|
||||
auto_model_heuristic_selection_with_bias(input, current_model, cost_saving).model
|
||||
auto_model_heuristic_selection_with_bias(
|
||||
input,
|
||||
current_model,
|
||||
cost_saving,
|
||||
&RouterCandidates::deepseek(),
|
||||
)
|
||||
.model
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
@@ -41,6 +120,7 @@ fn auto_model_heuristic_selection_with_bias(
|
||||
input: &str,
|
||||
_current_model: &str,
|
||||
cost_saving: bool,
|
||||
candidates: &RouterCandidates,
|
||||
) -> AutoModelHeuristicSelection {
|
||||
let len = input.chars().count();
|
||||
let lower = input.to_lowercase();
|
||||
@@ -58,26 +138,26 @@ fn auto_model_heuristic_selection_with_bias(
|
||||
let pro_match = strong_match || (!cost_saving && borderline_match);
|
||||
if pro_match {
|
||||
return AutoModelHeuristicSelection {
|
||||
model: "deepseek-v4-pro".to_string(),
|
||||
model: candidates.big.clone(),
|
||||
confidence: AutoModelHeuristicConfidence::Decisive,
|
||||
};
|
||||
}
|
||||
if len < 100 {
|
||||
return AutoModelHeuristicSelection {
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
model: candidates.cheap_or_big().to_string(),
|
||||
confidence: AutoModelHeuristicConfidence::Decisive,
|
||||
};
|
||||
}
|
||||
let long_threshold = if cost_saving { 1_000 } else { 500 };
|
||||
if len > long_threshold {
|
||||
return AutoModelHeuristicSelection {
|
||||
model: "deepseek-v4-pro".to_string(),
|
||||
model: candidates.big.clone(),
|
||||
confidence: AutoModelHeuristicConfidence::Decisive,
|
||||
};
|
||||
}
|
||||
|
||||
AutoModelHeuristicSelection {
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
model: candidates.cheap_or_big().to_string(),
|
||||
confidence: AutoModelHeuristicConfidence::Ambiguous,
|
||||
}
|
||||
}
|
||||
@@ -148,26 +228,50 @@ pub(crate) struct AutoRouteSelection {
|
||||
pub(crate) source: AutoRouteSource,
|
||||
}
|
||||
|
||||
const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = "\
|
||||
You are the codewhale auto-routing classifier. Return only compact JSON: \
|
||||
{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
|
||||
Use deepseek-v4-flash for trivial, conversational, status, or single-step work. \
|
||||
Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, \
|
||||
/// Render the auto-router system prompt with the actual candidate ids
|
||||
/// (#3018): the classifier must answer with ids the active provider can
|
||||
/// serve, not hardcoded DeepSeek spellings.
|
||||
pub(crate) fn auto_router_system_prompt(
|
||||
candidates: &RouterCandidates,
|
||||
cost_saving: bool,
|
||||
) -> String {
|
||||
let cheap = candidates.cheap_or_big();
|
||||
let big = &candidates.big;
|
||||
let mut prompt = format!(
|
||||
"You are the codewhale auto-routing classifier. Return only compact JSON: \
|
||||
{{\"model\":\"{cheap}|{big}\",\"thinking\":\"off|high|max\"}}. \
|
||||
Use {cheap} for trivial, conversational, status, or single-step work. \
|
||||
Use {big} for coding, debugging, release work, multi-step tasks, high-risk decisions, \
|
||||
tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. \
|
||||
Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \
|
||||
agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.";
|
||||
|
||||
const AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM: &str = "\
|
||||
\n\nCost-saving mode is ON. Prefer deepseek-v4-flash for any request that is \
|
||||
agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work."
|
||||
);
|
||||
if cost_saving {
|
||||
prompt.push_str(&format!(
|
||||
"\n\nCost-saving mode is ON. Prefer {cheap} for any request that is \
|
||||
not unmistakably agentic, multi-step, architecture/design, security review, \
|
||||
debugging, or otherwise clearly out of Flash's capability. Resolve ambiguous \
|
||||
cases in favour of deepseek-v4-flash, not deepseek-v4-pro.";
|
||||
debugging, or otherwise clearly out of the cheap tier's capability. Resolve \
|
||||
ambiguous cases in favour of {cheap}, not {big}."
|
||||
));
|
||||
}
|
||||
prompt
|
||||
}
|
||||
|
||||
/// DeepSeek-candidate wrapper kept for the legacy parser tests; the
|
||||
/// network router parses with [`parse_auto_route_recommendation_for_candidates`].
|
||||
#[cfg(test)]
|
||||
pub(crate) fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteRecommendation> {
|
||||
parse_auto_route_recommendation_for_candidates(raw, &RouterCandidates::deepseek())
|
||||
}
|
||||
|
||||
pub(crate) fn parse_auto_route_recommendation_for_candidates(
|
||||
raw: &str,
|
||||
candidates: &RouterCandidates,
|
||||
) -> Option<AutoRouteRecommendation> {
|
||||
let json = extract_first_json_object(raw)?;
|
||||
let value: serde_json::Value = serde_json::from_str(json).ok()?;
|
||||
let model = value.get("model").and_then(serde_json::Value::as_str)?;
|
||||
let model = normalize_auto_route_model(model)?;
|
||||
let model = normalize_auto_route_model(model, candidates)?;
|
||||
let reasoning_effort = value
|
||||
.get("thinking")
|
||||
.or_else(|| value.get("reasoning_effort"))
|
||||
@@ -176,7 +280,7 @@ pub(crate) fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteReco
|
||||
.and_then(parse_auto_route_reasoning_effort);
|
||||
|
||||
Some(AutoRouteRecommendation {
|
||||
model: model.to_string(),
|
||||
model,
|
||||
reasoning_effort,
|
||||
})
|
||||
}
|
||||
@@ -187,10 +291,21 @@ fn extract_first_json_object(raw: &str) -> Option<&str> {
|
||||
(end >= start).then_some(&raw[start..=end])
|
||||
}
|
||||
|
||||
fn normalize_auto_route_model(model: &str) -> Option<&'static str> {
|
||||
match model.trim().to_ascii_lowercase().as_str() {
|
||||
"deepseek-v4-pro" | "v4-pro" | "pro" => Some("deepseek-v4-pro"),
|
||||
"deepseek-v4-flash" | "v4-flash" | "flash" => Some("deepseek-v4-flash"),
|
||||
fn normalize_auto_route_model(model: &str, candidates: &RouterCandidates) -> Option<String> {
|
||||
let normalized = model.trim().to_ascii_lowercase();
|
||||
// Exact candidate ids, case-insensitively (#3018).
|
||||
if normalized == candidates.big.to_ascii_lowercase() {
|
||||
return Some(candidates.big.clone());
|
||||
}
|
||||
if let Some(cheap) = candidates.cheap.as_deref()
|
||||
&& normalized == cheap.to_ascii_lowercase()
|
||||
{
|
||||
return Some(cheap.to_string());
|
||||
}
|
||||
// Legacy pro/flash shorthand maps onto the big/cheap tiers.
|
||||
match normalized.as_str() {
|
||||
"deepseek-v4-pro" | "v4-pro" | "pro" => Some(candidates.big.clone()),
|
||||
"deepseek-v4-flash" | "v4-flash" | "flash" => Some(candidates.cheap_or_big().to_string()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -221,14 +336,29 @@ pub(crate) async fn resolve_auto_route_with_flash(
|
||||
selected_thinking_mode: &str,
|
||||
) -> AutoRouteSelection {
|
||||
let cost_saving = config.auto_cost_saving();
|
||||
let heuristic =
|
||||
auto_model_heuristic_selection_with_bias(latest_request, selected_model_mode, cost_saving);
|
||||
// #3018: derive the candidate pair from the active provider. The
|
||||
// config-resolved default model stands in for the session model — with
|
||||
// auto mode on, that is the canonical id the provider serves.
|
||||
let candidates = provider_router_candidates(config.api_provider(), &config.default_model());
|
||||
let heuristic = auto_model_heuristic_selection_with_bias(
|
||||
latest_request,
|
||||
selected_model_mode,
|
||||
cost_saving,
|
||||
&candidates,
|
||||
);
|
||||
if heuristic.confidence == AutoModelHeuristicConfidence::Decisive {
|
||||
return auto_route_from_heuristic(latest_request, heuristic);
|
||||
}
|
||||
|
||||
// #1549/#3018: no cheap tier → no network round-trip. The heuristic is
|
||||
// the only signal and the routed model stays on the session model.
|
||||
if candidates.cheap.is_none() {
|
||||
return auto_route_from_heuristic(latest_request, heuristic);
|
||||
}
|
||||
|
||||
match auto_route_flash_recommendation(
|
||||
config,
|
||||
&candidates,
|
||||
latest_request,
|
||||
recent_context,
|
||||
selected_model_mode,
|
||||
@@ -261,6 +391,7 @@ fn auto_route_from_heuristic(
|
||||
|
||||
async fn auto_route_flash_recommendation(
|
||||
config: &Config,
|
||||
candidates: &RouterCandidates,
|
||||
latest_request: &str,
|
||||
recent_context: &str,
|
||||
selected_model_mode: &str,
|
||||
@@ -269,14 +400,16 @@ async fn auto_route_flash_recommendation(
|
||||
if cfg!(test) {
|
||||
return Ok(None);
|
||||
}
|
||||
let Some(cheap_model) = candidates.cheap.clone() else {
|
||||
// Callers skip the router when there is no cheap tier; this is a
|
||||
// defensive second gate so a future caller cannot 400 the provider.
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let client = DeepSeekClient::new(config)?;
|
||||
let mut router_system = AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string();
|
||||
if config.auto_cost_saving() {
|
||||
router_system.push_str(AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM);
|
||||
}
|
||||
let router_system = auto_router_system_prompt(candidates, config.auto_cost_saving());
|
||||
let request = MessageRequest {
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
model: cheap_model,
|
||||
messages: vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
@@ -303,9 +436,10 @@ async fn auto_route_flash_recommendation(
|
||||
|
||||
let response =
|
||||
tokio::time::timeout(Duration::from_secs(4), client.create_message(request)).await??;
|
||||
Ok(parse_auto_route_recommendation(&message_response_text(
|
||||
&response,
|
||||
)))
|
||||
Ok(parse_auto_route_recommendation_for_candidates(
|
||||
&message_response_text(&response),
|
||||
candidates,
|
||||
))
|
||||
}
|
||||
|
||||
fn auto_route_prompt(
|
||||
@@ -416,7 +550,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn auto_heuristic_selection_marks_short_and_complex_routes_decisive() {
|
||||
let short = auto_model_heuristic_selection_with_bias("yes", "auto", false);
|
||||
let short = auto_model_heuristic_selection_with_bias(
|
||||
"yes",
|
||||
"auto",
|
||||
false,
|
||||
&RouterCandidates::deepseek(),
|
||||
);
|
||||
assert_eq!(short.model, "deepseek-v4-flash");
|
||||
assert_eq!(
|
||||
short.confidence,
|
||||
@@ -428,6 +567,7 @@ mod tests {
|
||||
"Please review the auth migration",
|
||||
"auto",
|
||||
false,
|
||||
&RouterCandidates::deepseek(),
|
||||
);
|
||||
assert_eq!(complex.model, "deepseek-v4-pro");
|
||||
assert_eq!(
|
||||
@@ -446,7 +586,12 @@ mod tests {
|
||||
"test request must stay in the default grey zone"
|
||||
);
|
||||
|
||||
let selection = auto_model_heuristic_selection_with_bias(&request, "auto", false);
|
||||
let selection = auto_model_heuristic_selection_with_bias(
|
||||
&request,
|
||||
"auto",
|
||||
false,
|
||||
&RouterCandidates::deepseek(),
|
||||
);
|
||||
assert_eq!(selection.model, "deepseek-v4-flash");
|
||||
assert_eq!(
|
||||
selection.confidence,
|
||||
@@ -550,6 +695,91 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn provider_router_candidates_cover_known_provider_classes() {
|
||||
use crate::config::ApiProvider;
|
||||
|
||||
let deepseek = provider_router_candidates(ApiProvider::Deepseek, "deepseek-v4-pro");
|
||||
assert_eq!(deepseek.big, "deepseek-v4-pro");
|
||||
assert_eq!(deepseek.cheap.as_deref(), Some("deepseek-v4-flash"));
|
||||
|
||||
let openrouter =
|
||||
provider_router_candidates(ApiProvider::Openrouter, "deepseek/deepseek-v4-pro");
|
||||
assert_eq!(openrouter.big, "deepseek/deepseek-v4-pro");
|
||||
assert_eq!(
|
||||
openrouter.cheap.as_deref(),
|
||||
Some("deepseek/deepseek-v4-flash")
|
||||
);
|
||||
|
||||
// Providers without a known cheap tier: big = session model, no cheap.
|
||||
let ollama = provider_router_candidates(ApiProvider::Ollama, "qwen3:32b");
|
||||
assert_eq!(ollama.big, "qwen3:32b");
|
||||
assert_eq!(ollama.cheap, None);
|
||||
|
||||
let moonshot = provider_router_candidates(ApiProvider::Moonshot, "kimi-k2.6");
|
||||
assert_eq!(moonshot.big, "kimi-k2.6");
|
||||
assert_eq!(moonshot.cheap, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn heuristic_without_cheap_tier_always_returns_current_model() {
|
||||
// #3018 AC: Ollama + auto must never fabricate a DeepSeek id.
|
||||
let candidates = RouterCandidates {
|
||||
big: "qwen3:32b".to_string(),
|
||||
cheap: None,
|
||||
};
|
||||
for prompt in [
|
||||
"hi",
|
||||
"please refactor the auth module for security",
|
||||
&"long filler sentence. ".repeat(60),
|
||||
] {
|
||||
let model = auto_model_heuristic_for_candidates(prompt, "qwen3:32b", &candidates);
|
||||
assert_eq!(model, "qwen3:32b", "prompt {prompt:?}");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn auto_route_parser_accepts_candidate_ids_and_legacy_shorthand() {
|
||||
let candidates = RouterCandidates {
|
||||
big: "deepseek/deepseek-v4-pro".to_string(),
|
||||
cheap: Some("deepseek/deepseek-v4-flash".to_string()),
|
||||
};
|
||||
|
||||
let rec = parse_auto_route_recommendation_for_candidates(
|
||||
r#"{"model":"DeepSeek/DeepSeek-V4-Pro","thinking":"max"}"#,
|
||||
&candidates,
|
||||
)
|
||||
.expect("exact candidate id parses case-insensitively");
|
||||
assert_eq!(rec.model, "deepseek/deepseek-v4-pro");
|
||||
|
||||
let rec = parse_auto_route_recommendation_for_candidates(
|
||||
r#"{"model":"flash","thinking":"off"}"#,
|
||||
&candidates,
|
||||
)
|
||||
.expect("legacy shorthand maps onto the cheap tier");
|
||||
assert_eq!(rec.model, "deepseek/deepseek-v4-flash");
|
||||
|
||||
assert!(
|
||||
parse_auto_route_recommendation_for_candidates(
|
||||
r#"{"model":"gpt-4o","thinking":"high"}"#,
|
||||
&candidates,
|
||||
)
|
||||
.is_none(),
|
||||
"non-candidate ids are rejected"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn auto_router_system_prompt_names_candidate_ids() {
|
||||
let candidates = RouterCandidates {
|
||||
big: "deepseek/deepseek-v4-pro".to_string(),
|
||||
cheap: Some("deepseek/deepseek-v4-flash".to_string()),
|
||||
};
|
||||
let prompt = auto_router_system_prompt(&candidates, true);
|
||||
assert!(prompt.contains("deepseek/deepseek-v4-flash|deepseek/deepseek-v4-pro"));
|
||||
assert!(prompt.contains("Cost-saving mode is ON"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn config_auto_cost_saving_defaults_to_false() {
|
||||
let cfg = Config::default();
|
||||
|
||||
@@ -2625,7 +2625,14 @@ impl ToolSpec for AgentSpawnTool {
|
||||
child_runtime.context.workspace = cwd;
|
||||
}
|
||||
let configured_model = match spawn_request.model.clone() {
|
||||
Some(model) => Some(model),
|
||||
// #3018 audit fix: spawn-time `model` must be validated and
|
||||
// canonicalized here — passing it through raw turns a typo into
|
||||
// an opaque provider 400 instead of a clear spawn-time error.
|
||||
Some(model) => Some(normalize_requested_subagent_model(
|
||||
&model,
|
||||
"model",
|
||||
self.runtime.client.api_provider(),
|
||||
)?),
|
||||
None => configured_model_for_role_or_type(
|
||||
&self.runtime,
|
||||
spawn_request.assignment.role.as_deref(),
|
||||
@@ -5153,7 +5160,15 @@ fn tool_agent_route(parent_model: &str, configured_model: Option<String>) -> Sub
|
||||
}
|
||||
|
||||
fn should_use_subagent_flash_router(runtime: &SubAgentRuntime) -> bool {
|
||||
runtime.auto_model
|
||||
// #3018: providers without a known cheap tier skip the network router
|
||||
// entirely — there is no alternative model worth a round-trip.
|
||||
runtime.auto_model && subagent_router_candidates(runtime).cheap.is_some()
|
||||
}
|
||||
|
||||
/// Candidate pair for the sub-agent router, derived from the active
|
||||
/// provider and the (already provider-resolved) parent model (#3018).
|
||||
fn subagent_router_candidates(runtime: &SubAgentRuntime) -> crate::model_routing::RouterCandidates {
|
||||
crate::model_routing::provider_router_candidates(runtime.client.api_provider(), &runtime.model)
|
||||
}
|
||||
|
||||
fn fallback_subagent_assignment_route(
|
||||
@@ -5164,7 +5179,13 @@ fn fallback_subagent_assignment_route(
|
||||
let model = if let Some(model) = configured_model {
|
||||
model
|
||||
} else if runtime.auto_model {
|
||||
crate::model_routing::auto_model_heuristic(prompt, &runtime.model)
|
||||
// #3018: candidate-aware — on providers without a cheap tier this
|
||||
// always resolves to the parent model instead of a DeepSeek id.
|
||||
crate::model_routing::auto_model_heuristic_for_candidates(
|
||||
prompt,
|
||||
&runtime.model,
|
||||
&subagent_router_candidates(runtime),
|
||||
)
|
||||
} else {
|
||||
runtime.model.clone()
|
||||
};
|
||||
@@ -5195,8 +5216,15 @@ async fn subagent_flash_router(
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let candidates = subagent_router_candidates(runtime);
|
||||
let Some(cheap_model) = candidates.cheap.clone() else {
|
||||
// should_use_subagent_flash_router already gates on this; defensive
|
||||
// second gate so the router can never 400 a cheap-tier-less provider.
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let request = MessageRequest {
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
model: cheap_model,
|
||||
messages: vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
@@ -5205,9 +5233,9 @@ async fn subagent_flash_router(
|
||||
}],
|
||||
}],
|
||||
max_tokens: 96,
|
||||
system: Some(SystemPrompt::Text(
|
||||
SUBAGENT_ROUTER_SYSTEM_PROMPT.to_string(),
|
||||
)),
|
||||
system: Some(SystemPrompt::Text(subagent_router_system_prompt(
|
||||
&candidates,
|
||||
))),
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
@@ -5223,21 +5251,31 @@ async fn subagent_flash_router(
|
||||
runtime.client.create_message(request),
|
||||
)
|
||||
.await??;
|
||||
Ok(crate::model_routing::parse_auto_route_recommendation(
|
||||
&message_response_text(&response.content),
|
||||
))
|
||||
Ok(
|
||||
crate::model_routing::parse_auto_route_recommendation_for_candidates(
|
||||
&message_response_text(&response.content),
|
||||
&candidates,
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
const SUBAGENT_ROUTER_SYSTEM_PROMPT: &str = "\
|
||||
You are the codewhale sub-agent routing manager. Return only compact JSON: \
|
||||
{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
|
||||
/// Render the sub-agent router system prompt from the actual candidate ids
|
||||
/// (#3018) so the classifier answers with ids the active provider serves.
|
||||
fn subagent_router_system_prompt(candidates: &crate::model_routing::RouterCandidates) -> String {
|
||||
let cheap = candidates.cheap_or_big();
|
||||
let big = &candidates.big;
|
||||
format!(
|
||||
"You are the codewhale sub-agent routing manager. Return only compact JSON: \
|
||||
{{\"model\":\"{cheap}|{big}\",\"thinking\":\"off|high|max\"}}. \
|
||||
Treat each child assignment like a customer request entering a team queue: decide the least \
|
||||
sufficient worker and thinking budget for that assignment. Do not treat being a sub-agent as \
|
||||
important by itself. Use Flash for trivial, read-only, status, lookup, or single-step work. \
|
||||
Use Pro for coding, debugging, release work, multi-file changes, security, architecture, \
|
||||
important by itself. Use {cheap} for trivial, read-only, status, lookup, or single-step work. \
|
||||
Use {big} for coding, debugging, release work, multi-file changes, security, architecture, \
|
||||
high-risk decisions, ambiguous requests, or work likely to need tool-call judgment. Use thinking \
|
||||
off for trivial no-tool work, high for ordinary reasoning, and max only for hard, risky, \
|
||||
multi-step, uncertain, or tool-heavy work.";
|
||||
multi-step, uncertain, or tool-heavy work."
|
||||
)
|
||||
}
|
||||
|
||||
fn subagent_router_prompt(runtime: &SubAgentRuntime, prompt: &str) -> String {
|
||||
format!(
|
||||
|
||||
@@ -2642,6 +2642,35 @@ fn stub_runtime() -> SubAgentRuntime {
|
||||
/// *some* `DeepSeekClient` because `SubAgentRuntime.client` isn't
|
||||
/// `Option<...>`. `Config::default()` is enough — `DeepSeekClient::new`
|
||||
/// only validates that an API key field exists, not that the key works.
|
||||
fn stub_runtime_for_provider(provider: &str) -> SubAgentRuntime {
|
||||
let mut runtime = stub_runtime();
|
||||
runtime.client = stub_client_for_provider(provider);
|
||||
runtime
|
||||
}
|
||||
|
||||
fn stub_client_for_provider(provider: &str) -> DeepSeekClient {
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
let mut providers = crate::config::ProvidersConfig::default();
|
||||
match provider {
|
||||
"moonshot" => {
|
||||
providers.moonshot = crate::config::ProviderConfig {
|
||||
api_key: Some("test-key".to_string()),
|
||||
..Default::default()
|
||||
};
|
||||
}
|
||||
// Ollama is keyless (local runtime); extend per-provider as needed.
|
||||
"ollama" => {}
|
||||
other => panic!("extend stub_client_for_provider for provider {other}"),
|
||||
}
|
||||
let config = crate::config::Config {
|
||||
api_key: Some("test-key".to_string()),
|
||||
provider: Some(provider.to_string()),
|
||||
providers: Some(providers),
|
||||
..crate::config::Config::default()
|
||||
};
|
||||
DeepSeekClient::new(&config).expect("stub client should construct")
|
||||
}
|
||||
|
||||
fn stub_client() -> DeepSeekClient {
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
let config = crate::config::Config {
|
||||
@@ -3173,3 +3202,101 @@ fn model_catalog_only_advertises_canonical_subagent_tools() {
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ── #3018: provider-aware auto routing and model validation ─────────────────
|
||||
|
||||
#[tokio::test]
|
||||
async fn auto_route_on_provider_without_cheap_tier_stays_on_parent_model() {
|
||||
// AC: Ollama + auto-model must never build a request with a DeepSeek id;
|
||||
// the routed model equals the session model for any prompt class.
|
||||
let mut runtime = stub_runtime_for_provider("ollama").with_auto_model(true);
|
||||
runtime.model = "qwen3:32b".to_string();
|
||||
|
||||
for prompt in ["hi", "please refactor the whole auth module for security"] {
|
||||
let route =
|
||||
resolve_subagent_assignment_route(&runtime, None, prompt, &SubAgentType::General).await;
|
||||
assert_eq!(route.model, "qwen3:32b", "prompt {prompt:?}");
|
||||
assert!(
|
||||
!route.model.contains("deepseek"),
|
||||
"no DeepSeek id may be fabricated: {route:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn flash_router_gate_requires_cheap_tier() {
|
||||
let deepseek = stub_runtime().with_auto_model(true);
|
||||
assert!(
|
||||
should_use_subagent_flash_router(&deepseek),
|
||||
"DeepSeek keeps the network router"
|
||||
);
|
||||
|
||||
let mut moonshot = stub_runtime_for_provider("moonshot").with_auto_model(true);
|
||||
moonshot.model = "kimi-k2.6".to_string();
|
||||
assert!(
|
||||
!should_use_subagent_flash_router(&moonshot),
|
||||
"providers without a cheap tier skip the network router"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn role_model_validation_accepts_provider_native_ids() {
|
||||
// AC: [subagents] worker_model = "kimi-k2.5" on Moonshot must not fail
|
||||
// with "Expected a DeepSeek model id".
|
||||
let mut runtime = stub_runtime_for_provider("moonshot");
|
||||
runtime
|
||||
.role_models
|
||||
.insert("worker".to_string(), "kimi-k2.5".to_string());
|
||||
|
||||
let model = configured_model_for_role_or_type(&runtime, Some("worker"), &SubAgentType::General)
|
||||
.expect("provider-native id is accepted");
|
||||
assert_eq!(model.as_deref(), Some("kimi-k2.5"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn role_model_validation_stays_strict_on_official_deepseek() {
|
||||
let mut runtime = stub_runtime();
|
||||
runtime
|
||||
.role_models
|
||||
.insert("worker".to_string(), "kimi-k2.5".to_string());
|
||||
|
||||
let err = configured_model_for_role_or_type(&runtime, Some("worker"), &SubAgentType::General)
|
||||
.expect_err("non-DeepSeek id is rejected on the official API");
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains("kimi-k2.5"), "names the bad id: {msg}");
|
||||
assert!(
|
||||
msg.contains("deepseek-v4-pro"),
|
||||
"lists accepted ids from model_completion_names_for_provider: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_requested_subagent_model_is_provider_aware() {
|
||||
assert_eq!(
|
||||
normalize_requested_subagent_model(
|
||||
"kimi-k2.5",
|
||||
"model",
|
||||
crate::config::ApiProvider::Moonshot
|
||||
)
|
||||
.expect("Moonshot accepts its own ids"),
|
||||
"kimi-k2.5"
|
||||
);
|
||||
assert_eq!(
|
||||
normalize_requested_subagent_model(
|
||||
"qwen3:32b",
|
||||
"model",
|
||||
crate::config::ApiProvider::Ollama
|
||||
)
|
||||
.expect("Ollama tags pass through"),
|
||||
"qwen3:32b"
|
||||
);
|
||||
assert!(
|
||||
normalize_requested_subagent_model(
|
||||
"kimi-k2.5",
|
||||
"model",
|
||||
crate::config::ApiProvider::Deepseek
|
||||
)
|
||||
.is_err(),
|
||||
"official DeepSeek API rejects foreign ids"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -124,6 +124,50 @@ cancelled records persist for inspection but don't occupy a slot.
|
||||
Agents that lost their `task_handle` (e.g. across a process
|
||||
restart) also don't count against the cap.
|
||||
|
||||
## Per-role models (#3018)
|
||||
|
||||
Children can run on a different model than the parent. Two config surfaces
|
||||
feed the same override map (`[subagents.models]` keys win on conflict, keys
|
||||
are case-insensitive):
|
||||
|
||||
```toml
|
||||
[subagents]
|
||||
default_model = "deepseek-v4-flash" # fallback for every role
|
||||
worker_model = "deepseek-v4-pro" # worker / general
|
||||
explorer_model = "deepseek-v4-flash" # explorer / explore
|
||||
awaiter_model = "deepseek-v4-flash" # awaiter / plan
|
||||
review_model = "deepseek-v4-pro" # review
|
||||
custom_model = "deepseek-v4-pro" # custom
|
||||
|
||||
[subagents.models]
|
||||
# Free-form role → model map; any role alias accepted by agent_open works.
|
||||
implementation = "deepseek-v4-pro"
|
||||
```
|
||||
|
||||
Model ids may be **any model the active provider accepts** — validation is
|
||||
provider-aware and happens at spawn time, not load time. On the official
|
||||
DeepSeek API only DeepSeek ids are accepted; every other provider passes the
|
||||
id through to the provider API, which is the authority. A non-DeepSeek
|
||||
example:
|
||||
|
||||
```toml
|
||||
provider = "moonshot"
|
||||
model = "kimi-k2.6"
|
||||
|
||||
[subagents]
|
||||
worker_model = "kimi-k2.5"
|
||||
```
|
||||
|
||||
Spawn-time `model` arguments on `agent_open` are validated the same way; an
|
||||
invalid id on the official DeepSeek API fails the spawn with the accepted-id
|
||||
list instead of an opaque provider 400.
|
||||
|
||||
With `/model auto`, sub-agent routing is provider-aware too: providers with a
|
||||
known big/cheap pair (DeepSeek, and the hosted DeepSeek routes on NVIDIA NIM,
|
||||
OpenRouter, Novita, SiliconFlow, SGLang, vLLM) route between that pair;
|
||||
providers without a known cheap tier (e.g. Ollama, Moonshot) skip the
|
||||
network router and keep children on the session model.
|
||||
|
||||
## Per-step API timeout (#1806, #1808)
|
||||
|
||||
Each sub-agent step wraps its DeepSeek `create_message` call in a
|
||||
|
||||
Reference in New Issue
Block a user