fix(exec): route auto through model inventory

Add a runnable provider/model inventory and use it for CLI auto routing so exec, review, ACP, and fleet worker subprocesses stop pairing stale root DeepSeek defaults with direct providers like Z.ai.

Forward codewhale fleet through the dispatcher instead of falling back to prompt mode, and tighten the runtime prompt policy for blocking choice questions after stale wakeups.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter B
2026-06-13 12:56:43 -07:00
parent 6a353d77de
commit 2de967c858
7 changed files with 726 additions and 57 deletions
+28
View File
@@ -200,6 +200,8 @@ and writes a SWE-bench-compatible JSONL prediction row from the resulting
working-tree diff. `export` only writes the current diff.
")]
Swebench(TuiPassthroughArgs),
/// Manage durable Agent Fleet runs via the TUI runtime.
Fleet(TuiPassthroughArgs),
/// Run a CodeWhale-powered code review over a git diff.
Review(TuiPassthroughArgs),
/// Apply a patch file or stdin to the working tree.
@@ -581,6 +583,10 @@ fn run() -> Result<()> {
let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides);
delegate_to_tui(&cli, &resolved_runtime, tui_args("swebench", args))
}
Some(Commands::Fleet(args)) => {
let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides);
delegate_to_tui(&cli, &resolved_runtime, tui_args("fleet", args))
}
Some(Commands::Review(args)) => {
let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides);
delegate_to_tui(&cli, &resolved_runtime, tui_args("review", args))
@@ -2414,6 +2420,28 @@ mod tests {
Some(Commands::Setup(TuiPassthroughArgs { ref args }))
if args == &["--skills", "--local"]
));
let cli = parse_ok(&["codewhale", "fleet", "init"]);
assert!(cli.prompt.is_empty());
assert!(matches!(
cli.command,
Some(Commands::Fleet(TuiPassthroughArgs { ref args })) if args == &["init"]
));
let cli = parse_ok(&[
"codewhale",
"fleet",
"run",
"tasks.json",
"--max-workers",
"2",
]);
assert!(cli.prompt.is_empty());
assert!(matches!(
cli.command,
Some(Commands::Fleet(TuiPassthroughArgs { ref args }))
if args == &["run", "tasks.json", "--max-workers", "2"]
));
}
#[test]
+5 -3
View File
@@ -181,11 +181,13 @@ impl AcpServer {
async fn run_prompt(&self, prompt: &str, cwd: &PathBuf) -> Result<String> {
let _cwd_guard = ScopedCurrentDir::new(cwd)?;
let client = DeepSeekClient::new(&self.config)?;
let route = crate::resolve_cli_auto_route(&self.config, &self.model, prompt).await;
let route = crate::resolve_cli_auto_route(&self.config, &self.model, prompt).await?;
let execution_config = crate::config_for_cli_route(&self.config, &route);
let client = DeepSeekClient::new(&execution_config)?;
let reasoning_effort = route
.reasoning_effort
.map(|effort| effort.as_setting().to_string());
.and_then(|effort| effort.api_value_for_provider(execution_config.api_provider()))
.map(str::to_string);
let request = MessageRequest {
model: route.model,
+38
View File
@@ -2639,6 +2639,7 @@ impl Config {
return model.trim().to_string();
}
if let Some(model) = self.default_text_model.as_deref()
&& !root_deepseek_model_is_foreign_to_direct_provider(provider, model)
&& let Some(normalized) = normalize_model_name_for_provider(provider, model)
{
return normalized;
@@ -3358,6 +3359,31 @@ impl Config {
}
}
/// Reports whether a root-level default model must be ignored for `provider`.
///
/// The answer is always `false` for the two DeepSeek providers, for any
/// provider that `provider_passes_model_through` accepts, and for the hosted
/// and self-hosted providers enumerated below. For every remaining provider
/// the model counts as foreign exactly when `normalize_model_name` recognises
/// it (presumably as a DeepSeek model id — confirm against that helper).
fn root_deepseek_model_is_foreign_to_direct_provider(provider: ApiProvider, model: &str) -> bool {
    let exempt_by_kind = matches!(
        provider,
        ApiProvider::Deepseek
            | ApiProvider::DeepseekCN
            | ApiProvider::NvidiaNim
            | ApiProvider::Openrouter
            | ApiProvider::Novita
            | ApiProvider::Fireworks
            | ApiProvider::Siliconflow
            | ApiProvider::SiliconflowCn
            | ApiProvider::Sglang
            | ApiProvider::Vllm
            | ApiProvider::Volcengine
            | ApiProvider::Atlascloud
            | ApiProvider::WanjieArk
    );
    let exempt = exempt_by_kind || provider_passes_model_through(provider);
    !exempt && normalize_model_name(model).is_some()
}
// === Defaults ===
fn default_config_path() -> Option<PathBuf> {
@@ -9136,6 +9162,18 @@ http_headers = { "X-Model-Provider-Id" = "from-file" }
assert_eq!(pinned.default_model(), "gpt-5.5-codex-preview");
}
#[test]
fn direct_provider_ignores_foreign_deepseek_root_default_model() {
    // Z.ai is selected while the root default still names the DeepSeek model.
    let provider_name = "zai".to_owned();
    let stale_root_model = DEFAULT_TEXT_MODEL.to_owned();
    let config = Config {
        provider: Some(provider_name),
        default_text_model: Some(stale_root_model),
        ..Default::default()
    };

    let resolved_provider = config.api_provider();
    let resolved_model = config.default_model();

    assert_eq!(resolved_provider, ApiProvider::Zai);
    assert_eq!(resolved_model, DEFAULT_ZAI_MODEL);
}
#[test]
fn insecure_skip_tls_verify_is_scoped_to_active_provider() {
let mut providers = ProvidersConfig::default();
+137 -53
View File
@@ -49,6 +49,7 @@ mod lsp;
mod mcp;
mod mcp_server;
mod memory;
mod model_inventory;
mod model_routing;
mod models;
mod network_policy;
@@ -4762,11 +4763,12 @@ async fn run_review(config: &Config, args: ReviewArgs) -> Result<()> {
.model
.or_else(|| config.default_text_model.clone())
.unwrap_or_else(|| config.default_model());
let route = resolve_cli_auto_route(config, &model, &diff).await;
let model = route.model;
let route = resolve_cli_auto_route(config, &model, &diff).await?;
let execution_config = config_for_cli_route(config, &route);
let model = route.model.clone();
let reasoning_effort = route
.reasoning_effort
.and_then(|effort| cli_reasoning_effort_value(config, effort));
.and_then(|effort| cli_reasoning_effort_value(&execution_config, effort));
let system = SystemPrompt::Text(
"You are a senior code reviewer. Focus on bugs, risks, behavioral regressions, and missing tests. \
@@ -4776,7 +4778,7 @@ Provide findings ordered by severity with file references, then open questions,
let user_prompt =
format!("Review the following diff and provide feedback:\n\n{diff}\n\nEnd of diff.");
let client = DeepSeekClient::new(config)?;
let client = DeepSeekClient::new(&execution_config)?;
let request = MessageRequest {
model: model.clone(),
messages: vec![Message {
@@ -6096,6 +6098,7 @@ async fn run_interactive(
}
struct CliAutoRoute {
provider: crate::config::ApiProvider,
model: String,
reasoning_effort: Option<crate::tui::app::ReasoningEffort>,
auto_model: bool,
@@ -6110,15 +6113,36 @@ fn cli_reasoning_effort_value(
.map(str::to_string)
}
async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> CliAutoRoute {
fn config_for_cli_route(config: &Config, route: &CliAutoRoute) -> Config {
let mut execution_config = config.clone();
execution_config.provider = Some(route.provider.as_str().to_string());
execution_config
.provider_config_for_mut(route.provider)
.model = Some(route.model.clone());
if matches!(
route.provider,
crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN
) {
execution_config.default_text_model = Some(route.model.clone());
}
execution_config
}
async fn resolve_cli_auto_route(
config: &Config,
model: &str,
prompt: &str,
) -> Result<CliAutoRoute> {
if model.trim().eq_ignore_ascii_case("auto") {
let selection =
model_routing::resolve_auto_route_with_flash(config, prompt, "", "auto", "auto").await;
CliAutoRoute {
model_routing::resolve_auto_route_with_inventory(config, prompt, "", "auto", "auto")
.await?;
Ok(CliAutoRoute {
provider: selection.provider,
model: selection.model,
reasoning_effort: selection.reasoning_effort,
auto_model: true,
}
})
} else {
// When --model is not `auto`, fall back to the reasoning_effort
// declared in the user's config.toml. The previous hard-coded `None`
@@ -6126,13 +6150,14 @@ async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> C
// call, which (for example) prevented vllm + Qwen3 users from
// disabling thinking via `reasoning_effort = "off"` and caused
// 30+ second SSE idle timeouts on trivial prompts.
CliAutoRoute {
Ok(CliAutoRoute {
provider: config.api_provider(),
model: model.to_string(),
reasoning_effort: config
.reasoning_effort()
.map(crate::tui::app::ReasoningEffort::from_setting),
auto_model: false,
}
})
}
}
@@ -6140,11 +6165,12 @@ async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()>
use crate::client::DeepSeekClient;
use crate::models::{ContentBlock, Message, MessageRequest};
let client = DeepSeekClient::new(config)?;
let route = resolve_cli_auto_route(config, model, prompt).await;
let route = resolve_cli_auto_route(config, model, prompt).await?;
let execution_config = config_for_cli_route(config, &route);
let client = DeepSeekClient::new(&execution_config)?;
let reasoning_effort = route
.reasoning_effort
.and_then(|effort| cli_reasoning_effort_value(config, effort));
.and_then(|effort| cli_reasoning_effort_value(&execution_config, effort));
let request = MessageRequest {
model: route.model,
@@ -6182,12 +6208,13 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result
use crate::client::DeepSeekClient;
use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt};
let client = DeepSeekClient::new(config)?;
let route = resolve_cli_auto_route(config, model, prompt).await;
let model = route.model;
let route = resolve_cli_auto_route(config, model, prompt).await?;
let execution_config = config_for_cli_route(config, &route);
let client = DeepSeekClient::new(&execution_config)?;
let model = route.model.clone();
let reasoning_effort = route
.reasoning_effort
.and_then(|effort| cli_reasoning_effort_value(config, effort));
.and_then(|effort| cli_reasoning_effort_value(&execution_config, effort));
let request = MessageRequest {
model: model.clone(),
messages: vec![Message {
@@ -6358,12 +6385,13 @@ async fn run_exec_agent(
use crate::tools::todo::new_shared_todo_list;
use crate::tui::app::AppMode;
let route = resolve_cli_auto_route(config, model, prompt).await;
let route = resolve_cli_auto_route(config, model, prompt).await?;
let execution_config = config_for_cli_route(config, &route);
let auto_model = route.auto_model;
let effective_model = route.model;
let effective_reasoning_effort = route
.reasoning_effort
.and_then(|effort| cli_reasoning_effort_value(config, effort));
.and_then(|effort| cli_reasoning_effort_value(&execution_config, effort));
let settings = crate::settings::Settings::load().unwrap_or_default();
let auto_compact_enabled = if crate::settings::Settings::auto_compact_explicitly_configured() {
@@ -6381,24 +6409,24 @@ async fn run_exec_agent(
..Default::default()
};
let network_policy = config.network.clone().map(|toml_cfg| {
let network_policy = execution_config.network.clone().map(|toml_cfg| {
crate::network_policy::NetworkPolicyDecider::with_default_audit(toml_cfg.into_runtime())
});
let lsp_config = config
let lsp_config = execution_config
.lsp
.clone()
.map(crate::config::LspConfigToml::into_runtime);
let engine_config = EngineConfig {
model: effective_model.clone(),
workspace: workspace.clone(),
allow_shell: auto_approve || config.allow_shell(),
allow_shell: auto_approve || execution_config.allow_shell(),
trust_mode,
notes_path: config.notes_path(),
mcp_config_path: config.mcp_config_path(),
skills_dir: config.skills_dir(),
notes_path: execution_config.notes_path(),
mcp_config_path: execution_config.mcp_config_path(),
skills_dir: execution_config.skills_dir(),
instructions: {
let mut instrs: Vec<crate::prompts::InstructionSource> = config
let mut instrs: Vec<crate::prompts::InstructionSource> = execution_config
.instructions_paths()
.into_iter()
.map(Into::into)
@@ -6411,39 +6439,45 @@ async fn run_exec_agent(
}
instrs
},
project_context_pack_enabled: config.project_context_pack_enabled(),
project_context_pack_enabled: execution_config.project_context_pack_enabled(),
translation_enabled: false,
show_thinking: settings.show_thinking,
max_steps: max_turns,
max_subagents,
interactive_launch_limit: config.interactive_launch_limit(),
features: config.features(),
interactive_launch_limit: execution_config.interactive_launch_limit(),
features: execution_config.features(),
compaction,
capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config),
capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(
&execution_config,
),
todos: new_shared_todo_list(),
plan_state: new_shared_plan_state(),
goal_state: crate::tools::goal::new_shared_goal_state(),
max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH,
network_policy,
snapshots_enabled: config.snapshots_config().enabled,
snapshots_max_workspace_bytes: config
snapshots_enabled: execution_config.snapshots_config().enabled,
snapshots_max_workspace_bytes: execution_config
.snapshots_config()
.max_workspace_gb
.saturating_mul(1024 * 1024 * 1024),
lsp_config,
runtime_services: crate::tools::spec::RuntimeToolServices::default(),
subagent_model_overrides: config.subagent_model_overrides(),
subagent_api_timeout: std::time::Duration::from_secs(config.subagent_api_timeout_secs()),
stream_chunk_timeout: std::time::Duration::from_secs(config.stream_chunk_timeout_secs()),
subagent_heartbeat_timeout: std::time::Duration::from_secs(
config.subagent_heartbeat_timeout_secs(),
subagent_model_overrides: execution_config.subagent_model_overrides(),
subagent_api_timeout: std::time::Duration::from_secs(
execution_config.subagent_api_timeout_secs(),
),
prefer_bwrap: config.prefer_bwrap.unwrap_or(false),
memory_enabled: config.memory_enabled(),
memory_path: config.memory_path(),
speech_output_dir: config.speech_output_dir(),
vision_config: config.vision_model_config(),
strict_tool_mode: config.strict_tool_mode.unwrap_or(false),
stream_chunk_timeout: std::time::Duration::from_secs(
execution_config.stream_chunk_timeout_secs(),
),
subagent_heartbeat_timeout: std::time::Duration::from_secs(
execution_config.subagent_heartbeat_timeout_secs(),
),
prefer_bwrap: execution_config.prefer_bwrap.unwrap_or(false),
memory_enabled: execution_config.memory_enabled(),
memory_path: execution_config.memory_path(),
speech_output_dir: execution_config.speech_output_dir(),
vision_config: execution_config.vision_model_config(),
strict_tool_mode: execution_config.strict_tool_mode.unwrap_or(false),
goal_objective: None,
goal_token_budget: None,
goal_status: crate::tools::goal::GoalStatus::Active,
@@ -6454,15 +6488,21 @@ async fn run_exec_agent(
.tag()
.to_string(),
workshop: config.workshop.clone(),
search_provider: config.search_provider(),
search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()),
search_base_url: config.search.as_ref().and_then(|s| s.base_url.clone()),
tools_always_load: config.tools_always_load(),
tools: config.tools.clone(),
verbosity: config.verbosity.clone(),
search_provider: execution_config.search_provider(),
search_api_key: execution_config
.search
.as_ref()
.and_then(|s| s.api_key.clone()),
search_base_url: execution_config
.search
.as_ref()
.and_then(|s| s.base_url.clone()),
tools_always_load: execution_config.tools_always_load(),
tools: execution_config.tools.clone(),
verbosity: execution_config.verbosity.clone(),
};
let engine_handle = spawn_engine(engine_config, config);
let engine_handle = spawn_engine(engine_config, &execution_config);
let mode = if auto_approve {
AppMode::Yolo
} else {
@@ -6514,7 +6554,7 @@ async fn run_exec_agent(
reasoning_effort: effective_reasoning_effort,
reasoning_effort_auto: auto_model,
auto_model,
allow_shell: auto_approve || config.allow_shell(),
allow_shell: auto_approve || execution_config.allow_shell(),
trust_mode,
auto_approve,
translation_enabled: false,
@@ -6522,13 +6562,13 @@ async fn run_exec_agent(
approval_mode: if auto_approve {
crate::tui::approval::ApprovalMode::Auto
} else {
config
execution_config
.approval_policy
.as_deref()
.and_then(crate::tui::approval::ApprovalMode::from_config_value)
.unwrap_or_default()
},
verbosity: config.verbosity.clone(),
verbosity: execution_config.verbosity.clone(),
})
.await?;
@@ -7246,6 +7286,50 @@ mod terminal_mode_tests {
assert_eq!(resolve_exec_model(&config, None), "auto");
}
#[test]
fn exec_model_resolution_uses_provider_safe_default_for_zai() {
    use crate::test_support::{EnvVarGuard, lock_test_env};

    // Hold the env lock and clear model overrides for the duration of the test.
    let _env_lock = lock_test_env();
    let _codewhale_model = EnvVarGuard::remove("CODEWHALE_MODEL");
    let _deepseek_model = EnvVarGuard::remove("DEEPSEEK_MODEL");

    let config = Config {
        provider: Some("zai".to_owned()),
        default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_owned()),
        ..Default::default()
    };

    let resolved = resolve_exec_model(&config, None);
    assert_eq!(resolved, crate::config::DEFAULT_ZAI_MODEL);
}
#[test]
fn cli_route_execution_config_stamps_routed_model_into_provider_slot() {
    use crate::config::{ApiProvider, ProvidersConfig};

    // The provider slot starts on the pro model; the route asks for flash.
    let mut providers = ProvidersConfig::default();
    providers.deepseek.model = Some("deepseek-v4-pro".to_owned());
    let config = Config {
        provider: Some("deepseek".to_owned()),
        providers: Some(providers),
        ..Default::default()
    };
    let route = CliAutoRoute {
        provider: ApiProvider::Deepseek,
        model: "deepseek-v4-flash".to_owned(),
        reasoning_effort: None,
        auto_model: true,
    };

    let execution_config = config_for_cli_route(&config, &route);

    assert_eq!(execution_config.default_model(), "deepseek-v4-flash");
    let stamped_model = execution_config
        .provider_config_for(ApiProvider::Deepseek)
        .and_then(|entry| entry.model.as_deref());
    assert_eq!(stamped_model, Some("deepseek-v4-flash"));
}
#[test]
fn exec_accepts_split_prompt_words_for_windows_cmd_shims() {
let cli = parse_cli(&["codewhale", "exec", "hello", "world"]);
+309
View File
@@ -0,0 +1,309 @@
//! Provider/model inventory for routing policy.
//!
//! This is the high-level "what can this user actually run?" object. Auto
//! routing, fleet workers, and sub-agent policy should consume this shape
//! instead of guessing model strings from global defaults.
use serde::Serialize;
use crate::config::{
ApiProvider, Config, has_api_key_for, model_completion_names_for_provider,
normalize_model_name_for_provider, provider_capability,
};
/// Where the credentials that make a provider usable were found.
///
/// Serialized with snake_case variant names. The variant for a provider is
/// chosen by `auth_source_for_provider` in this module.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub(crate) enum ModelAuthSource {
/// `has_api_key_for` reports a key and no earlier source matched.
Config,
/// One of the provider's environment variables holds a non-blank value.
Env,
/// The provider uses an OAuth/CLI login and `has_api_key_for` reports a key.
OAuthCli,
/// Ollama, SGLang, or vLLM: listed without checking for a key.
KeylessLocal,
}
/// One provider/model pair that the inventory lists as a routing option.
///
/// Instances are built by `ModelInventory::from_config` and serialized into
/// the router prompt through `ModelInventory::router_context_json`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct ModelRouteCandidate {
/// Provider that would serve the model.
pub(crate) provider: ApiProvider,
/// Value of `provider.as_str()`.
pub(crate) provider_name: &'static str,
/// Value of `provider.display_name()`.
pub(crate) provider_display_name: &'static str,
/// Model id after normalization by `push_model`.
pub(crate) model: String,
/// Copied from `provider_capability(provider, model)`.
pub(crate) context_window: u32,
/// Copied from `provider_capability(provider, model)`.
pub(crate) max_output: u32,
/// Copied from `provider_capability(provider, model)`.
pub(crate) thinking_supported: bool,
/// Copied from `provider_capability(provider, model)`.
pub(crate) cache_telemetry_supported: bool,
/// How the provider's credentials were discovered.
pub(crate) auth_source: ModelAuthSource,
/// True when the model matches the provider's default model, ignoring ASCII case.
pub(crate) default_for_provider: bool,
/// Any of "long_context", "thinking", "local", "default" that apply.
pub(crate) tags: Vec<&'static str>,
}
/// Snapshot of the provider/model pairs built from a `Config`, plus the
/// router model used to choose between them.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub(crate) struct ModelInventory {
/// Provider returned by `config.api_provider()` when the inventory was built.
pub(crate) active_provider: ApiProvider,
/// Provider that hosts the router model; `from_config` always sets DeepSeek.
pub(crate) router_provider: ApiProvider,
/// Router model id; `from_config` always sets "deepseek-v4-flash".
pub(crate) router_model: &'static str,
/// Whether `has_api_key_for` reported a DeepSeek key.
pub(crate) router_available: bool,
/// Candidates in the order providers and models were visited.
pub(crate) candidates: Vec<ModelRouteCandidate>,
}
impl ModelInventory {
    /// Builds the inventory from `config`.
    ///
    /// Every provider in `ApiProvider::all()` that has an auth source
    /// contributes candidates. Models are gathered in this order, without
    /// case-insensitive duplicates: the provider's configured model, the
    /// active default model (active provider only, unless it is "auto"), and
    /// the provider's completion names. If none of those yields a model, the
    /// provider default is used.
    pub(crate) fn from_config(config: &Config) -> Self {
        let active_provider = config.api_provider();
        let mut candidates = Vec::new();

        for provider in ApiProvider::all().iter().copied() {
            let auth_source = match auth_source_for_provider(config, provider) {
                Some(source) => source,
                None => continue,
            };
            let default_model = provider_default_model(config, provider);
            let is_local = matches!(
                provider,
                ApiProvider::Ollama | ApiProvider::Sglang | ApiProvider::Vllm
            );

            let mut models: Vec<String> = Vec::new();
            if let Some(configured) = configured_model_for_provider(config, provider) {
                push_model(&mut models, provider, &configured);
            }
            if provider == active_provider {
                let active_model = config.default_model();
                if !active_model.trim().eq_ignore_ascii_case("auto") {
                    push_model(&mut models, provider, &active_model);
                }
            }
            for name in model_completion_names_for_provider(provider) {
                push_model(&mut models, provider, name);
            }
            if models.is_empty() {
                push_model(&mut models, provider, &default_model);
            }

            candidates.extend(models.into_iter().map(|model| {
                let capability = provider_capability(provider, &model);
                let is_default = model.eq_ignore_ascii_case(&default_model);
                // Tag order is fixed: long_context, thinking, local, default.
                let tags: Vec<&'static str> = [
                    (capability.context_window >= 1_000_000, "long_context"),
                    (capability.thinking_supported, "thinking"),
                    (is_local, "local"),
                    (is_default, "default"),
                ]
                .into_iter()
                .filter_map(|(applies, tag)| applies.then_some(tag))
                .collect();

                ModelRouteCandidate {
                    provider,
                    provider_name: provider.as_str(),
                    provider_display_name: provider.display_name(),
                    model,
                    context_window: capability.context_window,
                    max_output: capability.max_output,
                    thinking_supported: capability.thinking_supported,
                    cache_telemetry_supported: capability.cache_telemetry_supported,
                    auth_source: auth_source.clone(),
                    default_for_provider: is_default,
                    tags,
                }
            }));
        }

        Self {
            active_provider,
            router_provider: ApiProvider::Deepseek,
            router_model: "deepseek-v4-flash",
            router_available: has_api_key_for(config, ApiProvider::Deepseek),
            candidates,
        }
    }

    /// Looks up the candidate for `provider` whose model equals `model`
    /// after trimming, ignoring ASCII case.
    pub(crate) fn candidate(
        &self,
        provider: ApiProvider,
        model: &str,
    ) -> Option<&ModelRouteCandidate> {
        let wanted = model.trim();
        self.candidates
            .iter()
            .find(|entry| entry.provider == provider && entry.model.eq_ignore_ascii_case(wanted))
    }

    /// Picks the fallback candidate.
    ///
    /// Preference order: the active provider's default model, then the first
    /// candidate on the active provider, then the first candidate overall.
    pub(crate) fn active_default(&self) -> Option<&ModelRouteCandidate> {
        let mut first_on_active = None;
        for entry in &self.candidates {
            if entry.provider != self.active_provider {
                continue;
            }
            if entry.default_for_provider {
                return Some(entry);
            }
            if first_on_active.is_none() {
                first_on_active = Some(entry);
            }
        }
        first_on_active.or_else(|| self.candidates.first())
    }

    /// Serializes the inventory to JSON for the router prompt; yields "{}"
    /// if serialization fails.
    pub(crate) fn router_context_json(&self) -> String {
        match serde_json::to_string(self) {
            Ok(json) => json,
            Err(_) => "{}".to_string(),
        }
    }
}
/// Appends `model` to `models` in normalized form.
///
/// The provider-specific normalizer is tried first, then the custom-model-id
/// normalizer. Nothing is added when both reject the name or when an entry
/// equal to the normalized name (ignoring ASCII case) is already present.
fn push_model(models: &mut Vec<String>, provider: ApiProvider, model: &str) {
    let normalized = match normalize_model_name_for_provider(provider, model) {
        Some(name) => name,
        None => match crate::config::normalize_custom_model_id(model) {
            Some(name) => name,
            None => return,
        },
    };

    let already_listed = models
        .iter()
        .any(|known| known.eq_ignore_ascii_case(&normalized));
    if !already_listed {
        models.push(normalized);
    }
}
/// Returns the trimmed model from `provider`'s config entry, or `None` when
/// there is no entry, no model, or the model is blank.
fn configured_model_for_provider(config: &Config, provider: ApiProvider) -> Option<String> {
    let entry = config.provider_config_for(provider)?;
    let trimmed = entry.model.as_deref()?.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Chooses the model treated as the default for `provider`.
///
/// For the active provider this is the config's default model unless that is
/// "auto". Otherwise it is the provider's first completion name, and failing
/// that a built-in constant (dedicated ones for Ollama, SGLang, and vLLM).
fn provider_default_model(config: &Config, provider: ApiProvider) -> String {
    if provider == config.api_provider() {
        let active_model = config.default_model();
        let is_auto = active_model.trim().eq_ignore_ascii_case("auto");
        if !is_auto {
            return active_model;
        }
    }

    let fallback = match model_completion_names_for_provider(provider)
        .first()
        .copied()
    {
        Some(name) => name,
        None => match provider {
            ApiProvider::Ollama => crate::config::DEFAULT_OLLAMA_MODEL,
            ApiProvider::Sglang => crate::config::DEFAULT_SGLANG_MODEL,
            ApiProvider::Vllm => crate::config::DEFAULT_VLLM_MODEL,
            _ => crate::config::DEFAULT_TEXT_MODEL,
        },
    };
    fallback.to_string()
}
/// Determines how `provider` is authenticated, or `None` when it is not.
///
/// Checks run in this order: local providers (Ollama, SGLang, vLLM) are
/// always keyless; a populated environment variable wins next; otherwise a
/// key reported by `has_api_key_for` is attributed to the OAuth/CLI login
/// when the provider uses one, and to the config in all other cases.
fn auth_source_for_provider(config: &Config, provider: ApiProvider) -> Option<ModelAuthSource> {
    let is_local = matches!(
        provider,
        ApiProvider::Ollama | ApiProvider::Sglang | ApiProvider::Vllm
    );
    if is_local {
        return Some(ModelAuthSource::KeylessLocal);
    }
    if env_has_key_for(provider) {
        return Some(ModelAuthSource::Env);
    }
    if !has_api_key_for(config, provider) {
        return None;
    }

    let source = if provider_uses_oauth_cli(config, provider) {
        ModelAuthSource::OAuthCli
    } else {
        ModelAuthSource::Config
    };
    Some(source)
}
/// Reports whether `provider` authenticates through an OAuth/CLI login.
///
/// OpenAI Codex always does. Moonshot does only when its configured
/// `auth_mode` (trimmed, lowercased, `-` replaced by `_`) is one of "kimi",
/// "kimi_oauth", "kimi_cli", or "oauth". Every other provider does not.
fn provider_uses_oauth_cli(config: &Config, provider: ApiProvider) -> bool {
    if provider == ApiProvider::OpenaiCodex {
        return true;
    }
    if provider != ApiProvider::Moonshot {
        return false;
    }

    let Some(raw_mode) = config
        .provider_config_for(provider)
        .and_then(|entry| entry.auth_mode.as_deref())
    else {
        return false;
    };
    let mode = raw_mode.trim().to_ascii_lowercase().replace('-', "_");
    ["kimi", "kimi_oauth", "kimi_cli", "oauth"].contains(&mode.as_str())
}
/// Returns true when any environment variable listed for `provider` is set
/// to a value that is not blank after trimming.
fn env_has_key_for(provider: ApiProvider) -> bool {
    for key in env_keys_for_provider(provider) {
        if matches!(std::env::var(key), Ok(value) if !value.trim().is_empty()) {
            return true;
        }
    }
    false
}
/// Lists the environment variable names that may hold a key for `provider`.
///
/// `env_has_key_for` treats the provider as authenticated when any listed
/// variable is non-blank. The match has no wildcard arm, so every provider
/// variant must be listed here explicitly. Ollama, SGLang, and vLLM have
/// entries too, but `auth_source_for_provider` returns the keyless source
/// for them before the environment is consulted.
fn env_keys_for_provider(provider: ApiProvider) -> &'static [&'static str] {
match provider {
// Both DeepSeek endpoints share one variable.
ApiProvider::Deepseek | ApiProvider::DeepseekCN => &["DEEPSEEK_API_KEY"],
ApiProvider::NvidiaNim => &["NVIDIA_API_KEY", "NVIDIA_NIM_API_KEY"],
ApiProvider::Openai => &["OPENAI_API_KEY"],
ApiProvider::Atlascloud => &["ATLASCLOUD_API_KEY"],
ApiProvider::WanjieArk => &[
"WANJIE_ARK_API_KEY",
"WANJIE_API_KEY",
"WANJIE_MAAS_API_KEY",
],
ApiProvider::Volcengine => &[
"VOLCENGINE_API_KEY",
"VOLCENGINE_ARK_API_KEY",
"ARK_API_KEY",
],
ApiProvider::Openrouter => &["OPENROUTER_API_KEY"],
ApiProvider::XiaomiMimo => &["XIAOMI_MIMO_API_KEY", "XIAOMI_API_KEY", "MIMO_API_KEY"],
ApiProvider::Novita => &["NOVITA_API_KEY"],
ApiProvider::Fireworks => &["FIREWORKS_API_KEY"],
// Both SiliconFlow endpoints share one variable.
ApiProvider::Siliconflow | ApiProvider::SiliconflowCn => &["SILICONFLOW_API_KEY"],
ApiProvider::Arcee => &["ARCEE_API_KEY"],
ApiProvider::Moonshot => &["MOONSHOT_API_KEY", "KIMI_API_KEY"],
ApiProvider::Sglang => &["SGLANG_API_KEY"],
ApiProvider::Vllm => &["VLLM_API_KEY"],
ApiProvider::Ollama => &["OLLAMA_API_KEY"],
ApiProvider::Huggingface => &["HUGGINGFACE_API_KEY", "HF_TOKEN"],
ApiProvider::Together => &["TOGETHER_API_KEY"],
ApiProvider::OpenaiCodex => &["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"],
ApiProvider::Anthropic => &["ANTHROPIC_API_KEY"],
ApiProvider::Zai => &["ZAI_API_KEY", "Z_AI_API_KEY"],
ApiProvider::Stepfun => &["STEPFUN_API_KEY", "STEP_API_KEY"],
ApiProvider::Minimax => &["MINIMAX_API_KEY"],
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{EnvVarGuard, lock_test_env};

    #[test]
    fn inventory_includes_only_usable_authenticated_providers() {
        // DeepSeek and Z.ai have keys in the environment; MiniMax does not.
        let _env_lock = lock_test_env();
        let _deepseek = EnvVarGuard::set("DEEPSEEK_API_KEY", "ds-key");
        let _zai = EnvVarGuard::set("ZAI_API_KEY", "zai-key");
        let _minimax = EnvVarGuard::remove("MINIMAX_API_KEY");
        let config = Config {
            provider: Some("zai".to_owned()),
            default_text_model: Some("deepseek-v4-pro".to_owned()),
            ..Default::default()
        };

        let inventory = ModelInventory::from_config(&config);

        assert!(inventory.router_available);
        let glm = inventory.candidate(ApiProvider::Zai, crate::config::ZAI_GLM_5_2_MODEL);
        assert!(glm.is_some());
        let lists_minimax = inventory
            .candidates
            .iter()
            .any(|candidate| candidate.provider == ApiProvider::Minimax);
        assert!(!lists_minimax);
    }

    #[test]
    fn inventory_marks_local_providers_keyless() {
        let _env_lock = lock_test_env();
        let _deepseek = EnvVarGuard::remove("DEEPSEEK_API_KEY");

        let inventory = ModelInventory::from_config(&Config::default());

        let ollama_is_keyless = inventory.candidates.iter().any(|candidate| {
            candidate.provider == ApiProvider::Ollama
                && candidate.auth_source == ModelAuthSource::KeylessLocal
        });
        assert!(ollama_is_keyless);
    }
}
+192 -1
View File
@@ -5,11 +5,12 @@
use std::time::Duration;
use anyhow::Result;
use anyhow::{Result, bail};
use crate::client::DeepSeekClient;
use crate::config::{ApiProvider, Config};
use crate::llm_client::LlmClient;
use crate::model_inventory::ModelInventory;
use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
use crate::tui::app::ReasoningEffort;
@@ -223,6 +224,7 @@ impl AutoRouteSource {
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct AutoRouteSelection {
pub(crate) provider: ApiProvider,
pub(crate) model: String,
pub(crate) reasoning_effort: Option<ReasoningEffort>,
pub(crate) source: AutoRouteSource,
@@ -378,6 +380,7 @@ pub(crate) async fn resolve_auto_route_with_flash(
.await
{
Ok(Some(recommendation)) => AutoRouteSelection {
provider: config.api_provider(),
model: recommendation.model,
reasoning_effort: recommendation.reasoning_effort,
source: AutoRouteSource::FlashRouter,
@@ -394,6 +397,7 @@ fn auto_route_from_heuristic(
heuristic: AutoModelHeuristicSelection,
) -> AutoRouteSelection {
AutoRouteSelection {
provider,
model: heuristic.model,
reasoning_effort: Some(normalize_auto_route_effort_for_provider(
provider,
@@ -403,6 +407,162 @@ fn auto_route_from_heuristic(
}
}
/// Router answer after it has been validated against the inventory.
///
/// Produced by `parse_inventory_auto_route_recommendation`, which only
/// returns a value when the provider/model pair is an inventory candidate.
#[derive(Debug, Clone, PartialEq, Eq)]
struct InventoryAutoRouteRecommendation {
/// Provider named in the router reply.
provider: ApiProvider,
/// Model id as stored on the matching inventory candidate.
model: String,
/// Effort parsed from the reply, if any, normalized for `provider`.
reasoning_effort: Option<ReasoningEffort>,
}
pub(crate) async fn resolve_auto_route_with_inventory(
config: &Config,
latest_request: &str,
recent_context: &str,
selected_model_mode: &str,
selected_thinking_mode: &str,
) -> Result<AutoRouteSelection> {
let inventory = ModelInventory::from_config(config);
if !inventory.router_available {
bail!(
"model auto requires a DeepSeek API key so codewhale can use deepseek-v4-flash as the non-thinking router. Run `codewhale auth set --provider deepseek` or choose an explicit model."
);
}
let heuristic = auto_route_from_inventory_heuristic(config, latest_request, &inventory);
if cfg!(test) {
return Ok(heuristic);
}
match auto_route_inventory_recommendation(
config,
&inventory,
latest_request,
recent_context,
selected_model_mode,
selected_thinking_mode,
)
.await
{
Ok(Some(recommendation)) => Ok(AutoRouteSelection {
provider: recommendation.provider,
model: recommendation.model,
reasoning_effort: recommendation.reasoning_effort,
source: AutoRouteSource::FlashRouter,
}),
Ok(None) | Err(_) => Ok(heuristic),
}
}
fn auto_route_from_inventory_heuristic(
config: &Config,
latest_request: &str,
inventory: &ModelInventory,
) -> AutoRouteSelection {
let fallback = inventory
.active_default()
.or_else(|| inventory.candidates.first());
let Some(candidate) = fallback else {
return AutoRouteSelection {
provider: config.api_provider(),
model: config.default_model(),
reasoning_effort: Some(crate::auto_reasoning::select(false, latest_request)),
source: AutoRouteSource::Heuristic,
};
};
AutoRouteSelection {
provider: candidate.provider,
model: candidate.model.clone(),
reasoning_effort: Some(crate::auto_reasoning::select(false, latest_request)),
source: AutoRouteSource::Heuristic,
}
}
/// Asks the router model to pick a provider/model pair from `inventory`.
///
/// The request goes out on a clone of `config` re-pointed at DeepSeek with
/// the router model as the root default. It is non-streaming, has reasoning
/// set to "off", is capped at 128 output tokens, and is abandoned after four
/// seconds.
///
/// # Errors
///
/// Fails when the client cannot be built, the call times out, or the call
/// itself fails. Returns `Ok(None)` when the reply does not parse into a
/// pair present in the inventory.
async fn auto_route_inventory_recommendation(
    config: &Config,
    inventory: &ModelInventory,
    latest_request: &str,
    recent_context: &str,
    selected_model_mode: &str,
    selected_thinking_mode: &str,
) -> Result<Option<InventoryAutoRouteRecommendation>> {
    let router_model = inventory.router_model.to_string();

    let mut router_config = config.clone();
    router_config.provider = Some(ApiProvider::Deepseek.as_str().to_string());
    router_config.default_text_model = Some(router_model.clone());
    let client = DeepSeekClient::new(&router_config)?;

    let system_text = inventory_auto_router_system_prompt(inventory);
    let user_text = auto_route_prompt(
        latest_request,
        recent_context,
        selected_model_mode,
        selected_thinking_mode,
    );
    let user_message = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: user_text,
            cache_control: None,
        }],
    };

    let request = MessageRequest {
        model: router_model,
        messages: vec![user_message],
        max_tokens: 128,
        system: Some(SystemPrompt::Text(system_text)),
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        reasoning_effort: Some("off".to_string()),
        stream: Some(false),
        temperature: Some(0.0),
        top_p: None,
    };

    // First `?` surfaces the timeout, second the client error.
    let deadline = Duration::from_secs(4);
    let response = tokio::time::timeout(deadline, client.create_message(request)).await??;
    let reply = message_response_text(&response);
    Ok(parse_inventory_auto_route_recommendation(&reply, inventory))
}
/// Builds the system prompt for the router call.
///
/// The prompt tells the router to answer with compact JSON containing
/// `provider`, `model`, and `thinking` (off, high, or max), restricted to
/// pairs present in the inventory. The inventory itself is appended as JSON
/// from `ModelInventory::router_context_json`.
fn inventory_auto_router_system_prompt(inventory: &ModelInventory) -> String {
format!(
"You are the codewhale model-routing classifier. Return only compact JSON: \
{{\"provider\":\"<provider>\",\"model\":\"<model>\",\"thinking\":\"off|high|max\"}}.\n\
Choose only provider/model pairs present in the inventory JSON. Use off only for trivial no-tool answers, \
high for ordinary reasoning, and max for agentic, coding, multi-file, release, architecture, debugging, \
security, tool-heavy, or uncertain work.\n\nInventory JSON:\n{}",
inventory.router_context_json()
)
}
/// Parses the router reply and validates it against `inventory`.
///
/// Returns `None` when no JSON object can be extracted, the JSON is invalid,
/// `provider` or `model` is missing or not a string, the provider name is
/// unknown, or the pair is not an inventory candidate. The effort is read
/// from the first key present among `thinking`, `reasoning_effort`, and
/// `effort`; an absent or unparseable effort yields `None` for that field
/// only.
fn parse_inventory_auto_route_recommendation(
    raw: &str,
    inventory: &ModelInventory,
) -> Option<InventoryAutoRouteRecommendation> {
    let payload: serde_json::Value =
        serde_json::from_str(extract_first_json_object(raw)?).ok()?;

    let provider_name = payload.get("provider")?.as_str()?;
    let provider = ApiProvider::parse(provider_name)?;
    let model_name = payload.get("model")?.as_str()?;
    let candidate = inventory.candidate(provider, model_name)?;

    // The first key that exists wins, even if its value is not a string.
    let reasoning_effort = ["thinking", "reasoning_effort", "effort"]
        .into_iter()
        .find_map(|key| payload.get(key))
        .and_then(serde_json::Value::as_str)
        .and_then(parse_auto_route_reasoning_effort)
        .map(|effort| normalize_auto_route_effort_for_provider(provider, effort));

    Some(InventoryAutoRouteRecommendation {
        provider,
        // Use the inventory's spelling of the model id, not the router's.
        model: candidate.model.clone(),
        reasoning_effort,
    })
}
async fn auto_route_flash_recommendation(
config: &Config,
candidates: &RouterCandidates,
@@ -695,6 +855,37 @@ mod tests {
);
}
#[test]
fn inventory_auto_route_recommendation_requires_runnable_pair() {
    use crate::test_support::{EnvVarGuard, lock_test_env};

    let _env_lock = lock_test_env();
    let _deepseek = EnvVarGuard::set("DEEPSEEK_API_KEY", "ds-key");
    let _zai = EnvVarGuard::set("ZAI_API_KEY", "zai-key");
    let config = Config {
        provider: Some("zai".to_owned()),
        default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_owned()),
        ..Default::default()
    };
    let inventory = ModelInventory::from_config(&config);

    // A pair that exists in the inventory parses, with the canonical model id.
    let runnable_reply = r#"{"provider":"zai","model":"GLM-5.2","thinking":"max"}"#;
    let route = parse_inventory_auto_route_recommendation(runnable_reply, &inventory)
        .expect("valid inventory route should parse");
    assert_eq!(route.provider, ApiProvider::Zai);
    assert_eq!(route.model, crate::config::ZAI_GLM_5_2_MODEL);
    assert_eq!(route.reasoning_effort, Some(ReasoningEffort::Max));

    // A DeepSeek model on the Z.ai provider is not in the inventory.
    let mismatched_reply = r#"{"provider":"zai","model":"deepseek-v4-pro","thinking":"max"}"#;
    let rejected = parse_inventory_auto_route_recommendation(mismatched_reply, &inventory);
    assert!(
        rejected.is_none(),
        "router must not pair a DeepSeek model with the Z.ai provider"
    );
}
#[test]
fn auto_heuristic_default_routes_implement_to_pro() {
assert_eq!(
+17
View File
@@ -765,6 +765,16 @@ pub(crate) fn render_runtime_policy_reference() -> String {
commands, git commits, or sub-agent launches. End the turn and wait \
for the user's next message.\n\n",
);
out.push_str(
"If your previous assistant message asked the user a blocking choice \
question (for example, \"How do you want me to proceed?\" with \
mutually exclusive options), treat the run as paused until the user \
answers. Stale tool output, stale sub-agent completion events, or the \
runtime tag alone do not override that pause. If a question is \
informational and you intend to continue without waiting, say so \
explicitly in the same message (for example, \"I am going to keep \
moving unless you redirect me\").\n\n",
);
// ── Mode reference ─────────────────────────────────────────────────
out.push_str("### Modes\n\n");
@@ -1735,6 +1745,13 @@ mod tests {
&& text.contains("wait for the user's next message"),
"Runtime Policy Reference must pin the #3061 runtime-prompt-only guard"
);
assert!(
text.contains("blocking choice question")
&& text.contains("treat the run as paused")
&& text.contains("stale sub-agent completion events")
&& text.contains("I am going to keep moving unless you redirect me"),
"Runtime Policy Reference must tell agents to stop after asking a blocking question"
);
assert!(
text.contains("### Modes"),
"Runtime Policy Reference must contain the Modes section"