diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index df519f15..a62f71ca 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -200,6 +200,8 @@ and writes a SWE-bench-compatible JSONL prediction row from the resulting working-tree diff. `export` only writes the current diff. ")] Swebench(TuiPassthroughArgs), + /// Manage durable Agent Fleet runs via the TUI runtime. + Fleet(TuiPassthroughArgs), /// Run a CodeWhale-powered code review over a git diff. Review(TuiPassthroughArgs), /// Apply a patch file or stdin to the working tree. @@ -581,6 +583,10 @@ fn run() -> Result<()> { let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides); delegate_to_tui(&cli, &resolved_runtime, tui_args("swebench", args)) } + Some(Commands::Fleet(args)) => { + let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides); + delegate_to_tui(&cli, &resolved_runtime, tui_args("fleet", args)) + } Some(Commands::Review(args)) => { let resolved_runtime = resolve_runtime_for_dispatch(&mut store, &runtime_overrides); delegate_to_tui(&cli, &resolved_runtime, tui_args("review", args)) @@ -2414,6 +2420,28 @@ mod tests { Some(Commands::Setup(TuiPassthroughArgs { ref args })) if args == &["--skills", "--local"] )); + + let cli = parse_ok(&["codewhale", "fleet", "init"]); + assert!(cli.prompt.is_empty()); + assert!(matches!( + cli.command, + Some(Commands::Fleet(TuiPassthroughArgs { ref args })) if args == &["init"] + )); + + let cli = parse_ok(&[ + "codewhale", + "fleet", + "run", + "tasks.json", + "--max-workers", + "2", + ]); + assert!(cli.prompt.is_empty()); + assert!(matches!( + cli.command, + Some(Commands::Fleet(TuiPassthroughArgs { ref args })) + if args == &["run", "tasks.json", "--max-workers", "2"] + )); } #[test] diff --git a/crates/tui/src/acp_server.rs b/crates/tui/src/acp_server.rs index 6d28c66d..dd4727ce 100644 --- a/crates/tui/src/acp_server.rs +++ b/crates/tui/src/acp_server.rs @@ -181,11 +181,13 @@ impl AcpServer { async fn run_prompt(&self, prompt: &str, cwd: &PathBuf) -> Result { let _cwd_guard = ScopedCurrentDir::new(cwd)?; - let client = DeepSeekClient::new(&self.config)?; - let route = crate::resolve_cli_auto_route(&self.config, &self.model, prompt).await; + let route = crate::resolve_cli_auto_route(&self.config, &self.model, prompt).await?; + let execution_config = crate::config_for_cli_route(&self.config, &route); + let client = DeepSeekClient::new(&execution_config)?; let reasoning_effort = route .reasoning_effort - .map(|effort| effort.as_setting().to_string()); + .and_then(|effort| effort.api_value_for_provider(execution_config.api_provider())) + .map(str::to_string); let request = MessageRequest { model: route.model, diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 3aa03fd6..c73bbeb0 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -2639,6 +2639,7 @@ impl Config { return model.trim().to_string(); } if let Some(model) = self.default_text_model.as_deref() + && !root_deepseek_model_is_foreign_to_direct_provider(provider, model) && let Some(normalized) = normalize_model_name_for_provider(provider, model) { return normalized; @@ -3358,6 +3359,31 @@ impl Config { } } +fn root_deepseek_model_is_foreign_to_direct_provider(provider: ApiProvider, model: &str) -> bool { + if matches!(provider, ApiProvider::Deepseek | ApiProvider::DeepseekCN) + || provider_passes_model_through(provider) + { + return false; + } + if matches!( + provider, + ApiProvider::NvidiaNim + | ApiProvider::Openrouter + | ApiProvider::Novita + | ApiProvider::Fireworks + | ApiProvider::Siliconflow + | ApiProvider::SiliconflowCn + | ApiProvider::Sglang + | ApiProvider::Vllm + | ApiProvider::Volcengine + | ApiProvider::Atlascloud + | ApiProvider::WanjieArk + ) { + return false; + } + normalize_model_name(model).is_some() +} + // === Defaults === fn default_config_path() -> Option { @@ -9136,6 +9162,18 @@ http_headers = { "X-Model-Provider-Id" = "from-file" } assert_eq!(pinned.default_model(), "gpt-5.5-codex-preview"); } + #[test] + fn direct_provider_ignores_foreign_deepseek_root_default_model() { + let config = Config { + provider: Some("zai".to_string()), + default_text_model: Some(DEFAULT_TEXT_MODEL.to_string()), + ..Default::default() + }; + + assert_eq!(config.api_provider(), ApiProvider::Zai); + assert_eq!(config.default_model(), DEFAULT_ZAI_MODEL); + } + #[test] fn insecure_skip_tls_verify_is_scoped_to_active_provider() { let mut providers = ProvidersConfig::default(); diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 1d5353cd..808ea121 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -49,6 +49,7 @@ mod lsp; mod mcp; mod mcp_server; mod memory; +mod model_inventory; mod model_routing; mod models; mod network_policy; @@ -4762,11 +4763,12 @@ async fn run_review(config: &Config, args: ReviewArgs) -> Result<()> { .model .or_else(|| config.default_text_model.clone()) .unwrap_or_else(|| config.default_model()); - let route = resolve_cli_auto_route(config, &model, &diff).await; - let model = route.model; + let route = resolve_cli_auto_route(config, &model, &diff).await?; + let execution_config = config_for_cli_route(config, &route); + let model = route.model.clone(); let reasoning_effort = route .reasoning_effort - .and_then(|effort| cli_reasoning_effort_value(config, effort)); + .and_then(|effort| cli_reasoning_effort_value(&execution_config, effort)); let system = SystemPrompt::Text( "You are a senior code reviewer. Focus on bugs, risks, behavioral regressions, and missing tests. \ @@ -4776,7 +4778,7 @@ Provide findings ordered by severity with file references, then open questions, let user_prompt = format!("Review the following diff and provide feedback:\n\n{diff}\n\nEnd of diff."); - let client = DeepSeekClient::new(config)?; + let client = DeepSeekClient::new(&execution_config)?; let request = MessageRequest { model: model.clone(), messages: vec![Message { @@ -6096,6 +6098,7 @@ async fn run_interactive( } struct CliAutoRoute { + provider: crate::config::ApiProvider, model: String, reasoning_effort: Option, auto_model: bool, @@ -6110,15 +6113,36 @@ fn cli_reasoning_effort_value( .map(str::to_string) } -async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> CliAutoRoute { +fn config_for_cli_route(config: &Config, route: &CliAutoRoute) -> Config { + let mut execution_config = config.clone(); + execution_config.provider = Some(route.provider.as_str().to_string()); + execution_config + .provider_config_for_mut(route.provider) + .model = Some(route.model.clone()); + if matches!( + route.provider, + crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN + ) { + execution_config.default_text_model = Some(route.model.clone()); + } + execution_config +} + +async fn resolve_cli_auto_route( + config: &Config, + model: &str, + prompt: &str, +) -> Result { if model.trim().eq_ignore_ascii_case("auto") { let selection = - model_routing::resolve_auto_route_with_flash(config, prompt, "", "auto", "auto").await; - CliAutoRoute { + model_routing::resolve_auto_route_with_inventory(config, prompt, "", "auto", "auto") + .await?; + Ok(CliAutoRoute { + provider: selection.provider, model: selection.model, reasoning_effort: selection.reasoning_effort, auto_model: true, - } + }) } else { // When --model is not `auto`, fall back to the reasoning_effort // declared in the user's config.toml. The previous hard-coded `None` @@ -6126,13 +6150,14 @@ async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> C // call, which (for example) prevented vllm + Qwen3 users from // disabling thinking via `reasoning_effort = "off"` and caused // 30+ second SSE idle timeouts on trivial prompts. - CliAutoRoute { + Ok(CliAutoRoute { + provider: config.api_provider(), model: model.to_string(), reasoning_effort: config .reasoning_effort() .map(crate::tui::app::ReasoningEffort::from_setting), auto_model: false, - } + }) } } @@ -6140,11 +6165,12 @@ async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()> use crate::client::DeepSeekClient; use crate::models::{ContentBlock, Message, MessageRequest}; - let client = DeepSeekClient::new(config)?; - let route = resolve_cli_auto_route(config, model, prompt).await; + let route = resolve_cli_auto_route(config, model, prompt).await?; + let execution_config = config_for_cli_route(config, &route); + let client = DeepSeekClient::new(&execution_config)?; let reasoning_effort = route .reasoning_effort - .and_then(|effort| cli_reasoning_effort_value(config, effort)); + .and_then(|effort| cli_reasoning_effort_value(&execution_config, effort)); let request = MessageRequest { model: route.model, @@ -6182,12 +6208,13 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result use crate::client::DeepSeekClient; use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt}; - let client = DeepSeekClient::new(config)?; - let route = resolve_cli_auto_route(config, model, prompt).await; - let model = route.model; + let route = resolve_cli_auto_route(config, model, prompt).await?; + let execution_config = config_for_cli_route(config, &route); + let client = DeepSeekClient::new(&execution_config)?; + let model = route.model.clone(); let reasoning_effort = route .reasoning_effort - .and_then(|effort| cli_reasoning_effort_value(config, effort)); + .and_then(|effort| cli_reasoning_effort_value(&execution_config, effort)); let request = MessageRequest { model: model.clone(), messages: vec![Message { @@ -6358,12 +6385,13 @@ async fn run_exec_agent( use crate::tools::todo::new_shared_todo_list; use crate::tui::app::AppMode; - let route = resolve_cli_auto_route(config, model, prompt).await; + let route = resolve_cli_auto_route(config, model, prompt).await?; + let execution_config = config_for_cli_route(config, &route); let auto_model = route.auto_model; let effective_model = route.model; let effective_reasoning_effort = route .reasoning_effort - .and_then(|effort| cli_reasoning_effort_value(config, effort)); + .and_then(|effort| cli_reasoning_effort_value(&execution_config, effort)); let settings = crate::settings::Settings::load().unwrap_or_default(); let auto_compact_enabled = if crate::settings::Settings::auto_compact_explicitly_configured() { @@ -6381,24 +6409,24 @@ async fn run_exec_agent( ..Default::default() }; - let network_policy = config.network.clone().map(|toml_cfg| { + let network_policy = execution_config.network.clone().map(|toml_cfg| { crate::network_policy::NetworkPolicyDecider::with_default_audit(toml_cfg.into_runtime()) }); - let lsp_config = config + let lsp_config = execution_config .lsp .clone() .map(crate::config::LspConfigToml::into_runtime); let engine_config = EngineConfig { model: effective_model.clone(), workspace: workspace.clone(), - allow_shell: auto_approve || config.allow_shell(), + allow_shell: auto_approve || execution_config.allow_shell(), trust_mode, - notes_path: config.notes_path(), - mcp_config_path: config.mcp_config_path(), - skills_dir: config.skills_dir(), + notes_path: execution_config.notes_path(), + mcp_config_path: execution_config.mcp_config_path(), + skills_dir: execution_config.skills_dir(), instructions: { - let mut instrs: Vec = config + let mut instrs: Vec = execution_config .instructions_paths() .into_iter() .map(Into::into) @@ -6411,39 +6439,45 @@ async fn run_exec_agent( } instrs }, - project_context_pack_enabled: config.project_context_pack_enabled(), + project_context_pack_enabled: execution_config.project_context_pack_enabled(), translation_enabled: false, show_thinking: settings.show_thinking, max_steps: max_turns, max_subagents, - interactive_launch_limit: config.interactive_launch_limit(), - features: config.features(), + interactive_launch_limit: execution_config.interactive_launch_limit(), + features: execution_config.features(), compaction, - capacity: crate::core::capacity::CapacityControllerConfig::from_app_config(config), + capacity: crate::core::capacity::CapacityControllerConfig::from_app_config( + &execution_config, + ), todos: new_shared_todo_list(), plan_state: new_shared_plan_state(), goal_state: crate::tools::goal::new_shared_goal_state(), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, network_policy, - snapshots_enabled: config.snapshots_config().enabled, - snapshots_max_workspace_bytes: config + snapshots_enabled: execution_config.snapshots_config().enabled, + snapshots_max_workspace_bytes: execution_config .snapshots_config() .max_workspace_gb .saturating_mul(1024 * 1024 * 1024), lsp_config, runtime_services: crate::tools::spec::RuntimeToolServices::default(), - subagent_model_overrides: config.subagent_model_overrides(), - subagent_api_timeout: std::time::Duration::from_secs(config.subagent_api_timeout_secs()), - stream_chunk_timeout: std::time::Duration::from_secs(config.stream_chunk_timeout_secs()), - subagent_heartbeat_timeout: std::time::Duration::from_secs( - config.subagent_heartbeat_timeout_secs(), + subagent_model_overrides: execution_config.subagent_model_overrides(), + subagent_api_timeout: std::time::Duration::from_secs( + execution_config.subagent_api_timeout_secs(), ), - prefer_bwrap: config.prefer_bwrap.unwrap_or(false), - memory_enabled: config.memory_enabled(), - memory_path: config.memory_path(), - speech_output_dir: config.speech_output_dir(), - vision_config: config.vision_model_config(), - strict_tool_mode: config.strict_tool_mode.unwrap_or(false), + stream_chunk_timeout: std::time::Duration::from_secs( + execution_config.stream_chunk_timeout_secs(), + ), + subagent_heartbeat_timeout: std::time::Duration::from_secs( + execution_config.subagent_heartbeat_timeout_secs(), + ), + prefer_bwrap: execution_config.prefer_bwrap.unwrap_or(false), + memory_enabled: execution_config.memory_enabled(), + memory_path: execution_config.memory_path(), + speech_output_dir: execution_config.speech_output_dir(), + vision_config: execution_config.vision_model_config(), + strict_tool_mode: execution_config.strict_tool_mode.unwrap_or(false), goal_objective: None, goal_token_budget: None, goal_status: crate::tools::goal::GoalStatus::Active, @@ -6454,15 +6488,21 @@ async fn run_exec_agent( .tag() .to_string(), workshop: config.workshop.clone(), - search_provider: config.search_provider(), - search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()), - search_base_url: config.search.as_ref().and_then(|s| s.base_url.clone()), - tools_always_load: config.tools_always_load(), - tools: config.tools.clone(), - verbosity: config.verbosity.clone(), + search_provider: execution_config.search_provider(), + search_api_key: execution_config + .search + .as_ref() + .and_then(|s| s.api_key.clone()), + search_base_url: execution_config + .search + .as_ref() + .and_then(|s| s.base_url.clone()), + tools_always_load: execution_config.tools_always_load(), + tools: execution_config.tools.clone(), + verbosity: execution_config.verbosity.clone(), }; - let engine_handle = spawn_engine(engine_config, config); + let engine_handle = spawn_engine(engine_config, &execution_config); let mode = if auto_approve { AppMode::Yolo } else { @@ -6514,7 +6554,7 @@ async fn run_exec_agent( reasoning_effort: effective_reasoning_effort, reasoning_effort_auto: auto_model, auto_model, - allow_shell: auto_approve || config.allow_shell(), + allow_shell: auto_approve || execution_config.allow_shell(), trust_mode, auto_approve, translation_enabled: false, @@ -6522,13 +6562,13 @@ async fn run_exec_agent( approval_mode: if auto_approve { crate::tui::approval::ApprovalMode::Auto } else { - config + execution_config .approval_policy .as_deref() .and_then(crate::tui::approval::ApprovalMode::from_config_value) .unwrap_or_default() }, - verbosity: config.verbosity.clone(), + verbosity: execution_config.verbosity.clone(), }) .await?; @@ -7246,6 +7286,50 @@ mod terminal_mode_tests { assert_eq!(resolve_exec_model(&config, None), "auto"); } + #[test] + fn exec_model_resolution_uses_provider_safe_default_for_zai() { + let _env_lock = crate::test_support::lock_test_env(); + let _codewhale_model = crate::test_support::EnvVarGuard::remove("CODEWHALE_MODEL"); + let _deepseek_model = crate::test_support::EnvVarGuard::remove("DEEPSEEK_MODEL"); + let config = Config { + provider: Some("zai".to_string()), + default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_string()), + ..Default::default() + }; + + assert_eq!( + resolve_exec_model(&config, None), + crate::config::DEFAULT_ZAI_MODEL + ); + } + + #[test] + fn cli_route_execution_config_stamps_routed_model_into_provider_slot() { + let mut providers = crate::config::ProvidersConfig::default(); + providers.deepseek.model = Some("deepseek-v4-pro".to_string()); + let config = Config { + provider: Some("deepseek".to_string()), + providers: Some(providers), + ..Default::default() + }; + let route = CliAutoRoute { + provider: crate::config::ApiProvider::Deepseek, + model: "deepseek-v4-flash".to_string(), + reasoning_effort: None, + auto_model: true, + }; + + let execution_config = config_for_cli_route(&config, &route); + + assert_eq!(execution_config.default_model(), "deepseek-v4-flash"); + assert_eq!( + execution_config + .provider_config_for(crate::config::ApiProvider::Deepseek) + .and_then(|entry| entry.model.as_deref()), + Some("deepseek-v4-flash") + ); + } + #[test] fn exec_accepts_split_prompt_words_for_windows_cmd_shims() { let cli = parse_cli(&["codewhale", "exec", "hello", "world"]); diff --git a/crates/tui/src/model_inventory.rs b/crates/tui/src/model_inventory.rs new file mode 100644 index 00000000..2be59c3b --- /dev/null +++ b/crates/tui/src/model_inventory.rs @@ -0,0 +1,309 @@ +//! Provider/model inventory for routing policy. +//! +//! This is the high-level "what can this user actually run?" object. Auto +//! routing, fleet workers, and sub-agent policy should consume this shape +//! instead of guessing model strings from global defaults. + +use serde::Serialize; + +use crate::config::{ + ApiProvider, Config, has_api_key_for, model_completion_names_for_provider, + normalize_model_name_for_provider, provider_capability, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +pub(crate) enum ModelAuthSource { + Config, + Env, + OAuthCli, + KeylessLocal, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub(crate) struct ModelRouteCandidate { + pub(crate) provider: ApiProvider, + pub(crate) provider_name: &'static str, + pub(crate) provider_display_name: &'static str, + pub(crate) model: String, + pub(crate) context_window: u32, + pub(crate) max_output: u32, + pub(crate) thinking_supported: bool, + pub(crate) cache_telemetry_supported: bool, + pub(crate) auth_source: ModelAuthSource, + pub(crate) default_for_provider: bool, + pub(crate) tags: Vec<&'static str>, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub(crate) struct ModelInventory { + pub(crate) active_provider: ApiProvider, + pub(crate) router_provider: ApiProvider, + pub(crate) router_model: &'static str, + pub(crate) router_available: bool, + pub(crate) candidates: Vec, +} + +impl ModelInventory { + pub(crate) fn from_config(config: &Config) -> Self { + let active_provider = config.api_provider(); + let mut candidates = Vec::new(); + + for provider in ApiProvider::all().iter().copied() { + let Some(auth_source) = auth_source_for_provider(config, provider) else { + continue; + }; + + let default_model = provider_default_model(config, provider); + let mut models = Vec::::new(); + if let Some(model) = configured_model_for_provider(config, provider) { + push_model(&mut models, provider, &model); + } + if provider == active_provider { + let active_model = config.default_model(); + if !active_model.trim().eq_ignore_ascii_case("auto") { + push_model(&mut models, provider, &active_model); + } + } + for model in model_completion_names_for_provider(provider) { + push_model(&mut models, provider, model); + } + if models.is_empty() { + push_model(&mut models, provider, &default_model); + } + + for model in models { + let capability = provider_capability(provider, &model); + let mut tags = Vec::new(); + if capability.context_window >= 1_000_000 { + tags.push("long_context"); + } + if capability.thinking_supported { + tags.push("thinking"); + } + if matches!( + provider, + ApiProvider::Ollama | ApiProvider::Sglang | ApiProvider::Vllm + ) { + tags.push("local"); + } + if model.eq_ignore_ascii_case(&default_model) { + tags.push("default"); + } + + candidates.push(ModelRouteCandidate { + provider, + provider_name: provider.as_str(), + provider_display_name: provider.display_name(), + default_for_provider: model.eq_ignore_ascii_case(&default_model), + model, + context_window: capability.context_window, + max_output: capability.max_output, + thinking_supported: capability.thinking_supported, + cache_telemetry_supported: capability.cache_telemetry_supported, + auth_source: auth_source.clone(), + tags, + }); + } + } + + Self { + active_provider, + router_provider: ApiProvider::Deepseek, + router_model: "deepseek-v4-flash", + router_available: has_api_key_for(config, ApiProvider::Deepseek), + candidates, + } + } + + pub(crate) fn candidate( + &self, + provider: ApiProvider, + model: &str, + ) -> Option<&ModelRouteCandidate> { + self.candidates.iter().find(|candidate| { + candidate.provider == provider && candidate.model.eq_ignore_ascii_case(model.trim()) + }) + } + + pub(crate) fn active_default(&self) -> Option<&ModelRouteCandidate> { + self.candidates + .iter() + .find(|candidate| { + candidate.provider == self.active_provider && candidate.default_for_provider + }) + .or_else(|| { + self.candidates + .iter() + .find(|candidate| candidate.provider == self.active_provider) + }) + .or_else(|| self.candidates.first()) + } + + pub(crate) fn router_context_json(&self) -> String { + serde_json::to_string(self).unwrap_or_else(|_| "{}".to_string()) + } +} + +fn push_model(models: &mut Vec, provider: ApiProvider, model: &str) { + let Some(model) = normalize_model_name_for_provider(provider, model) + .or_else(|| crate::config::normalize_custom_model_id(model)) + else { + return; + }; + if !models + .iter() + .any(|existing| existing.eq_ignore_ascii_case(&model)) + { + models.push(model); + } +} + +fn configured_model_for_provider(config: &Config, provider: ApiProvider) -> Option { + config + .provider_config_for(provider) + .and_then(|entry| entry.model.clone()) + .map(|model| model.trim().to_string()) + .filter(|model| !model.is_empty()) +} + +fn provider_default_model(config: &Config, provider: ApiProvider) -> String { + if provider == config.api_provider() { + let model = config.default_model(); + if !model.trim().eq_ignore_ascii_case("auto") { + return model; + } + } + model_completion_names_for_provider(provider) + .first() + .copied() + .unwrap_or_else(|| match provider { + ApiProvider::Ollama => crate::config::DEFAULT_OLLAMA_MODEL, + ApiProvider::Sglang => crate::config::DEFAULT_SGLANG_MODEL, + ApiProvider::Vllm => crate::config::DEFAULT_VLLM_MODEL, + _ => crate::config::DEFAULT_TEXT_MODEL, + }) + .to_string() +} + +fn auth_source_for_provider(config: &Config, provider: ApiProvider) -> Option { + if matches!( + provider, + ApiProvider::Ollama | ApiProvider::Sglang | ApiProvider::Vllm + ) { + return Some(ModelAuthSource::KeylessLocal); + } + if env_has_key_for(provider) { + return Some(ModelAuthSource::Env); + } + if provider_uses_oauth_cli(config, provider) && has_api_key_for(config, provider) { + return Some(ModelAuthSource::OAuthCli); + } + has_api_key_for(config, provider).then_some(ModelAuthSource::Config) +} + +fn provider_uses_oauth_cli(config: &Config, provider: ApiProvider) -> bool { + match provider { + ApiProvider::OpenaiCodex => true, + ApiProvider::Moonshot => config + .provider_config_for(provider) + .and_then(|entry| entry.auth_mode.as_deref()) + .is_some_and(|mode| { + let mode = mode.trim().to_ascii_lowercase().replace('-', "_"); + matches!(mode.as_str(), "kimi" | "kimi_oauth" | "kimi_cli" | "oauth") + }), + _ => false, + } +} + +fn env_has_key_for(provider: ApiProvider) -> bool { + env_keys_for_provider(provider) + .iter() + .any(|key| std::env::var(key).is_ok_and(|value| !value.trim().is_empty())) +} + +fn env_keys_for_provider(provider: ApiProvider) -> &'static [&'static str] { + match provider { + ApiProvider::Deepseek | ApiProvider::DeepseekCN => &["DEEPSEEK_API_KEY"], + ApiProvider::NvidiaNim => &["NVIDIA_API_KEY", "NVIDIA_NIM_API_KEY"], + ApiProvider::Openai => &["OPENAI_API_KEY"], + ApiProvider::Atlascloud => &["ATLASCLOUD_API_KEY"], + ApiProvider::WanjieArk => &[ + "WANJIE_ARK_API_KEY", + "WANJIE_API_KEY", + "WANJIE_MAAS_API_KEY", + ], + ApiProvider::Volcengine => &[ + "VOLCENGINE_API_KEY", + "VOLCENGINE_ARK_API_KEY", + "ARK_API_KEY", + ], + ApiProvider::Openrouter => &["OPENROUTER_API_KEY"], + ApiProvider::XiaomiMimo => &["XIAOMI_MIMO_API_KEY", "XIAOMI_API_KEY", "MIMO_API_KEY"], + ApiProvider::Novita => &["NOVITA_API_KEY"], + ApiProvider::Fireworks => &["FIREWORKS_API_KEY"], + ApiProvider::Siliconflow | ApiProvider::SiliconflowCn => &["SILICONFLOW_API_KEY"], + ApiProvider::Arcee => &["ARCEE_API_KEY"], + ApiProvider::Moonshot => &["MOONSHOT_API_KEY", "KIMI_API_KEY"], + ApiProvider::Sglang => &["SGLANG_API_KEY"], + ApiProvider::Vllm => &["VLLM_API_KEY"], + ApiProvider::Ollama => &["OLLAMA_API_KEY"], + ApiProvider::Huggingface => &["HUGGINGFACE_API_KEY", "HF_TOKEN"], + ApiProvider::Together => &["TOGETHER_API_KEY"], + ApiProvider::OpenaiCodex => &["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"], + ApiProvider::Anthropic => &["ANTHROPIC_API_KEY"], + ApiProvider::Zai => &["ZAI_API_KEY", "Z_AI_API_KEY"], + ApiProvider::Stepfun => &["STEPFUN_API_KEY", "STEP_API_KEY"], + ApiProvider::Minimax => &["MINIMAX_API_KEY"], + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn inventory_includes_only_usable_authenticated_providers() { + let _env_lock = crate::test_support::lock_test_env(); + let _deepseek = crate::test_support::EnvVarGuard::set("DEEPSEEK_API_KEY", "ds-key"); + let _zai = crate::test_support::EnvVarGuard::set("ZAI_API_KEY", "zai-key"); + let _minimax = crate::test_support::EnvVarGuard::remove("MINIMAX_API_KEY"); + let config = Config { + provider: Some("zai".to_string()), + default_text_model: Some("deepseek-v4-pro".to_string()), + ..Default::default() + }; + + let inventory = ModelInventory::from_config(&config); + + assert!(inventory.router_available); + assert!( + inventory + .candidate(ApiProvider::Zai, crate::config::ZAI_GLM_5_2_MODEL) + .is_some() + ); + assert!( + inventory + .candidates + .iter() + .all(|candidate| candidate.provider != ApiProvider::Minimax) + ); + } + + #[test] + fn inventory_marks_local_providers_keyless() { + let _env_lock = crate::test_support::lock_test_env(); + let _deepseek = crate::test_support::EnvVarGuard::remove("DEEPSEEK_API_KEY"); + let config = Config::default(); + + let inventory = ModelInventory::from_config(&config); + + assert!( + inventory + .candidates + .iter() + .any(|candidate| candidate.provider == ApiProvider::Ollama + && candidate.auth_source == ModelAuthSource::KeylessLocal) + ); + } +} diff --git a/crates/tui/src/model_routing.rs b/crates/tui/src/model_routing.rs index 274470f9..56408c0c 100644 --- a/crates/tui/src/model_routing.rs +++ b/crates/tui/src/model_routing.rs @@ -5,11 +5,12 @@ use std::time::Duration; -use anyhow::Result; +use anyhow::{Result, bail}; use crate::client::DeepSeekClient; use crate::config::{ApiProvider, Config}; use crate::llm_client::LlmClient; +use crate::model_inventory::ModelInventory; use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt}; use crate::tui::app::ReasoningEffort; @@ -223,6 +224,7 @@ impl AutoRouteSource { #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct AutoRouteSelection { + pub(crate) provider: ApiProvider, pub(crate) model: String, pub(crate) reasoning_effort: Option, pub(crate) source: AutoRouteSource, @@ -378,6 +380,7 @@ pub(crate) async fn resolve_auto_route_with_flash( .await { Ok(Some(recommendation)) => AutoRouteSelection { + provider: config.api_provider(), model: recommendation.model, reasoning_effort: recommendation.reasoning_effort, source: AutoRouteSource::FlashRouter, @@ -394,6 +397,7 @@ fn auto_route_from_heuristic( heuristic: AutoModelHeuristicSelection, ) -> AutoRouteSelection { AutoRouteSelection { + provider, model: heuristic.model, reasoning_effort: Some(normalize_auto_route_effort_for_provider( provider, @@ -403,6 +407,162 @@ fn auto_route_from_heuristic( } } +#[derive(Debug, Clone, PartialEq, Eq)] +struct InventoryAutoRouteRecommendation { + provider: ApiProvider, + model: String, + reasoning_effort: Option, +} + +pub(crate) async fn resolve_auto_route_with_inventory( + config: &Config, + latest_request: &str, + recent_context: &str, + selected_model_mode: &str, + selected_thinking_mode: &str, +) -> Result { + let inventory = ModelInventory::from_config(config); + if !inventory.router_available { + bail!( + "model auto requires a DeepSeek API key so codewhale can use deepseek-v4-flash as the non-thinking router. Run `codewhale auth set --provider deepseek` or choose an explicit model." + ); + } + + let heuristic = auto_route_from_inventory_heuristic(config, latest_request, &inventory); + if cfg!(test) { + return Ok(heuristic); + } + + match auto_route_inventory_recommendation( + config, + &inventory, + latest_request, + recent_context, + selected_model_mode, + selected_thinking_mode, + ) + .await + { + Ok(Some(recommendation)) => Ok(AutoRouteSelection { + provider: recommendation.provider, + model: recommendation.model, + reasoning_effort: recommendation.reasoning_effort, + source: AutoRouteSource::FlashRouter, + }), + Ok(None) | Err(_) => Ok(heuristic), + } +} + +fn auto_route_from_inventory_heuristic( + config: &Config, + latest_request: &str, + inventory: &ModelInventory, +) -> AutoRouteSelection { + let fallback = inventory + .active_default() + .or_else(|| inventory.candidates.first()); + let Some(candidate) = fallback else { + return AutoRouteSelection { + provider: config.api_provider(), + model: config.default_model(), + reasoning_effort: Some(crate::auto_reasoning::select(false, latest_request)), + source: AutoRouteSource::Heuristic, + }; + }; + AutoRouteSelection { + provider: candidate.provider, + model: candidate.model.clone(), + reasoning_effort: Some(crate::auto_reasoning::select(false, latest_request)), + source: AutoRouteSource::Heuristic, + } +} + +async fn auto_route_inventory_recommendation( + config: &Config, + inventory: &ModelInventory, + latest_request: &str, + recent_context: &str, + selected_model_mode: &str, + selected_thinking_mode: &str, +) -> Result> { + let mut router_config = config.clone(); + router_config.provider = Some(ApiProvider::Deepseek.as_str().to_string()); + router_config.default_text_model = Some(inventory.router_model.to_string()); + + let client = DeepSeekClient::new(&router_config)?; + let router_system = inventory_auto_router_system_prompt(inventory); + let request = MessageRequest { + model: inventory.router_model.to_string(), + messages: vec![Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: auto_route_prompt( + latest_request, + recent_context, + selected_model_mode, + selected_thinking_mode, + ), + cache_control: None, + }], + }], + max_tokens: 128, + system: Some(SystemPrompt::Text(router_system)), + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: Some("off".to_string()), + stream: Some(false), + temperature: Some(0.0), + top_p: None, + }; + + let response = + tokio::time::timeout(Duration::from_secs(4), client.create_message(request)).await??; + Ok(parse_inventory_auto_route_recommendation( + &message_response_text(&response), + inventory, + )) +} + +fn inventory_auto_router_system_prompt(inventory: &ModelInventory) -> String { + format!( + "You are the codewhale model-routing classifier. Return only compact JSON: \ +{{\"provider\":\"\",\"model\":\"\",\"thinking\":\"off|high|max\"}}.\n\ +Choose only provider/model pairs present in the inventory JSON. Use off only for trivial no-tool answers, \ +high for ordinary reasoning, and max for agentic, coding, multi-file, release, architecture, debugging, \ +security, tool-heavy, or uncertain work.\n\nInventory JSON:\n{}", + inventory.router_context_json() + ) +} + +fn parse_inventory_auto_route_recommendation( + raw: &str, + inventory: &ModelInventory, +) -> Option { + let json = extract_first_json_object(raw)?; + let value: serde_json::Value = serde_json::from_str(json).ok()?; + let provider = value + .get("provider") + .and_then(serde_json::Value::as_str) + .and_then(ApiProvider::parse)?; + let model = value.get("model").and_then(serde_json::Value::as_str)?; + let candidate = inventory.candidate(provider, model)?; + let reasoning_effort = value + .get("thinking") + .or_else(|| value.get("reasoning_effort")) + .or_else(|| value.get("effort")) + .and_then(serde_json::Value::as_str) + .and_then(parse_auto_route_reasoning_effort) + .map(|effort| normalize_auto_route_effort_for_provider(provider, effort)); + + Some(InventoryAutoRouteRecommendation { + provider, + model: candidate.model.clone(), + reasoning_effort, + }) +} + async fn auto_route_flash_recommendation( config: &Config, candidates: &RouterCandidates, @@ -695,6 +855,37 @@ mod tests { ); } + #[test] + fn inventory_auto_route_recommendation_requires_runnable_pair() { + let _env_lock = crate::test_support::lock_test_env(); + let _deepseek = crate::test_support::EnvVarGuard::set("DEEPSEEK_API_KEY", "ds-key"); + let _zai = crate::test_support::EnvVarGuard::set("ZAI_API_KEY", "zai-key"); + let config = Config { + provider: Some("zai".to_string()), + default_text_model: Some(crate::config::DEFAULT_TEXT_MODEL.to_string()), + ..Default::default() + }; + let inventory = ModelInventory::from_config(&config); + + let route = parse_inventory_auto_route_recommendation( + r#"{"provider":"zai","model":"GLM-5.2","thinking":"max"}"#, + &inventory, + ) + .expect("valid inventory route should parse"); + assert_eq!(route.provider, ApiProvider::Zai); + assert_eq!(route.model, crate::config::ZAI_GLM_5_2_MODEL); + assert_eq!(route.reasoning_effort, Some(ReasoningEffort::Max)); + + assert!( + parse_inventory_auto_route_recommendation( + r#"{"provider":"zai","model":"deepseek-v4-pro","thinking":"max"}"#, + &inventory, + ) + .is_none(), + "router must not pair a DeepSeek model with the Z.ai provider" + ); + } + #[test] fn auto_heuristic_default_routes_implement_to_pro() { assert_eq!( diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 81851973..b3f1cfd1 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -765,6 +765,16 @@ pub(crate) fn render_runtime_policy_reference() -> String { commands, git commits, or sub-agent launches. End the turn and wait \ for the user's next message.\n\n", ); + out.push_str( + "If your previous assistant message asked the user a blocking choice \ + question (for example, \"How do you want me to proceed?\" with \ + mutually exclusive options), treat the run as paused until the user \ + answers. Stale tool output, stale sub-agent completion events, or the \ + runtime tag alone do not override that pause. If a question is \ + informational and you intend to continue without waiting, say so \ + explicitly in the same message (for example, \"I am going to keep \ + moving unless you redirect me\").\n\n", + ); // ── Mode reference ───────────────────────────────────────────────── out.push_str("### Modes\n\n"); @@ -1735,6 +1745,13 @@ mod tests { && text.contains("wait for the user's next message"), "Runtime Policy Reference must pin the #3061 runtime-prompt-only guard" ); + assert!( + text.contains("blocking choice question") + && text.contains("treat the run as paused") + && text.contains("stale sub-agent completion events") + && text.contains("I am going to keep moving unless you redirect me"), + "Runtime Policy Reference must tell agents to stop after asking a blocking question" + ); assert!( text.contains("### Modes"), "Runtime Policy Reference must contain the Modes section"