diff --git a/crates/tui/src/commands/config.rs b/crates/tui/src/commands/config.rs index 3e72c67c..4753de59 100644 --- a/crates/tui/src/commands/config.rs +++ b/crates/tui/src/commands/config.rs @@ -1,14 +1,19 @@ //! Config commands: config, settings, mode switches, trust, logout use std::path::{Path, PathBuf}; +use std::time::Duration; use super::CommandResult; +use crate::client::DeepSeekClient; use crate::config::{COMMON_DEEPSEEK_MODELS, clear_api_key, normalize_model_name}; use crate::config_ui::{ConfigUiMode, parse_mode}; +use crate::llm_client::LlmClient; use crate::localization::resolve_locale; +use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt}; use crate::settings::Settings; -use crate::tui::app::{App, AppAction, AppMode, OnboardingState, SidebarFocus}; +use crate::tui::app::{App, AppAction, AppMode, OnboardingState, ReasoningEffort, SidebarFocus}; use crate::tui::approval::ApprovalMode; +use anyhow::Result; /// Open the interactive config editor. 
/// @@ -91,7 +96,13 @@ fn show_single_setting(app: &App, key: &str) -> CommandResult { let value = match key.as_str() { "model" => { if app.auto_model { - Some("auto (auto-select by request complexity)".to_string()) + let mut label = "auto (auto-select model per turn)".to_string(); + if let Some(effective) = app.last_effective_model.as_deref() + && effective != "auto" + { + label.push_str(&format!("; last: {effective}")); + } + Some(label) } else { Some(app.model.clone()) } @@ -247,16 +258,20 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) -> if value.trim().eq_ignore_ascii_case("auto") { app.auto_model = true; app.model = "auto".to_string(); + app.last_effective_model = None; + app.reasoning_effort = ReasoningEffort::Auto; + app.last_effective_reasoning_effort = None; app.update_model_compaction_budget(); app.session.last_prompt_tokens = None; app.session.last_completion_tokens = None; return CommandResult::with_message_and_action( - "model = auto (auto-select by request complexity)".to_string(), + "model = auto (auto-select model and thinking per turn)".to_string(), AppAction::UpdateCompaction(app.compaction_config()), ); } // Clear auto mode when a specific model is set app.auto_model = false; + app.last_effective_model = None; let Some(model) = normalize_model_name(value) else { return CommandResult::error(format!( "Invalid model '{value}'. Expected a DeepSeek model ID. 
Common models: {}", @@ -385,7 +400,13 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) -> } "default_model" => { if let Some(ref model) = settings.default_model { + app.auto_model = model.trim().eq_ignore_ascii_case("auto"); app.model.clone_from(model); + app.last_effective_model = None; + if app.auto_model { + app.reasoning_effort = ReasoningEffort::Auto; + app.last_effective_reasoning_effort = None; + } app.update_model_compaction_budget(); app.session.last_prompt_tokens = None; app.session.last_completion_tokens = None; @@ -604,14 +625,6 @@ fn expand_tilde(raw: &str) -> String { /// Default → Flash (cost savings). pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String { let len = input.chars().count(); - // Short messages → Flash - if len < 100 { - return "deepseek-v4-flash".to_string(); - } - // Long complex requests → Pro - if len > 500 { - return "deepseek-v4-pro".to_string(); - } let lower = input.to_lowercase(); let complex_keywords = [ "refactor", @@ -630,10 +643,249 @@ pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String { if complex_keywords.iter().any(|kw| lower.contains(kw)) { return "deepseek-v4-pro".to_string(); } + // Short messages → Flash + if len < 100 { + return "deepseek-v4-flash".to_string(); + } + // Long complex requests → Pro + if len > 500 { + return "deepseek-v4-pro".to_string(); + } // Default to Flash for cost savings "deepseek-v4-flash".to_string() } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AutoRouteRecommendation { + pub model: String, + pub reasoning_effort: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum AutoRouteSource { + FlashRouter, + Heuristic, +} + +impl AutoRouteSource { + #[must_use] + pub fn label(self) -> &'static str { + match self { + AutoRouteSource::FlashRouter => "flash-router", + AutoRouteSource::Heuristic => "heuristic", + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct AutoRouteSelection { + pub 
model: String, + pub reasoning_effort: Option, + pub source: AutoRouteSource, +} + +pub const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = "\ +You are the DeepSeek TUI auto-routing classifier. Return only compact JSON: \ +{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \ +Use deepseek-v4-flash for trivial, conversational, status, or single-step work. \ +Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, \ +tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. \ +Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \ +agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work."; + +/// Parse the Flash router's JSON-only response. +/// +/// The runtime treats classifier output as untrusted: only known V4 model IDs +/// and supported reasoning tiers are accepted. Anything else falls back to the +/// deterministic heuristic. 
+pub fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteRecommendation> {
+    let json = extract_first_json_object(raw)?;
+    let value: serde_json::Value = serde_json::from_str(json).ok()?;
+    // The model is mandatory and must map to a known V4 ID; otherwise the
+    // whole response is rejected and the caller uses the heuristic instead.
+    let model = value.get("model").and_then(serde_json::Value::as_str)?;
+    let model = normalize_auto_route_model(model)?;
+    // Thinking is optional. The first key that is present wins; a non-string
+    // or unrecognised value yields `None` rather than rejecting the response.
+    let reasoning_effort = value
+        .get("thinking")
+        .or_else(|| value.get("reasoning_effort"))
+        .or_else(|| value.get("effort"))
+        .and_then(serde_json::Value::as_str)
+        .and_then(parse_auto_route_reasoning_effort);
+
+    Some(AutoRouteRecommendation {
+        model: model.to_string(),
+        reasoning_effort,
+    })
+}
+
+/// Return the slice spanning the first `{` through the last `}` in `raw`.
+///
+/// Returns `None` when either brace is missing or when the last `}` comes
+/// before the first `{` (for example `"} {"`). The slice is not validated
+/// here; the caller is expected to parse it as JSON.
+fn extract_first_json_object(raw: &str) -> Option<&str> {
+    let start = raw.find('{')?;
+    let end = raw.rfind('}')?;
+    // `then` is lazy. `then_some` would evaluate the slice eagerly and panic
+    // on reversed braces before the guard could reject them.
+    (end > start).then(|| &raw[start..=end])
+}
+
+/// Map a router-supplied model name (case-insensitive, trimmed) onto one of
+/// the two canonical V4 model IDs, or `None` for anything else.
+fn normalize_auto_route_model(model: &str) -> Option<&'static str> {
+    match model.trim().to_ascii_lowercase().as_str() {
+        "deepseek-v4-pro" | "v4-pro" | "pro" => Some("deepseek-v4-pro"),
+        "deepseek-v4-flash" | "v4-flash" | "flash" => Some("deepseek-v4-flash"),
+        _ => None,
+    }
+}
+
+/// Map a router-supplied thinking tier (case-insensitive, trimmed) onto a
+/// [`ReasoningEffort`], or `None` when the value is not recognised.
+///
+/// The legacy low/medium spellings are accepted and promoted to `High`.
+fn parse_auto_route_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
+    match effort.trim().to_ascii_lowercase().as_str() {
+        "off" | "disabled" | "none" | "false" => Some(ReasoningEffort::Off),
+        "low" | "minimal" | "medium" | "mid" | "high" => Some(ReasoningEffort::High),
+        "max" | "maximum" | "xhigh" => Some(ReasoningEffort::Max),
+        _ => None,
+    }
+}
+
+/// Promote `Low` and `Medium` to `High`; every other tier is returned as-is.
+#[must_use]
+pub fn normalize_auto_route_effort(effort: ReasoningEffort) -> ReasoningEffort {
+    match effort {
+        ReasoningEffort::Low | ReasoningEffort::Medium => ReasoningEffort::High,
+        other => other,
+    }
+}
+
+/// Pick the model and thinking tier for one turn.
+///
+/// Asks the Flash router first. If the router call fails, times out, or
+/// returns nothing usable, the deterministic heuristic is used instead, so
+/// this function always produces a selection.
+pub async fn resolve_auto_route_with_flash(
+    config: &crate::config::Config,
+    latest_request: &str,
+    recent_context: &str,
+    selected_model_mode: &str,
+    selected_thinking_mode: &str,
+) -> AutoRouteSelection {
+    match auto_route_flash_recommendation(
+        config,
+        latest_request,
+        recent_context,
+        selected_model_mode,
+        selected_thinking_mode,
+    )
+    .await
+    {
+        Ok(Some(recommendation)) => AutoRouteSelection {
+            model: recommendation.model,
+            reasoning_effort: recommendation.reasoning_effort,
+            source: AutoRouteSource::FlashRouter,
+        },
+        // Router errors are deliberately swallowed: routing is best-effort.
+        Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode),
+    }
+}
+
+/// Build a selection from the local heuristics only (no network call).
+fn fallback_auto_route(latest_request: &str, selected_model_mode: &str) -> AutoRouteSelection {
+    AutoRouteSelection {
+        model: auto_model_heuristic(latest_request, selected_model_mode),
+        // NOTE(review): the meaning of the `false` flag is defined by
+        // `auto_reasoning::select`, which is not visible here; confirm.
+        reasoning_effort: Some(normalize_auto_route_effort(crate::auto_reasoning::select(
+            false,
+            latest_request,
+        ))),
+        source: AutoRouteSource::Heuristic,
+    }
+}
+
+/// Send one non-streaming classification request to `deepseek-v4-flash` and
+/// parse the reply.
+///
+/// Returns `Ok(None)` when the reply cannot be parsed into a recommendation,
+/// and always under `cfg!(test)` so tests never construct a client.
+///
+/// # Errors
+///
+/// Returns an error when the client cannot be constructed, when the request
+/// does not complete within four seconds, or when the request itself fails.
+async fn auto_route_flash_recommendation(
+    config: &crate::config::Config,
+    latest_request: &str,
+    recent_context: &str,
+    selected_model_mode: &str,
+    selected_thinking_mode: &str,
+) -> Result<Option<AutoRouteRecommendation>> {
+    if cfg!(test) {
+        return Ok(None);
+    }
+
+    let client = DeepSeekClient::new(config)?;
+    let request = MessageRequest {
+        model: "deepseek-v4-flash".to_string(),
+        messages: vec![Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: auto_route_prompt(
+                    latest_request,
+                    recent_context,
+                    selected_model_mode,
+                    selected_thinking_mode,
+                ),
+                cache_control: None,
+            }],
+        }],
+        // The expected reply is a tiny JSON object, so keep the budget small.
+        max_tokens: 96,
+        system: Some(SystemPrompt::Text(
+            AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string(),
+        )),
+        tools: None,
+        tool_choice: None,
+        metadata: None,
+        thinking: None,
+        reasoning_effort: Some("off".to_string()),
+        stream: Some(false),
+        temperature: Some(0.0),
+        top_p: None,
+    };
+
+    // First `?` surfaces the timeout, second `?` surfaces the client error.
+    let response =
+        tokio::time::timeout(Duration::from_secs(4), client.create_message(request)).await??;
+    Ok(parse_auto_route_recommendation(&message_response_text(
+        &response,
+    )))
+}
+
+/// Render the user message sent to the router.
+///
+/// The latest request is capped at 4,000 characters. Blank `recent_context`
+/// is replaced with a fixed placeholder.
+fn auto_route_prompt(
+    latest_request: &str,
+    recent_context: &str,
+    selected_model_mode: &str,
+    selected_thinking_mode: &str,
+) -> String {
+    // NOTE(review): `recent_context` is forwarded without a length cap,
+    // unlike `latest_request`; confirm that callers bound it.
+    format!(
+        "Session mode: agent\nSelected model mode: {}\nSelected thinking mode: {}\n\nRecent context:\n{}\n\nLatest user request:\n{}\n\nReturn JSON only.",
+        selected_model_mode,
+        selected_thinking_mode,
+        if recent_context.trim().is_empty() {
+            "No prior context."
+        } else {
+            recent_context
+        },
+        truncate_for_auto_router(latest_request, 4_000)
+    )
+}
+
+/// Flatten a response's content blocks into newline-separated text.
+///
+/// Text, tool-result content, and thinking text are appended verbatim; a tool
+/// call becomes a `[tool call: NAME]` placeholder; other block kinds are
+/// ignored.
+fn message_response_text(response: &MessageResponse) -> String {
+    let mut out = String::new();
+    for block in &response.content {
+        match block {
+            ContentBlock::Text { text, .. } | ContentBlock::ToolResult { content: text, .. } => {
+                append_router_text(&mut out, text);
+            }
+            ContentBlock::Thinking { thinking } => {
+                append_router_text(&mut out, thinking);
+            }
+            ContentBlock::ToolUse { name, .. } => {
+                append_router_text(&mut out, &format!("[tool call: {name}]"));
+            }
+            _ => {}
+        }
+    }
+    out
+}
+
+/// Append `text` to `out`, inserting a newline first when `out` is non-empty.
+fn append_router_text(out: &mut String, text: &str) {
+    if !out.is_empty() {
+        out.push('\n');
+    }
+    out.push_str(text);
+}
+
+/// Keep at most `max_chars` characters of `text`, appending `...` when
+/// anything was cut off. Counting is per `char`, so multi-byte text is never
+/// split inside a code point.
+fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
+    let mut chars = text.chars();
+    let truncated: String = chars.by_ref().take(max_chars).collect();
+    if chars.next().is_some() {
+        format!("{truncated}...")
+    } else {
+        truncated
+    }
+}
+
 /// Toggle LSP diagnostics on/off or show status.
/// /// - `/lsp on` — enable inline LSP diagnostics @@ -864,6 +1116,21 @@ mod tests { )); } + #[test] + fn test_set_model_auto_enables_auto_thinking() { + let mut app = create_test_app(); + app.reasoning_effort = ReasoningEffort::Off; + + let result = set_config(&mut app, Some("model auto")); + + assert!(result.message.is_some()); + assert!(app.auto_model); + assert_eq!(app.model, "auto"); + assert_eq!(app.reasoning_effort, ReasoningEffort::Auto); + assert!(app.last_effective_model.is_none()); + assert!(app.last_effective_reasoning_effort.is_none()); + } + #[test] fn test_set_model_accepts_future_deepseek_model_id() { let mut app = create_test_app(); @@ -883,6 +1150,45 @@ mod tests { assert_eq!(app.model, "deepseek-v4-flash"); } + #[test] + fn auto_route_recommendation_parses_strict_json() { + let rec = + parse_auto_route_recommendation(r#"{"model":"deepseek-v4-pro","thinking":"max"}"#) + .expect("valid router response should parse"); + + assert_eq!(rec.model, "deepseek-v4-pro"); + assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Max)); + } + + #[test] + fn auto_route_recommendation_accepts_wrapped_json_aliases() { + let rec = + parse_auto_route_recommendation(r#"route: {"model":"flash","reasoning_effort":"off"}"#) + .expect("wrapped router response should parse"); + + assert_eq!(rec.model, "deepseek-v4-flash"); + assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Off)); + } + + #[test] + fn auto_route_recommendation_normalizes_legacy_low_medium_to_high() { + let rec = parse_auto_route_recommendation( + r#"{"model":"deepseek-v4-pro","reasoning_effort":"medium"}"#, + ) + .expect("medium should parse for back-compat"); + + assert_eq!(rec.model, "deepseek-v4-pro"); + assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::High)); + } + + #[test] + fn auto_route_recommendation_rejects_unknown_model() { + assert!( + parse_auto_route_recommendation(r#"{"model":"some-other-model","thinking":"max"}"#,) + .is_none() + ); + } + #[test] fn 
test_set_default_mode_normal_save_reports_normalized_value() { let _lock = lock_test_env(); diff --git a/crates/tui/src/commands/core.rs b/crates/tui/src/commands/core.rs index 9e93e3ad..4c2aea9a 100644 --- a/crates/tui/src/commands/core.rs +++ b/crates/tui/src/commands/core.rs @@ -4,7 +4,7 @@ use std::fmt::Write; use crate::config::{COMMON_DEEPSEEK_MODELS, normalize_model_name}; use crate::localization::{MessageId, tr}; -use crate::tui::app::{App, AppAction, AppMode}; +use crate::tui::app::{App, AppAction, AppMode, ReasoningEffort}; use crate::tui::views::{HelpView, ModalKind, SubAgentsView}; use super::CommandResult; @@ -91,14 +91,33 @@ pub fn exit() -> CommandResult { /// way to flip both knobs without memorising the docs. pub fn model(app: &mut App, model_name: Option<&str>) -> CommandResult { if let Some(name) = model_name { + if name.trim().eq_ignore_ascii_case("auto") { + let old_model = app.model_display_label(); + app.auto_model = true; + app.model = "auto".to_string(); + app.last_effective_model = None; + app.reasoning_effort = ReasoningEffort::Auto; + app.last_effective_reasoning_effort = None; + app.update_model_compaction_budget(); + app.session.last_prompt_tokens = None; + app.session.last_completion_tokens = None; + return CommandResult::with_message_and_action( + tr(app.ui_locale, MessageId::ModelChanged) + .replace("{old}", &old_model) + .replace("{new}", "auto"), + AppAction::UpdateCompaction(app.compaction_config()), + ); + } let Some(model_id) = normalize_model_name(name) else { return CommandResult::error(format!( - "Invalid model '{name}'. Expected a DeepSeek model ID. Common models: {}", + "Invalid model '{name}'. Expected auto or a DeepSeek model ID. 
Common models: {}", COMMON_DEEPSEEK_MODELS.join(", ") )); }; - let old_model = app.model.clone(); + let old_model = app.model_display_label(); + app.auto_model = false; app.model = model_id.clone(); + app.last_effective_model = None; app.update_model_compaction_budget(); app.session.last_prompt_tokens = None; app.session.last_completion_tokens = None; @@ -427,6 +446,21 @@ mod tests { assert_eq!(app.session.last_completion_tokens, None); } + #[test] + fn test_model_auto_enables_auto_thinking() { + let mut app = create_test_app(); + app.reasoning_effort = ReasoningEffort::Off; + + let result = model(&mut app, Some("auto")); + + assert!(result.message.is_some()); + assert!(app.auto_model); + assert_eq!(app.model, "auto"); + assert_eq!(app.reasoning_effort, ReasoningEffort::Auto); + assert!(app.last_effective_model.is_none()); + assert!(app.last_effective_reasoning_effort.is_none()); + } + #[test] fn test_model_change_accepts_future_deepseek_model() { let mut app = create_test_app(); diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs index 25cdea3e..b28be125 100644 --- a/crates/tui/src/commands/mod.rs +++ b/crates/tui/src/commands/mod.rs @@ -614,6 +614,11 @@ pub fn auto_model_heuristic(input: &str, current_model: &str) -> String { config::auto_model_heuristic(input, current_model) } +pub use config::{ + AutoRouteRecommendation, AutoRouteSelection, normalize_auto_route_effort, + parse_auto_route_recommendation, resolve_auto_route_with_flash, +}; + /// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from /// Zhang et al. (arXiv:2512.24601). 
/// diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 8cfb9a7b..83db0131 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -977,10 +977,11 @@ impl Config { } } if let Some(model) = self.default_text_model.as_deref() + && !model.trim().eq_ignore_ascii_case("auto") && normalize_model_name(model).is_none() { anyhow::bail!( - "Invalid default_text_model '{model}': expected a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)." + "Invalid default_text_model '{model}': expected auto or a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)." ); } if let Some(policy) = self.approval_policy.as_deref() { @@ -1095,6 +1096,11 @@ impl Config { { return normalized; } + if let Some(model) = self.default_text_model.as_deref() + && model.trim().eq_ignore_ascii_case("auto") + { + return "auto".to_string(); + } if let Some(model) = self.default_text_model.as_deref() && let Some(normalized) = normalize_model_name(model) { @@ -3420,6 +3426,17 @@ api_key = "old-openrouter-key" Ok(()) } + #[test] + fn validate_accepts_auto_default_text_model() -> Result<()> { + let config = Config { + default_text_model: Some("auto".to_string()), + ..Default::default() + }; + config.validate()?; + assert_eq!(config.default_model(), "auto"); + Ok(()) + } + #[test] fn deepseek_model_env_overrides_default_text_model() -> Result<()> { let _lock = lock_test_env(); diff --git a/crates/tui/src/config_ui.rs b/crates/tui/src/config_ui.rs index 4472bcd3..b9d84913 100644 --- a/crates/tui/src/config_ui.rs +++ b/crates/tui/src/config_ui.rs @@ -539,7 +539,9 @@ pub fn open_browser(url: &str) -> Result<()> { } fn validate_document(doc: &ConfigUiDocument) -> Result<()> { - if normalize_model_name(&doc.runtime.model).is_none() { + if !doc.runtime.model.trim().eq_ignore_ascii_case("auto") + && normalize_model_name(&doc.runtime.model).is_none() + { bail!("invalid model '{}'", 
doc.runtime.model); } if doc.config.mcp_config_path.trim().is_empty() { @@ -557,6 +559,7 @@ fn reload_runtime_config(app: &mut App, config: &mut Config) -> Result<()> { .reasoning_effort() .unwrap_or_else(|| app.reasoning_effort.as_setting()), ); + app.last_effective_reasoning_effort = None; app.update_model_compaction_budget(); app.mcp_config_path = reloaded.mcp_config_path(); app.skills_dir = reloaded.skills_dir(); @@ -584,6 +587,7 @@ fn apply_reasoning_effort( ) -> Result<()> { let effort: ReasoningEffort = value.into(); app.reasoning_effort = effort; + app.last_effective_reasoning_effort = None; app.update_model_compaction_budget(); if persist { commands::persist_root_string_key("reasoning_effort", effort.as_setting())?; @@ -844,7 +848,10 @@ mod tests { #[test] fn build_document_reflects_app_state() { - let app = app(); + let mut app = app(); + app.auto_model = false; + app.model = "deepseek-v4-pro".to_string(); + app.reasoning_effort = ReasoningEffort::Max; let config = Config::default(); let doc = build_document(&app, &config).expect("document"); assert_eq!(doc.runtime.model, app.model); diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index f72c6c40..0b4c645a 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -48,6 +48,7 @@ use crate::tools::spec::RuntimeToolServices; use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult}; use crate::tools::subagent::{ Mailbox, SharedSubAgentManager, SubAgentRuntime, SubAgentType, new_shared_subagent_manager, + resolve_subagent_assignment_route, }; use crate::tools::todo::{SharedTodoList, new_shared_todo_list}; use crate::tools::user_input::{UserInputRequest, UserInputResponse}; @@ -515,6 +516,8 @@ impl Engine { model, goal_objective, reasoning_effort, + reasoning_effort_auto, + auto_model, allow_shell, trust_mode, auto_approve, @@ -525,6 +528,8 @@ impl Engine { model, goal_objective, reasoning_effort, + reasoning_effort_auto, + auto_model, allow_shell, 
trust_mode, auto_approve, @@ -564,7 +569,7 @@ impl Engine { continue; }; - let runtime = SubAgentRuntime::new( + let mut runtime = SubAgentRuntime::new( client, self.session.model.clone(), // Sub-agents don't inherit YOLO mode - use Agent mode defaults @@ -574,8 +579,17 @@ impl Engine { Arc::clone(&self.subagent_manager), ) .with_role_models(self.config.subagent_model_overrides.clone()) + .with_auto_model(self.session.auto_model) + .with_reasoning_effort( + self.session.reasoning_effort.clone(), + self.session.reasoning_effort_auto, + ) .with_max_spawn_depth(self.config.max_spawn_depth) .background_runtime(); + let route = resolve_subagent_assignment_route(&runtime, None, &prompt).await; + runtime.model = route.model; + runtime.reasoning_effort = route.reasoning_effort; + runtime.reasoning_effort_auto = false; let result = { let mut manager = self.subagent_manager.write().await; @@ -623,6 +637,7 @@ impl Engine { .await; } Op::SetModel { model } => { + self.session.auto_model = model.trim().eq_ignore_ascii_case("auto"); self.session.model = model; self.config.model.clone_from(&self.session.model); let _ = self @@ -654,6 +669,7 @@ impl Engine { self.session.compaction_summary_prompt = extract_compaction_summary_prompt(system_prompt.clone()); self.session.system_prompt = system_prompt; + self.session.auto_model = model.trim().eq_ignore_ascii_case("auto"); self.session.model = model; self.session.workspace = workspace.clone(); self.config.model.clone_from(&self.session.model); @@ -709,6 +725,8 @@ impl Engine { self.session.model.clone(), self.config.goal_objective.clone(), self.session.reasoning_effort.clone(), + self.session.reasoning_effort_auto, + self.session.auto_model, self.session.allow_shell, self.session.trust_mode, self.session.auto_approve, @@ -758,6 +776,8 @@ impl Engine { model: String, goal_objective: Option, reasoning_effort: Option, + reasoning_effort_auto: bool, + auto_model: bool, allow_shell: bool, trust_mode: bool, auto_approve: bool, @@ -838,6 
+858,8 @@ impl Engine { self.config.model.clone_from(&self.session.model); self.config.goal_objective = goal_objective; self.session.reasoning_effort = reasoning_effort; + self.session.reasoning_effort_auto = reasoning_effort_auto; + self.session.auto_model = auto_model; self.session.allow_shell = allow_shell; self.config.allow_shell = allow_shell; self.session.trust_mode = trust_mode; @@ -900,6 +922,11 @@ impl Engine { Arc::clone(&self.subagent_manager), ) .with_role_models(self.config.subagent_model_overrides.clone()) + .with_auto_model(self.session.auto_model) + .with_reasoning_effort( + self.session.reasoning_effort.clone(), + self.session.reasoning_effort_auto, + ) .with_max_spawn_depth(self.config.max_spawn_depth); if let Some((mailbox, cancel_token)) = mailbox_for_runtime.as_ref() { rt = rt diff --git a/crates/tui/src/core/ops.rs b/crates/tui/src/core/ops.rs index 1fed70b2..e05bf6fa 100644 --- a/crates/tui/src/core/ops.rs +++ b/crates/tui/src/core/ops.rs @@ -20,6 +20,11 @@ pub enum Op { /// Reasoning-effort tier: `"off" | "low" | "medium" | "high" | "max"`. /// `None` lets the provider apply its default. reasoning_effort: Option, + /// True when the user selected auto thinking, even though the UI sends + /// a concrete per-turn value to the model API. + reasoning_effort_auto: bool, + /// True when the user selected auto model routing. 
+ auto_model: bool, allow_shell: bool, trust_mode: bool, auto_approve: bool, @@ -89,27 +94,3 @@ pub enum Op { /// Shutdown the engine Shutdown, } - -impl Op { - /// Create a send message operation - pub fn send( - content: impl Into, - mode: AppMode, - model: impl Into, - reasoning_effort: Option, - allow_shell: bool, - trust_mode: bool, - auto_approve: bool, - ) -> Self { - Op::SendMessage { - content: content.into(), - mode, - model: model.into(), - goal_objective: None, - reasoning_effort, - allow_shell, - trust_mode, - auto_approve, - } - } -} diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs index 347b1fa5..9bbb61be 100644 --- a/crates/tui/src/core/session.rs +++ b/crates/tui/src/core/session.rs @@ -19,6 +19,11 @@ pub struct Session { /// `"off" | "low" | "medium" | "high" | "max"`. `None` lets the provider /// apply its own defaults. pub reasoning_effort: Option, + /// Whether the user selected automatic reasoning effort. + pub reasoning_effort_auto: bool, + + /// Whether the user selected automatic model routing. + pub auto_model: bool, /// Workspace directory pub workspace: PathBuf, @@ -118,6 +123,8 @@ impl Session { Self { model, reasoning_effort: None, + reasoning_effort_auto: false, + auto_model: false, workspace, system_prompt: None, compaction_summary_prompt: None, diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index e25ce06b..c5978bf8 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -2504,6 +2504,11 @@ async fn run_review(config: &Config, args: ReviewArgs) -> Result<()> { .model .or_else(|| config.default_text_model.clone()) .unwrap_or_else(|| config.default_model()); + let route = resolve_cli_auto_route(config, &model, &diff).await; + let model = route.model; + let reasoning_effort = route + .reasoning_effort + .map(|effort| effort.as_setting().to_string()); let system = SystemPrompt::Text( "You are a senior code reviewer. Focus on bugs, risks, behavioral regressions, and missing tests. 
\ @@ -2529,7 +2534,7 @@ Provide findings ordered by severity with file references, then open questions, tool_choice: None, metadata: None, thinking: None, - reasoning_effort: None, + reasoning_effort, stream: Some(false), temperature: Some(0.2), top_p: Some(0.9), @@ -3620,14 +3625,42 @@ async fn run_interactive( .await } +struct CliAutoRoute { + model: String, + reasoning_effort: Option, + auto_model: bool, +} + +async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> CliAutoRoute { + if model.trim().eq_ignore_ascii_case("auto") { + let selection = + commands::resolve_auto_route_with_flash(config, prompt, "", "auto", "auto").await; + CliAutoRoute { + model: selection.model, + reasoning_effort: selection.reasoning_effort, + auto_model: true, + } + } else { + CliAutoRoute { + model: model.to_string(), + reasoning_effort: None, + auto_model: false, + } + } +} + async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()> { use crate::client::DeepSeekClient; use crate::models::{ContentBlock, Message, MessageRequest}; let client = DeepSeekClient::new(config)?; + let route = resolve_cli_auto_route(config, model, prompt).await; + let reasoning_effort = route + .reasoning_effort + .map(|effort| effort.as_setting().to_string()); let request = MessageRequest { - model: model.to_string(), + model: route.model, messages: vec![Message { role: "user".to_string(), content: vec![ContentBlock::Text { @@ -3641,7 +3674,7 @@ async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()> tool_choice: None, metadata: None, thinking: None, - reasoning_effort: None, + reasoning_effort, stream: Some(false), temperature: None, top_p: None, @@ -3663,8 +3696,13 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt}; let client = DeepSeekClient::new(config)?; + let route = resolve_cli_auto_route(config, model, prompt).await; + let 
model = route.model; + let reasoning_effort = route + .reasoning_effort + .map(|effort| effort.as_setting().to_string()); let request = MessageRequest { - model: model.to_string(), + model: model.clone(), messages: vec![Message { role: "user".to_string(), content: vec![ContentBlock::Text { @@ -3680,7 +3718,7 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result tool_choice: None, metadata: None, thinking: None, - reasoning_effort: None, + reasoning_effort, stream: Some(false), temperature: Some(0.2), top_p: Some(0.9), @@ -3725,6 +3763,13 @@ async fn run_exec_agent( use crate::tools::todo::new_shared_todo_list; use crate::tui::app::AppMode; + let route = resolve_cli_auto_route(config, model, prompt).await; + let auto_model = route.auto_model; + let effective_model = route.model; + let effective_reasoning_effort = route + .reasoning_effort + .map(|effort| effort.as_setting().to_string()); + // Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle // architecture (issue #124) handles long-context resets via fresh contexts // rather than progressive summarization. The compaction config is still @@ -3732,8 +3777,8 @@ async fn run_exec_agent( // or direct engine config keep their old behavior. 
let compaction = CompactionConfig { enabled: false, - model: model.to_string(), - token_threshold: compaction_threshold_for_model(model), + model: effective_model.clone(), + token_threshold: compaction_threshold_for_model(&effective_model), ..Default::default() }; @@ -3747,7 +3792,7 @@ async fn run_exec_agent( .map(crate::config::LspConfigToml::into_runtime); let engine_config = EngineConfig { - model: model.to_string(), + model: effective_model.clone(), workspace: workspace.clone(), allow_shell: auto_approve || config.allow_shell(), trust_mode, @@ -3784,15 +3829,18 @@ async fn run_exec_agent( }; engine_handle - .send(Op::send( - prompt, + .send(Op::SendMessage { + content: prompt.to_string(), mode, - model, - None, - auto_approve || config.allow_shell(), + model: effective_model.clone(), + goal_objective: None, + reasoning_effort: effective_reasoning_effort, + reasoning_effort_auto: auto_model, + auto_model, + allow_shell: auto_approve || config.allow_shell(), trust_mode, auto_approve, - )) + }) .await?; #[derive(serde::Serialize)] @@ -3813,7 +3861,7 @@ async fn run_exec_agent( } let mut summary = ExecSummary { mode: "agent".to_string(), - model: model.to_string(), + model: effective_model, prompt: prompt.to_string(), ..ExecSummary::default() }; diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 0ad99f45..8c0f330e 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -1457,21 +1457,43 @@ impl RuntimeThreadManager { } let mode = parse_mode(req.mode.as_deref().unwrap_or(&thread.mode)); - let model = req.model.unwrap_or_else(|| thread.model.clone()); + let requested_model = req.model.unwrap_or_else(|| thread.model.clone()); + let auto_model = requested_model.trim().eq_ignore_ascii_case("auto"); + let (model, reasoning_effort) = if auto_model { + let selection = crate::commands::resolve_auto_route_with_flash( + &self.config, + &prompt, + "", + "auto", + "auto", + ) + .await; + ( + 
selection.model, + selection + .reasoning_effort + .map(|effort| effort.as_setting().to_string()), + ) + } else { + (requested_model, None) + }; let allow_shell = req.allow_shell.unwrap_or(thread.allow_shell); let trust_mode = req.trust_mode.unwrap_or(thread.trust_mode); let auto_approve = req.auto_approve.unwrap_or(thread.auto_approve); engine - .send(Op::send( - prompt, + .send(Op::SendMessage { + content: prompt, mode, - model.clone(), - None, + model: model.clone(), + goal_objective: None, + reasoning_effort, + reasoning_effort_auto: auto_model, + auto_model, allow_shell, trust_mode, auto_approve, - )) + }) .await .map_err(|e| anyhow!("Failed to start turn: {e}"))?; diff --git a/crates/tui/src/settings.rs b/crates/tui/src/settings.rs index b9bf6229..285b22c0 100644 --- a/crates/tui/src/settings.rs +++ b/crates/tui/src/settings.rs @@ -284,7 +284,7 @@ impl Settings { s.locale = normalize_configured_locale(&s.locale) .unwrap_or("en") .to_string(); - s.default_model = s.default_model.as_deref().and_then(normalize_model_name); + s.default_model = s.default_model.as_deref().and_then(normalize_default_model); s }; settings.apply_env_overrides(); @@ -444,9 +444,9 @@ impl Settings { return Ok(()); } - let Some(model) = normalize_model_name(trimmed) else { + let Some(model) = normalize_default_model(trimmed) else { anyhow::bail!( - "Failed to update setting: invalid model '{value}'. Expected: a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default." + "Failed to update setting: invalid model '{value}'. Expected: auto, a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default." ); }; self.default_model = Some(model); @@ -549,12 +549,21 @@ impl Settings { ("max_history", "Max input history entries"), ( "default_model", - "Default model: any DeepSeek model ID (e.g. deepseek-v4-pro)", + "Default model: auto or any DeepSeek model ID (e.g. 
deepseek-v4-pro)", ), ] } } +fn normalize_default_model(value: &str) -> Option { + let trimmed = value.trim(); + if trimmed.eq_ignore_ascii_case("auto") { + Some("auto".to_string()) + } else { + normalize_model_name(trimmed) + } +} + /// Parse a boolean value from various formats fn parse_bool(value: &str) -> Result { match value.to_lowercase().as_str() { diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 89f2dd53..0e49385d 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -557,6 +557,9 @@ pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3; pub struct SubAgentRuntime { pub client: DeepSeekClient, pub model: String, + pub auto_model: bool, + pub reasoning_effort: Option, + pub reasoning_effort_auto: bool, pub role_models: HashMap, pub context: ToolContext, pub allow_shell: bool, @@ -597,6 +600,9 @@ impl SubAgentRuntime { Self { client, model, + auto_model: false, + reasoning_effort: None, + reasoning_effort_auto: false, role_models: HashMap::new(), context, allow_shell, @@ -646,6 +652,27 @@ impl SubAgentRuntime { self } + /// Preserve whether the parent session is using per-turn model routing. + #[must_use] + pub fn with_auto_model(mut self, auto_model: bool) -> Self { + self.auto_model = auto_model; + self + } + + /// Preserve the parent's thinking configuration. `reasoning_effort_auto` + /// stays true even when the parent turn itself was sent with a concrete + /// flash-router recommendation, so children can resolve their own tier. + #[must_use] + pub fn with_reasoning_effort( + mut self, + reasoning_effort: Option, + reasoning_effort_auto: bool, + ) -> Self { + self.reasoning_effort = reasoning_effort; + self.reasoning_effort_auto = reasoning_effort_auto; + self + } + /// Return a child runtime that is deliberately detached from the parent /// turn cancellation token. 
Background sub-agents should keep running when /// the parent turn is cancelled; explicit agent cancellation still @@ -675,6 +702,9 @@ impl SubAgentRuntime { Self { client: self.client.clone(), model: self.model.clone(), + auto_model: self.auto_model, + reasoning_effort: self.reasoning_effort.clone(), + reasoning_effort_auto: self.reasoning_effort_auto, role_models: self.role_models.clone(), context: child_context, allow_shell: self.allow_shell, @@ -1621,16 +1651,14 @@ impl ToolSpec for AgentSpawnTool { if let Some(cwd) = validated_cwd { child_runtime.context.workspace = cwd; } - let effective_model = match spawn_request.model.clone() { - Some(model) => model, + let configured_model = match spawn_request.model.clone() { + Some(model) => Some(model), None => configured_model_for_role_or_type( &self.runtime, spawn_request.assignment.role.as_deref(), &spawn_request.agent_type, - )? - .unwrap_or_else(|| self.runtime.model.clone()), + )?, }; - child_runtime.model = effective_model.clone(); // Cache-aware resident mode (#529): prepend file contents to the prompt // so the child's prefix is byte-stable for DeepSeek prefix caching. 
@@ -1666,6 +1694,14 @@ impl ToolSpec for AgentSpawnTool { (spawn_request.prompt, None) }; + let route = + resolve_subagent_assignment_route(&self.runtime, configured_model, &effective_prompt) + .await; + child_runtime.model = route.model.clone(); + child_runtime.reasoning_effort = route.reasoning_effort.clone(); + child_runtime.reasoning_effort_auto = false; + let effective_model = route.model; + let mut manager = self.manager.write().await; let result = manager @@ -2743,7 +2779,7 @@ async fn run_subagent( tool_choice: Some(json!({ "type": "auto" })), metadata: None, thinking: None, - reasoning_effort: None, + reasoning_effort: runtime.reasoning_effort.clone(), stream: Some(false), temperature: None, top_p: None, @@ -3272,6 +3308,167 @@ pub(crate) fn configured_model_for_role_or_type( Ok(None) } +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct SubAgentResolvedRoute { + pub(crate) model: String, + pub(crate) reasoning_effort: Option, +} + +pub(crate) async fn resolve_subagent_assignment_route( + runtime: &SubAgentRuntime, + configured_model: Option, + prompt: &str, +) -> SubAgentResolvedRoute { + let explicit_model = configured_model.is_some(); + let mut route = fallback_subagent_assignment_route(runtime, configured_model, prompt); + + if (runtime.auto_model || runtime.reasoning_effort_auto) + && let Ok(Some(recommendation)) = subagent_flash_router(runtime, prompt).await + { + if runtime.auto_model && !explicit_model { + route.model = recommendation.model; + } + if runtime.reasoning_effort_auto { + route.reasoning_effort = recommendation + .reasoning_effort + .map(|effort| effort.as_setting().to_string()) + .or(route.reasoning_effort); + } + } + + route +} + +fn fallback_subagent_assignment_route( + runtime: &SubAgentRuntime, + configured_model: Option, + prompt: &str, +) -> SubAgentResolvedRoute { + let model = if let Some(model) = configured_model { + model + } else if runtime.auto_model { + crate::commands::auto_model_heuristic(prompt, &runtime.model) + 
} else { + runtime.model.clone() + }; + + let reasoning_effort = if runtime.reasoning_effort_auto { + let effort = match crate::auto_reasoning::select(false, prompt) { + crate::tui::app::ReasoningEffort::Low | crate::tui::app::ReasoningEffort::Medium => { + crate::tui::app::ReasoningEffort::High + } + other => other, + }; + Some(effort.as_setting().to_string()) + } else { + runtime.reasoning_effort.clone() + }; + + SubAgentResolvedRoute { + model, + reasoning_effort, + } +} + +async fn subagent_flash_router( + runtime: &SubAgentRuntime, + prompt: &str, +) -> Result> { + if cfg!(test) { + return Ok(None); + } + + let request = MessageRequest { + model: "deepseek-v4-flash".to_string(), + messages: vec![Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: subagent_router_prompt(runtime, prompt), + cache_control: None, + }], + }], + max_tokens: 96, + system: Some(SystemPrompt::Text( + SUBAGENT_ROUTER_SYSTEM_PROMPT.to_string(), + )), + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: Some("off".to_string()), + stream: Some(false), + temperature: Some(0.0), + top_p: None, + }; + + let response = tokio::time::timeout( + Duration::from_secs(4), + runtime.client.create_message(request), + ) + .await??; + Ok(crate::commands::parse_auto_route_recommendation( + &message_response_text(&response.content), + )) +} + +const SUBAGENT_ROUTER_SYSTEM_PROMPT: &str = "\ +You are the DeepSeek TUI sub-agent routing manager. Return only compact JSON: \ +{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \ +Treat each child assignment like a customer request entering a team queue: decide the least \ +sufficient worker and thinking budget for that assignment. Do not treat being a sub-agent as \ +important by itself. Use Flash for trivial, read-only, status, lookup, or single-step work. 
\ +Use Pro for coding, debugging, release work, multi-file changes, security, architecture, \ +high-risk decisions, ambiguous requests, or work likely to need tool-call judgment. Use thinking \ +off for trivial no-tool work, high for ordinary reasoning, and max only for hard, risky, \ +multi-step, uncertain, or tool-heavy work."; + +fn subagent_router_prompt(runtime: &SubAgentRuntime, prompt: &str) -> String { + format!( + "Parent selected model mode: {}\nParent selected thinking mode: {}\n\nSub-agent assignment:\n{}\n\nReturn JSON only.", + if runtime.auto_model { "auto" } else { "fixed" }, + if runtime.reasoning_effort_auto { + "auto" + } else { + runtime + .reasoning_effort + .as_deref() + .unwrap_or("provider-default") + }, + truncate_subagent_router_prompt(prompt, 4_000) + ) +} + +fn truncate_subagent_router_prompt(text: &str, max_chars: usize) -> String { + if text.chars().count() <= max_chars { + return text.to_string(); + } + let mut out = text.chars().take(max_chars).collect::(); + out.push_str("\n[truncated]"); + out +} + +fn message_response_text(blocks: &[ContentBlock]) -> String { + let mut out = String::new(); + for block in blocks { + match block { + ContentBlock::Text { text, .. 
} => { + if !out.is_empty() { + out.push('\n'); + } + out.push_str(text); + } + ContentBlock::Thinking { thinking } => { + if !out.is_empty() { + out.push('\n'); + } + out.push_str(thinking); + } + _ => {} + } + } + out +} + fn parse_optional_subagent_model(input: &Value, key: &str) -> Result, ToolError> { match input.get(key) { None | Some(Value::Null) => Ok(None), diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 443f08fa..2ad7cb94 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -398,6 +398,62 @@ fn test_build_assignment_prompt_includes_metadata() { assert!(prompt.contains("role: explorer")); } +#[test] +fn subagent_auto_model_routes_unconfigured_assignments() { + let runtime = stub_runtime().with_auto_model(true); + + assert_eq!( + fallback_subagent_assignment_route(&runtime, None, "implement the release fix").model, + "deepseek-v4-pro" + ); + assert_eq!( + fallback_subagent_assignment_route(&runtime, None, "say hello").model, + "deepseek-v4-flash" + ); +} + +#[test] +fn subagent_auto_route_respects_explicit_or_role_model() { + let runtime = stub_runtime().with_auto_model(true); + + assert_eq!( + fallback_subagent_assignment_route( + &runtime, + Some("deepseek-v4-flash".to_string()), + "implement the release fix" + ) + .model, + "deepseek-v4-flash" + ); +} + +#[test] +fn subagent_auto_reasoning_resolves_to_distinct_v4_tiers() { + let runtime = stub_runtime().with_reasoning_effort(Some("high".to_string()), true); + + assert_eq!( + fallback_subagent_assignment_route(&runtime, None, "quick lookup").reasoning_effort, + Some("high".to_string()) + ); + assert_eq!( + fallback_subagent_assignment_route(&runtime, None, "debug this release failure") + .reasoning_effort, + Some("max".to_string()) + ); +} + +#[test] +fn subagent_router_prompt_frames_assignment_as_auto_routing() { + let runtime = stub_runtime() + .with_auto_model(true) + 
.with_reasoning_effort(Some("high".to_string()), true); + let prompt = subagent_router_prompt(&runtime, "inspect one file"); + + assert!(prompt.contains("Parent selected model mode: auto")); + assert!(prompt.contains("Parent selected thinking mode: auto")); + assert!(prompt.contains("inspect one file")); +} + #[test] fn test_subagent_tool_registry_reports_unavailable_tools() { let tmp = tempdir().expect("tempdir"); @@ -1102,6 +1158,9 @@ fn stub_runtime() -> SubAgentRuntime { SubAgentRuntime { client: stub_client(), model: "deepseek-v4-flash".to_string(), + auto_model: false, + reasoning_effort: None, + reasoning_effort_auto: false, role_models: std::collections::HashMap::new(), context, allow_shell: true, diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 35b6117b..6313c719 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -9,7 +9,9 @@ use serde_json::Value; use thiserror::Error; use crate::compaction::CompactionConfig; -use crate::config::{ApiProvider, Config, SavedCredential, has_api_key, save_api_key}; +use crate::config::{ + ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, save_api_key, +}; use crate::config_ui::ConfigUiMode; use crate::core::coherence::CoherenceState; use crate::cycle_manager::{CycleBriefing, CycleConfig}; @@ -628,6 +630,8 @@ pub struct App { /// `dispatch_user_message` calls `auto_model_heuristic` to resolve the /// effective model for each outbound message. pub auto_model: bool, + /// Last concrete model chosen while `auto_model` is active. + pub last_effective_model: Option, /// Current API provider (mirrors `Config::api_provider`). /// Updated by `/provider` switches so the UI/commands can read the /// active backend without re-deriving it from the live config. @@ -635,6 +639,8 @@ pub struct App { /// Current reasoning-effort tier for DeepSeek thinking mode. /// Cycled via Shift+Tab; initialized from config at startup. 
pub reasoning_effort: ReasoningEffort, + /// Last concrete thinking tier chosen while `reasoning_effort` is auto. + pub last_effective_reasoning_effort: Option, pub workspace: PathBuf, pub config_path: Option, pub config_profile: Option, @@ -1080,8 +1086,23 @@ impl App { let use_paste_burst_detection = settings.paste_burst_detection; let ui_theme = palette::UI_THEME; let model = settings.default_model.clone().unwrap_or(model); + let auto_model = model.trim().eq_ignore_ascii_case("auto"); + let threshold_model = if auto_model { + DEFAULT_TEXT_MODEL + } else { + model.as_str() + }; let compact_threshold = - compaction_threshold_for_model_and_effort(&model, config.reasoning_effort()); + compaction_threshold_for_model_and_effort(threshold_model, config.reasoning_effort()); + let reasoning_effort = if auto_model { + ReasoningEffort::Auto + } else { + config + .reasoning_effort() + .map_or_else(ReasoningEffort::default, |s| { + ReasoningEffort::from_setting(s) + }) + }; // Start in YOLO mode if --yolo flag was passed let preferred_mode = AppMode::from_setting(&settings.default_mode); @@ -1170,13 +1191,11 @@ impl App { sticky_status: None, last_status_message_seen: None, model, - auto_model: false, + auto_model, + last_effective_model: None, api_provider: provider, - reasoning_effort: config - .reasoning_effort() - .map_or_else(ReasoningEffort::default, |s| { - ReasoningEffort::from_setting(s) - }), + reasoning_effort, + last_effective_reasoning_effort: None, workspace, config_path, config_profile, @@ -1435,6 +1454,7 @@ impl App { /// `Off` → `High` → `Max` → `Off`. 
pub fn cycle_effort(&mut self) { self.reasoning_effort = self.reasoning_effort.cycle_next(); + self.last_effective_reasoning_effort = None; self.needs_redraw = true; self.push_status_toast( format!("Thinking: {}", self.reasoning_effort.short_label()), @@ -3464,10 +3484,42 @@ impl App { } pub fn update_model_compaction_budget(&mut self) { - self.compact_threshold = compaction_threshold_for_model_and_effort( - &self.model, - self.reasoning_effort.api_value(), - ); + let model = self.effective_model_for_budget().to_string(); + self.compact_threshold = + compaction_threshold_for_model_and_effort(&model, self.reasoning_effort.api_value()); + } + + pub fn effective_model_for_budget(&self) -> &str { + if self.auto_model { + return self + .last_effective_model + .as_deref() + .filter(|model| *model != "auto") + .unwrap_or(DEFAULT_TEXT_MODEL); + } + &self.model + } + + pub fn model_display_label(&self) -> String { + if self.auto_model { + if let Some(effective) = self.last_effective_model.as_deref() + && effective != "auto" + { + return format!("auto: {effective}"); + } + return "auto".to_string(); + } + self.model.clone() + } + + pub fn reasoning_effort_display_label(&self) -> String { + if self.auto_model || self.reasoning_effort == ReasoningEffort::Auto { + if let Some(effective) = self.last_effective_reasoning_effort { + return format!("auto: {}", effective.short_label()); + } + return "auto".to_string(); + } + self.reasoning_effort.short_label().to_string() } pub fn compaction_config(&self) -> CompactionConfig { diff --git a/crates/tui/src/tui/model_picker.rs b/crates/tui/src/tui/model_picker.rs index 21a08c98..b8ee32fe 100644 --- a/crates/tui/src/tui/model_picker.rs +++ b/crates/tui/src/tui/model_picker.rs @@ -34,6 +34,7 @@ use crate::tui::views::{ModalKind, ModalView, ViewAction, ViewEvent}; /// Models the picker exposes by default. Kept short on purpose — power /// users can still type `/model ` for anything else. 
const PICKER_MODELS: &[(&str, &str)] = &[ + ("auto", "select per turn"), ("deepseek-v4-pro", "flagship"), ("deepseek-v4-flash", "fast / cheap"), ]; @@ -41,6 +42,7 @@ const PICKER_MODELS: &[(&str, &str)] = &[ /// Thinking-effort rows shown in the picker, in the order DeepSeek /// behaviorally distinguishes them. const PICKER_EFFORTS: &[ReasoningEffort] = &[ + ReasoningEffort::Auto, ReasoningEffort::Off, ReasoningEffort::High, ReasoningEffort::Max, @@ -68,7 +70,11 @@ pub struct ModelPickerView { impl ModelPickerView { #[must_use] pub fn new(app: &App) -> Self { - let initial_model = app.model.clone(); + let initial_model = if app.auto_model { + "auto".to_string() + } else { + app.model.clone() + }; let mut selected_model_idx = PICKER_MODELS .iter() .position(|(id, _)| *id == initial_model); @@ -88,7 +94,7 @@ impl ModelPickerView { let selected_effort_idx = PICKER_EFFORTS .iter() .position(|e| *e == normalized) - .unwrap_or(1); // default to High if somehow unknown + .unwrap_or(2); // default to High if somehow unknown Self { initial_model, @@ -116,6 +122,9 @@ impl ModelPickerView { } fn resolved_effort(&self) -> ReasoningEffort { + if self.resolved_model().trim().eq_ignore_ascii_case("auto") { + return ReasoningEffort::Auto; + } PICKER_EFFORTS[self.selected_effort_idx] } @@ -318,6 +327,7 @@ impl ModalView for ModelPickerView { .map(|effort| { let label = effort.short_label().to_string(); let hint = match effort { + ReasoningEffort::Auto => "auto-select per turn".to_string(), ReasoningEffort::Off => "thinking disabled".to_string(), ReasoningEffort::High => "thinking enabled (default)".to_string(), ReasoningEffort::Max => "thinking enabled, max effort".to_string(), @@ -387,6 +397,37 @@ mod tests { assert_eq!(view.resolved_effort(), ReasoningEffort::Max); } + #[test] + fn picker_initial_selection_matches_auto_state() { + let mut app = create_test_app(); + app.model = "auto".to_string(); + app.auto_model = true; + app.reasoning_effort = ReasoningEffort::Auto; + + let 
view = ModelPickerView::new(&app); + + assert_eq!(view.resolved_model(), "auto"); + assert_eq!(view.resolved_effort(), ReasoningEffort::Auto); + } + + #[test] + fn picker_auto_model_forces_auto_effort_on_apply() { + let mut app = create_test_app(); + app.model = "auto".to_string(); + app.auto_model = true; + app.reasoning_effort = ReasoningEffort::Off; + + let mut view = ModelPickerView::new(&app); + view.selected_model_idx = 0; + view.selected_effort_idx = PICKER_EFFORTS + .iter() + .position(|effort| *effort == ReasoningEffort::Max) + .expect("max effort row"); + + assert_eq!(view.resolved_model(), "auto"); + assert_eq!(view.resolved_effort(), ReasoningEffort::Auto); + } + #[test] fn picker_normalizes_low_medium_to_high() { let mut app = create_test_app(); @@ -399,6 +440,21 @@ mod tests { ); } + #[test] + fn picker_exposes_auto_and_distinct_thinking_tiers() { + let model_labels: Vec<_> = PICKER_MODELS.iter().map(|(id, _)| *id).collect(); + assert_eq!( + model_labels, + vec!["auto", "deepseek-v4-pro", "deepseek-v4-flash"] + ); + + let effort_labels: Vec<_> = PICKER_EFFORTS + .iter() + .map(|effort| effort.as_setting()) + .collect(); + assert_eq!(effort_labels, vec!["auto", "off", "high", "max"]); + } + #[test] fn picker_preserves_unknown_model_via_custom_row() { let mut app = create_test_app(); @@ -429,7 +485,7 @@ mod tests { #[test] fn tab_switches_focus_and_arrow_now_moves_effort() { let mut app = create_test_app(); - // Default is Max (index 2 = last); pin to Off so the Down arrow has + // Default is Max; pin to Off so the Down arrow has // somewhere to go. 
app.reasoning_effort = ReasoningEffort::Off; let mut view = ModelPickerView::new(&app); @@ -490,11 +546,11 @@ mod tests { } #[test] - fn picker_only_exposes_off_high_max() { + fn picker_only_exposes_auto_off_high_max() { let labels: Vec<&str> = PICKER_EFFORTS .iter() .map(|effort| effort.short_label()) .collect(); - assert_eq!(labels, vec!["off", "high", "max"]); + assert_eq!(labels, vec!["auto", "off", "high", "max"]); } } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 39f4850f..d7cfacec 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -85,8 +85,8 @@ use crate::tui::user_input::UserInputView; use super::active_cell::ActiveCell; use super::app::{ - App, AppAction, AppMode, OnboardingState, QueuedMessage, SidebarFocus, StatusToastLevel, - SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions, + App, AppAction, AppMode, OnboardingState, QueuedMessage, ReasoningEffort, SidebarFocus, + StatusToastLevel, SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions, }; use super::approval::{ ApprovalMode, ApprovalRequest, ApprovalView, ElevationRequest, ElevationView, ReviewDecision, @@ -924,8 +924,13 @@ async fn run_event_loop( } // Update session cost + let pricing_model = if app.auto_model { + app.last_effective_model.as_deref().unwrap_or(&app.model) + } else { + &app.model + }; let turn_cost = - crate::pricing::calculate_turn_cost_from_usage(&app.model, &usage); + crate::pricing::calculate_turn_cost_from_usage(pricing_model, &usage); if let Some(cost) = turn_cost { app.accrue_session_cost(cost); } @@ -1031,7 +1036,12 @@ async fn run_event_loop( } => { app.api_messages = messages; app.system_prompt = system_prompt; - app.model = model; + if app.auto_model { + app.last_effective_model = Some(model); + } else { + app.model = model; + app.last_effective_model = None; + } app.update_model_compaction_budget(); app.workspace = workspace; if (app.is_loading || app.is_compacting) @@ -1317,7 +1327,8 @@ async fn 
run_event_loop( } if let Some(next) = queued_to_send { - if let Err(err) = dispatch_user_message(app, &engine_handle, next.clone()).await { + if let Err(err) = dispatch_user_message(app, config, &engine_handle, next.clone()).await + { app.queue_message(next); app.status_message = Some(format!( "Dispatch failed ({err}); kept {} queued message(s)", @@ -2422,7 +2433,7 @@ async fn run_event_loop( app.close_slash_menu(); } if let Some(input) = app.submit_input() { - if handle_plan_choice(app, &engine_handle, &input).await? { + if handle_plan_choice(app, config, &engine_handle, &input).await? { continue; } // `# foo` quick-add (#492) — when memory is enabled, @@ -2473,7 +2484,7 @@ async fn run_event_loop( }) .await; } - submit_or_steer_message(app, &engine_handle, queued).await?; + submit_or_steer_message(app, config, &engine_handle, queued).await?; } } } @@ -3229,6 +3240,7 @@ fn queued_message_content_for_app( async fn dispatch_user_message( app: &mut App, + config: &Config, engine_handle: &EngineHandle, message: QueuedMessage, ) -> Result<()> { @@ -3300,21 +3312,61 @@ async fn dispatch_user_message( persistence_actor::persist(PersistRequest::Checkpoint(session)); } - // Resolve the effective model: when auto_model is active, use the - // heuristic to pick between Pro and Flash based on the user's input. 
+ let auto_selection = if app.auto_model || app.reasoning_effort == ReasoningEffort::Auto { + Some(resolve_auto_model_selection(app, config, &message, &content).await) + } else { + None + }; + let effective_model = if app.auto_model { - commands::auto_model_heuristic(&message.display, &app.model) + auto_selection + .as_ref() + .map(|selection| selection.model.clone()) + .unwrap_or_else(|| commands::auto_model_heuristic(&message.display, &app.model)) } else { app.model.clone() }; + let auto_controls_reasoning = app.auto_model || app.reasoning_effort == ReasoningEffort::Auto; + let effective_reasoning_effort = if auto_controls_reasoning { + let effort = auto_selection + .as_ref() + .and_then(|selection| selection.reasoning_effort) + .unwrap_or_else(|| { + normalize_auto_routed_effort(crate::auto_reasoning::select(false, &message.display)) + }); + app.last_effective_reasoning_effort = Some(effort); + Some(effort.as_setting().to_string()) + } else { + app.last_effective_reasoning_effort = None; + app.reasoning_effort.api_value().map(str::to_string) + }; + + if let Some(selection) = auto_selection.as_ref() { + if app.auto_model { + app.last_effective_model = Some(effective_model.clone()); + let mut status = format!( + "Auto model selected: {effective_model} via {}", + selection.source.label() + ); + if let Some(effort) = app.last_effective_reasoning_effort { + status.push_str(&format!("; thinking auto: {}", effort.as_setting())); + } + app.status_message = Some(status); + } + } else { + app.last_effective_model = None; + } + if let Err(err) = engine_handle .send(Op::SendMessage { content, mode: app.mode, model: effective_model, goal_objective: app.goal.goal_objective.clone(), - reasoning_effort: app.reasoning_effort.api_value().map(str::to_string), + reasoning_effort: effective_reasoning_effort, + reasoning_effort_auto: auto_controls_reasoning, + auto_model: app.auto_model, allow_shell: app.allow_shell, trust_mode: app.trust_mode, auto_approve: app.mode == 
AppMode::Yolo, @@ -3329,6 +3381,95 @@ async fn dispatch_user_message( Ok(()) } +async fn resolve_auto_model_selection( + app: &App, + config: &Config, + message: &QueuedMessage, + latest_content: &str, +) -> commands::AutoRouteSelection { + let latest_request = if latest_content.trim().is_empty() { + message.display.as_str() + } else { + latest_content + }; + commands::resolve_auto_route_with_flash( + config, + latest_request, + &recent_auto_router_context(&app.api_messages), + if app.auto_model { "auto" } else { "fixed" }, + app.reasoning_effort.as_setting(), + ) + .await +} + +fn normalize_auto_routed_effort(effort: ReasoningEffort) -> ReasoningEffort { + commands::normalize_auto_route_effort(effort) +} + +fn recent_auto_router_context(messages: &[Message]) -> String { + let mut rows = Vec::new(); + for message in messages.iter().rev().skip(1) { + if rows.len() >= 6 { + break; + } + let text = content_blocks_text(&message.content); + let text = text.trim(); + if text.is_empty() { + continue; + } + rows.push(format!( + "{}: {}", + message.role, + truncate_for_auto_router(text, 900) + )); + } + rows.reverse(); + if rows.is_empty() { + "No prior context.".to_string() + } else { + rows.join("\n") + } +} + +fn content_blocks_text(blocks: &[ContentBlock]) -> String { + let mut out = String::new(); + for block in blocks { + match block { + ContentBlock::Text { text, .. } => { + append_router_text(&mut out, text); + } + ContentBlock::Thinking { thinking } => { + append_router_text(&mut out, thinking); + } + ContentBlock::ToolUse { name, .. } => { + append_router_text(&mut out, &format!("[tool call: {name}]")); + } + ContentBlock::ToolResult { content, .. 
} => { + append_router_text(&mut out, &format!("[tool result] {content}")); + } + _ => {} + } + } + out +} + +fn append_router_text(out: &mut String, text: &str) { + if !out.is_empty() { + out.push('\n'); + } + out.push_str(text); +} + +fn truncate_for_auto_router(text: &str, max_chars: usize) -> String { + let mut chars = text.chars(); + let truncated: String = chars.by_ref().take(max_chars).collect(); + if chars.next().is_some() { + format!("{truncated}...") + } else { + truncated + } +} + async fn apply_model_and_compaction_update( engine_handle: &EngineHandle, compaction: crate::compaction::CompactionConfig, @@ -3423,11 +3564,15 @@ async fn apply_model_picker_choice( app: &mut App, engine_handle: &EngineHandle, model: String, - effort: crate::tui::app::ReasoningEffort, + mut effort: crate::tui::app::ReasoningEffort, previous_model: String, previous_effort: crate::tui::app::ReasoningEffort, ) { - let model_changed = model != previous_model; + let model_is_auto = model.trim().eq_ignore_ascii_case("auto"); + if model_is_auto { + effort = ReasoningEffort::Auto; + } + let model_changed = model != previous_model || app.auto_model != model_is_auto; let effort_changed = effort != previous_effort; if !model_changed && !effort_changed { app.status_message = Some(format!( @@ -3438,6 +3583,8 @@ async fn apply_model_picker_choice( } if model_changed { + app.auto_model = model_is_auto; + app.last_effective_model = None; app.model = model.clone(); app.update_model_compaction_budget(); app.session.last_prompt_tokens = None; @@ -3448,6 +3595,7 @@ async fn apply_model_picker_choice( } if effort_changed { app.reasoning_effort = effort; + app.last_effective_reasoning_effort = None; } // Best-effort persist; surface a status warning if the settings file @@ -3474,20 +3622,27 @@ async fn apply_model_picker_choice( apply_model_and_compaction_update(engine_handle, app.compaction_config()).await; } + let model_summary = if model_is_auto { + "auto (per-turn model)".to_string() + } else { 
+ model.clone() + }; + let previous_effort_summary = previous_effort.short_label(); + let effort_summary = if effort == ReasoningEffort::Auto { + "auto (per-turn thinking)".to_string() + } else { + effort.short_label().to_string() + }; + let mut summary = match (model_changed, effort_changed) { (true, true) => format!( - "Model: {previous_model} → {model} · thinking: {} → {}", - previous_effort.short_label(), - effort.short_label() - ), - (true, false) => format!( - "Model: {previous_model} → {model} · thinking {}", - effort.short_label() + "Model: {previous_model} → {model_summary} · thinking: {previous_effort_summary} → {effort_summary}" ), + (true, false) => { + format!("Model: {previous_model} → {model_summary} · thinking {effort_summary}") + } (false, true) => format!( - "Thinking: {} → {} · model {model}", - previous_effort.short_label(), - effort.short_label() + "Thinking: {previous_effort_summary} → {effort_summary} · model {model_summary}" ), (false, false) => unreachable!(), }; @@ -3896,7 +4051,7 @@ async fn apply_command_result( } AppAction::SendMessage(content) => { let queued = build_queued_message(app, content); - submit_or_steer_message(app, engine_handle, queued).await?; + submit_or_steer_message(app, config, engine_handle, queued).await?; } AppAction::Rlm { prompt, @@ -4417,11 +4572,14 @@ async fn queue_follow_up(app: &mut App, message: QueuedMessage) -> Result<()> { async fn submit_or_steer_message( app: &mut App, + config: &Config, engine_handle: &EngineHandle, message: QueuedMessage, ) -> Result<()> { match app.decide_submit_disposition() { - SubmitDisposition::Immediate => dispatch_user_message(app, engine_handle, message).await, + SubmitDisposition::Immediate => { + dispatch_user_message(app, config, engine_handle, message).await + } SubmitDisposition::Queue => { let count = app.queued_message_count().saturating_add(1); app.queue_message(message); @@ -4523,6 +4681,7 @@ fn parse_plan_choice(input: &str) -> Option { async fn apply_plan_choice( 
app: &mut App, + config: &Config, engine_handle: &EngineHandle, choice: PlanChoice, ) -> Result<()> { @@ -4539,7 +4698,7 @@ async fn apply_plan_choice( app.status_message = Some("Queued accepted plan execution (agent mode).".to_string()); } else { - dispatch_user_message(app, engine_handle, followup).await?; + dispatch_user_message(app, config, engine_handle, followup).await?; } } PlanChoice::AcceptYolo => { @@ -4554,7 +4713,7 @@ async fn apply_plan_choice( app.status_message = Some("Queued accepted plan execution (YOLO mode).".to_string()); } else { - dispatch_user_message(app, engine_handle, followup).await?; + dispatch_user_message(app, config, engine_handle, followup).await?; } } PlanChoice::RevisePlan => { @@ -4576,6 +4735,7 @@ async fn apply_plan_choice( async fn handle_plan_choice( app: &mut App, + config: &Config, engine_handle: &EngineHandle, input: &str, ) -> Result { @@ -4590,7 +4750,7 @@ async fn handle_plan_choice( return Ok(false); }; - apply_plan_choice(app, engine_handle, choice).await?; + apply_plan_choice(app, config, engine_handle, choice).await?; Ok(true) } @@ -4714,7 +4874,8 @@ fn render(f: &mut Frame, app: &mut App) { .and_then(|value| value.to_str()) .filter(|value| !value.is_empty()) .unwrap_or("workspace"); - let effort_label = app.reasoning_effort.short_label(); + let model_label = app.model_display_label(); + let effort_label = app.reasoning_effort_display_label(); let provider_label = match app.api_provider { crate::config::ApiProvider::Deepseek => None, crate::config::ApiProvider::DeepseekCN => None, @@ -4726,7 +4887,7 @@ fn render(f: &mut Frame, app: &mut App) { }; let header_data = HeaderData::new( app.mode, - &app.model, + &model_label, workspace_name, app.is_loading, app.ui_theme.header_bg, @@ -4737,7 +4898,7 @@ fn render(f: &mut Frame, app: &mut App) { app.session.session_cost, sanitized_prompt_tokens, ) - .with_reasoning_effort(Some(effort_label)) + .with_reasoning_effort(Some(&effort_label)) .with_provider(provider_label); let 
header_widget = HeaderWidget::new(header_data); let buf = f.buffer_mut(); @@ -5016,7 +5177,8 @@ async fn handle_view_events( if app.plan_prompt_pending { app.plan_prompt_pending = false; if let Some(choice) = plan_choice_from_option(option) - && let Err(err) = apply_plan_choice(app, engine_handle, choice).await + && let Err(err) = + apply_plan_choice(app, config, engine_handle, choice).await { app.status_message = Some(format!("Failed to apply plan selection: {err}")); } @@ -6496,7 +6658,7 @@ fn estimated_context_tokens(app: &App) -> Option { } fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> { - let max = context_window_for_model(&app.model)?; + let max = context_window_for_model(app.effective_model_for_budget())?; let max_i64 = i64::from(max); let reported = app .session diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 9fc60536..9792d4c7 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -699,10 +699,12 @@ async fn model_change_update_syncs_engine_model_before_compaction() { async fn dispatch_user_message_failed_send_clears_loading_state() { let mut app = create_test_app(); let engine = mock_engine_handle(); + let config = Config::default(); drop(engine.rx_op); let result = dispatch_user_message( &mut app, + &config, &engine.handle, QueuedMessage::new("hello".to_string(), None), ) @@ -1469,8 +1471,9 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() { app.offline_mode = true; let engine = crate::core::engine::mock_engine_handle(); + let config = Config::default(); - let handled = handle_plan_choice(&mut app, &engine.handle, "yolo") + let handled = handle_plan_choice(&mut app, &config, &engine.handle, "yolo") .await .expect("plan choice"); @@ -1479,7 +1482,7 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() { assert_eq!(app.mode, AppMode::Plan); let queued = build_queued_message(&mut app, "yolo".to_string()); - 
submit_or_steer_message(&mut app, &engine.handle, queued) + submit_or_steer_message(&mut app, &config, &engine.handle, queued) .await .expect("submit normal message"); @@ -1504,8 +1507,9 @@ async fn numeric_plan_choice_still_queues_follow_up_when_busy() { app.is_loading = true; let engine = crate::core::engine::mock_engine_handle(); + let config = Config::default(); - let handled = handle_plan_choice(&mut app, &engine.handle, "2") + let handled = handle_plan_choice(&mut app, &config, &engine.handle, "2") .await .expect("plan choice"); diff --git a/crates/tui/src/tui/widgets/footer.rs b/crates/tui/src/tui/widgets/footer.rs index 77224088..042dde6c 100644 --- a/crates/tui/src/tui/widgets/footer.rs +++ b/crates/tui/src/tui/widgets/footer.rs @@ -275,7 +275,7 @@ impl FooterProps { // to cross the 60s threshold inside `footer_worked_chip`. let worked = footer_worked_chip(app.cumulative_turn_duration); Self { - model: app.model.clone(), + model: app.model_display_label(), mode_label, mode_color, text_dim_color: app.ui_theme.text_dim,