fix: restore auto model routing (#772)

Keep auto as a local routing mode, resolve concrete model/thinking before API requests, and wire auto routing through CLI, TUI, runtime threads, and subagents.
This commit is contained in:
Hunter Bown
2026-05-05 21:22:03 -05:00
committed by GitHub
parent 63e2234c6b
commit 50780a5289
18 changed files with 1120 additions and 127 deletions
+317 -11
View File
@@ -1,14 +1,19 @@
//! Config commands: config, settings, mode switches, trust, logout
use std::path::{Path, PathBuf};
use std::time::Duration;
use super::CommandResult;
use crate::client::DeepSeekClient;
use crate::config::{COMMON_DEEPSEEK_MODELS, clear_api_key, normalize_model_name};
use crate::config_ui::{ConfigUiMode, parse_mode};
use crate::llm_client::LlmClient;
use crate::localization::resolve_locale;
use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
use crate::settings::Settings;
use crate::tui::app::{App, AppAction, AppMode, OnboardingState, SidebarFocus};
use crate::tui::app::{App, AppAction, AppMode, OnboardingState, ReasoningEffort, SidebarFocus};
use crate::tui::approval::ApprovalMode;
use anyhow::Result;
/// Open the interactive config editor.
///
@@ -91,7 +96,13 @@ fn show_single_setting(app: &App, key: &str) -> CommandResult {
let value = match key.as_str() {
"model" => {
if app.auto_model {
Some("auto (auto-select by request complexity)".to_string())
let mut label = "auto (auto-select model per turn)".to_string();
if let Some(effective) = app.last_effective_model.as_deref()
&& effective != "auto"
{
label.push_str(&format!("; last: {effective}"));
}
Some(label)
} else {
Some(app.model.clone())
}
@@ -247,16 +258,20 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
if value.trim().eq_ignore_ascii_case("auto") {
app.auto_model = true;
app.model = "auto".to_string();
app.last_effective_model = None;
app.reasoning_effort = ReasoningEffort::Auto;
app.last_effective_reasoning_effort = None;
app.update_model_compaction_budget();
app.session.last_prompt_tokens = None;
app.session.last_completion_tokens = None;
return CommandResult::with_message_and_action(
"model = auto (auto-select by request complexity)".to_string(),
"model = auto (auto-select model and thinking per turn)".to_string(),
AppAction::UpdateCompaction(app.compaction_config()),
);
}
// Clear auto mode when a specific model is set
app.auto_model = false;
app.last_effective_model = None;
let Some(model) = normalize_model_name(value) else {
return CommandResult::error(format!(
"Invalid model '{value}'. Expected a DeepSeek model ID. Common models: {}",
@@ -385,7 +400,13 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
}
"default_model" => {
if let Some(ref model) = settings.default_model {
app.auto_model = model.trim().eq_ignore_ascii_case("auto");
app.model.clone_from(model);
app.last_effective_model = None;
if app.auto_model {
app.reasoning_effort = ReasoningEffort::Auto;
app.last_effective_reasoning_effort = None;
}
app.update_model_compaction_budget();
app.session.last_prompt_tokens = None;
app.session.last_completion_tokens = None;
@@ -604,14 +625,6 @@ fn expand_tilde(raw: &str) -> String {
/// Default → Flash (cost savings).
pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
let len = input.chars().count();
// Short messages → Flash
if len < 100 {
return "deepseek-v4-flash".to_string();
}
// Long complex requests → Pro
if len > 500 {
return "deepseek-v4-pro".to_string();
}
let lower = input.to_lowercase();
let complex_keywords = [
"refactor",
@@ -630,10 +643,249 @@ pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
if complex_keywords.iter().any(|kw| lower.contains(kw)) {
return "deepseek-v4-pro".to_string();
}
// Short messages → Flash
if len < 100 {
return "deepseek-v4-flash".to_string();
}
// Long complex requests → Pro
if len > 500 {
return "deepseek-v4-pro".to_string();
}
// Default to Flash for cost savings
"deepseek-v4-flash".to_string()
}
/// Model/thinking pair recommended by the Flash router after validation.
///
/// Built by `parse_auto_route_recommendation` from the classifier's JSON reply.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AutoRouteRecommendation {
/// Canonical model ID the router picked.
pub model: String,
/// Recommended thinking tier; `None` when the reply carried no tier the
/// parser recognises.
pub reasoning_effort: Option<ReasoningEffort>,
}
/// Identifies which mechanism produced an auto-route decision.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AutoRouteSource {
    /// The decision came from the Flash classifier request.
    FlashRouter,
    /// The decision came from the local deterministic heuristic.
    Heuristic,
}

impl AutoRouteSource {
    /// Stable, human-readable identifier for this source.
    #[must_use]
    pub fn label(self) -> &'static str {
        match self {
            Self::FlashRouter => "flash-router",
            Self::Heuristic => "heuristic",
        }
    }
}
/// Final auto-routing decision, tagged with the mechanism that produced it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AutoRouteSelection {
/// Concrete model ID to use for the request.
pub model: String,
/// Thinking tier to use, if one was resolved.
pub reasoning_effort: Option<ReasoningEffort>,
/// Whether the Flash router or the heuristic made the decision.
pub source: AutoRouteSource,
}
/// System prompt sent with the Flash router classification request.
///
/// It instructs the classifier to reply with compact JSON carrying a `model`
/// and a `thinking` field; the reply is parsed by
/// `parse_auto_route_recommendation`.
pub const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = "\
You are the DeepSeek TUI auto-routing classifier. Return only compact JSON: \
{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
Use deepseek-v4-flash for trivial, conversational, status, or single-step work. \
Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, \
tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. \
Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \
agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.";
/// Parse the Flash router's reply into a validated recommendation.
///
/// Classifier output is treated as untrusted. The reply must contain a JSON
/// object whose `model` field is a string naming a known V4 model (or one of
/// its short aliases); otherwise `None` is returned so the caller can fall
/// back to the deterministic heuristic.
///
/// The thinking tier is read from the first of `thinking`,
/// `reasoning_effort`, or `effort` that is present. A missing, non-string, or
/// unrecognised tier does not reject the recommendation; it only leaves
/// `reasoning_effort` as `None`.
pub fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteRecommendation> {
    let payload = extract_first_json_object(raw)?;
    let parsed: serde_json::Value = serde_json::from_str(payload).ok()?;

    let requested_model = parsed.get("model")?.as_str()?;
    let model = normalize_auto_route_model(requested_model)?.to_string();

    // Only the first key that exists is consulted, even if its value is unusable.
    let reasoning_effort = ["thinking", "reasoning_effort", "effort"]
        .iter()
        .find_map(|key| parsed.get(*key))
        .and_then(serde_json::Value::as_str)
        .and_then(parse_auto_route_reasoning_effort);

    Some(AutoRouteRecommendation {
        model,
        reasoning_effort,
    })
}
/// Return the first balanced `{ ... }` span in `raw`, if any.
///
/// Scanning starts at the first `{` and stops at the brace that closes it.
/// Braces inside JSON string literals (including after `\"` escapes) are
/// ignored. Text after the closing brace, even if it contains further braces
/// or objects, is not included in the returned slice.
///
/// Returns `None` when `raw` has no `{` or the first object is never closed.
/// The span is not validated as JSON; callers must still parse it.
fn extract_first_json_object(raw: &str) -> Option<&str> {
    let start = raw.find('{')?;
    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Byte-wise scan is safe: every delimiter we care about is ASCII and can
    // never appear inside a multi-byte UTF-8 sequence.
    for (offset, &byte) in raw.as_bytes()[start..].iter().enumerate() {
        if in_string {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                in_string = false;
            }
            continue;
        }
        match byte {
            b'"' => in_string = true,
            b'{' => depth += 1,
            b'}' => {
                // `depth` is at least 1 here because the scan begins on `{`.
                depth -= 1;
                if depth == 0 {
                    return Some(&raw[start..=start + offset]);
                }
            }
            _ => {}
        }
    }
    None
}
/// Map a router-supplied model name onto a canonical V4 model ID.
///
/// Surrounding whitespace and ASCII case are ignored. Accepts the full ID or
/// the `v4-*` / bare short forms; every other value yields `None`.
fn normalize_auto_route_model(model: &str) -> Option<&'static str> {
    const PRO_ALIASES: [&str; 3] = ["deepseek-v4-pro", "v4-pro", "pro"];
    const FLASH_ALIASES: [&str; 3] = ["deepseek-v4-flash", "v4-flash", "flash"];

    let candidate = model.trim();
    let is_one_of = |aliases: &[&str]| {
        aliases
            .iter()
            .any(|alias| candidate.eq_ignore_ascii_case(alias))
    };

    if is_one_of(&PRO_ALIASES) {
        Some("deepseek-v4-pro")
    } else if is_one_of(&FLASH_ALIASES) {
        Some("deepseek-v4-flash")
    } else {
        None
    }
}
/// Translate a router-supplied thinking label into a supported tier.
///
/// Matching ignores surrounding whitespace and ASCII case. The legacy
/// low/medium labels are accepted and mapped to `High`; unknown labels yield
/// `None`.
fn parse_auto_route_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
    let normalized = effort.trim().to_ascii_lowercase();
    let tier = match normalized.as_str() {
        "off" | "disabled" | "none" | "false" => ReasoningEffort::Off,
        "low" | "minimal" | "medium" | "mid" | "high" => ReasoningEffort::High,
        "max" | "maximum" | "xhigh" => ReasoningEffort::Max,
        _ => return None,
    };
    Some(tier)
}
/// Raise `Low` and `Medium` to `High`; every other tier is returned unchanged.
#[must_use]
pub fn normalize_auto_route_effort(effort: ReasoningEffort) -> ReasoningEffort {
    if matches!(effort, ReasoningEffort::Low | ReasoningEffort::Medium) {
        ReasoningEffort::High
    } else {
        effort
    }
}
pub async fn resolve_auto_route_with_flash(
config: &crate::config::Config,
latest_request: &str,
recent_context: &str,
selected_model_mode: &str,
selected_thinking_mode: &str,
) -> AutoRouteSelection {
match auto_route_flash_recommendation(
config,
latest_request,
recent_context,
selected_model_mode,
selected_thinking_mode,
)
.await
{
Ok(Some(recommendation)) => AutoRouteSelection {
model: recommendation.model,
reasoning_effort: recommendation.reasoning_effort,
source: AutoRouteSource::FlashRouter,
},
Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode),
}
}
/// Build a selection from local heuristics only, tagged as `Heuristic`.
///
/// The model comes from `auto_model_heuristic`; the thinking tier comes from
/// `crate::auto_reasoning::select`, with low/medium raised to high.
fn fallback_auto_route(latest_request: &str, selected_model_mode: &str) -> AutoRouteSelection {
    let model = auto_model_heuristic(latest_request, selected_model_mode);
    let heuristic_effort = crate::auto_reasoning::select(false, latest_request);
    let reasoning_effort = Some(normalize_auto_route_effort(heuristic_effort));

    AutoRouteSelection {
        model,
        reasoning_effort,
        source: AutoRouteSource::Heuristic,
    }
}
/// Ask `deepseek-v4-flash` to classify the request and parse its reply.
///
/// Returns `Ok(None)` when the reply holds no usable recommendation, and
/// always in test builds, where no client is created.
///
/// # Errors
///
/// Fails if the client cannot be constructed, the request does not finish
/// within four seconds, or the request itself returns an error.
async fn auto_route_flash_recommendation(
    config: &crate::config::Config,
    latest_request: &str,
    recent_context: &str,
    selected_model_mode: &str,
    selected_thinking_mode: &str,
) -> Result<Option<AutoRouteRecommendation>> {
    // Test builds skip the router entirely so callers exercise the fallback path.
    if cfg!(test) {
        return Ok(None);
    }

    let client = DeepSeekClient::new(config)?;

    let router_prompt = auto_route_prompt(
        latest_request,
        recent_context,
        selected_model_mode,
        selected_thinking_mode,
    );
    let user_turn = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: router_prompt,
            cache_control: None,
        }],
    };

    // Small, non-streaming, zero-temperature request with thinking switched off.
    let request = MessageRequest {
        model: "deepseek-v4-flash".to_string(),
        messages: vec![user_turn],
        max_tokens: 96,
        system: Some(SystemPrompt::Text(
            AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string(),
        )),
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        reasoning_effort: Some("off".to_string()),
        stream: Some(false),
        temperature: Some(0.0),
        top_p: None,
    };

    let deadline = Duration::from_secs(4);
    // First `?` handles the timeout, second the request result.
    let response = tokio::time::timeout(deadline, client.create_message(request)).await??;

    let reply = message_response_text(&response);
    Ok(parse_auto_route_recommendation(&reply))
}
/// Render the user message sent to the Flash router.
///
/// Blank `recent_context` is replaced by a placeholder line, and
/// `latest_request` is capped at 4,000 characters.
fn auto_route_prompt(
    latest_request: &str,
    recent_context: &str,
    selected_model_mode: &str,
    selected_thinking_mode: &str,
) -> String {
    let context_section = if recent_context.trim().is_empty() {
        "No prior context."
    } else {
        recent_context
    };
    let request_section = truncate_for_auto_router(latest_request, 4_000);

    format!(
        "Session mode: agent\nSelected model mode: {}\nSelected thinking mode: {}\n\nRecent context:\n{}\n\nLatest user request:\n{}\n\nReturn JSON only.",
        selected_model_mode, selected_thinking_mode, context_section, request_section
    )
}
/// Flatten a response's content blocks into newline-separated text.
///
/// Text, tool-result and thinking blocks contribute their text; tool-use
/// blocks contribute a `[tool call: NAME]` marker; all other block kinds are
/// ignored.
fn message_response_text(response: &MessageResponse) -> String {
    response
        .content
        .iter()
        .fold(String::new(), |mut collected, block| {
            match block {
                ContentBlock::Text { text, .. }
                | ContentBlock::ToolResult { content: text, .. } => {
                    append_router_text(&mut collected, text);
                }
                ContentBlock::Thinking { thinking } => {
                    append_router_text(&mut collected, thinking);
                }
                ContentBlock::ToolUse { name, .. } => {
                    append_router_text(&mut collected, &format!("[tool call: {name}]"));
                }
                _ => {}
            }
            collected
        })
}
/// Append `text` to `out`, inserting a newline first when `out` already has content.
fn append_router_text(out: &mut String, text: &str) {
    let needs_separator = !out.is_empty();
    if needs_separator {
        out.push_str("\n");
    }
    *out += text;
}
/// Keep at most `max_chars` characters of `text`, appending `...` when
/// anything was cut. Counting is per `char`, so multi-byte text is never
/// split mid-character.
fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
    // The char at position `max_chars` (if any) marks where the cut begins.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
/// Toggle LSP diagnostics on/off or show status.
///
/// - `/lsp on` — enable inline LSP diagnostics
@@ -864,6 +1116,21 @@ mod tests {
));
}
// Setting `model auto` through `set_config` must switch on auto model
// routing, force reasoning effort to `Auto` (even if it was `Off` before),
// and clear both last-effective values.
#[test]
fn test_set_model_auto_enables_auto_thinking() {
let mut app = create_test_app();
app.reasoning_effort = ReasoningEffort::Off;
let result = set_config(&mut app, Some("model auto"));
assert!(result.message.is_some());
assert!(app.auto_model);
assert_eq!(app.model, "auto");
assert_eq!(app.reasoning_effort, ReasoningEffort::Auto);
assert!(app.last_effective_model.is_none());
assert!(app.last_effective_reasoning_effort.is_none());
}
#[test]
fn test_set_model_accepts_future_deepseek_model_id() {
let mut app = create_test_app();
@@ -883,6 +1150,45 @@ mod tests {
assert_eq!(app.model, "deepseek-v4-flash");
}
// A bare JSON object with canonical `model` and `thinking` values parses
// into the matching model ID and `Max` tier.
#[test]
fn auto_route_recommendation_parses_strict_json() {
let rec =
parse_auto_route_recommendation(r#"{"model":"deepseek-v4-pro","thinking":"max"}"#)
.expect("valid router response should parse");
assert_eq!(rec.model, "deepseek-v4-pro");
assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Max));
}
// Leading non-JSON text is tolerated, the short `flash` alias expands to the
// full model ID, and `reasoning_effort` works as an alias for `thinking`.
#[test]
fn auto_route_recommendation_accepts_wrapped_json_aliases() {
let rec =
parse_auto_route_recommendation(r#"route: {"model":"flash","reasoning_effort":"off"}"#)
.expect("wrapped router response should parse");
assert_eq!(rec.model, "deepseek-v4-flash");
assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Off));
}
// Both legacy tiers named in the test ("low" and "medium") must still parse
// and collapse to `High`; previously only "medium" was exercised.
#[test]
fn auto_route_recommendation_normalizes_legacy_low_medium_to_high() {
    for legacy in ["low", "medium"] {
        let raw = format!(r#"{{"model":"deepseek-v4-pro","reasoning_effort":"{legacy}"}}"#);
        let rec = parse_auto_route_recommendation(&raw)
            .expect("legacy tier should parse for back-compat");
        assert_eq!(rec.model, "deepseek-v4-pro");
        assert_eq!(
            rec.reasoning_effort,
            Some(ReasoningEffort::High),
            "legacy tier {legacy} should normalize to high"
        );
    }
}
// A model name outside the accepted V4 IDs and aliases rejects the whole
// recommendation, even when the thinking tier is valid.
#[test]
fn auto_route_recommendation_rejects_unknown_model() {
assert!(
parse_auto_route_recommendation(r#"{"model":"some-other-model","thinking":"max"}"#,)
.is_none()
);
}
#[test]
fn test_set_default_mode_normal_save_reports_normalized_value() {
let _lock = lock_test_env();
+37 -3
View File
@@ -4,7 +4,7 @@ use std::fmt::Write;
use crate::config::{COMMON_DEEPSEEK_MODELS, normalize_model_name};
use crate::localization::{MessageId, tr};
use crate::tui::app::{App, AppAction, AppMode};
use crate::tui::app::{App, AppAction, AppMode, ReasoningEffort};
use crate::tui::views::{HelpView, ModalKind, SubAgentsView};
use super::CommandResult;
@@ -91,14 +91,33 @@ pub fn exit() -> CommandResult {
/// way to flip both knobs without memorising the docs.
pub fn model(app: &mut App, model_name: Option<&str>) -> CommandResult {
if let Some(name) = model_name {
if name.trim().eq_ignore_ascii_case("auto") {
let old_model = app.model_display_label();
app.auto_model = true;
app.model = "auto".to_string();
app.last_effective_model = None;
app.reasoning_effort = ReasoningEffort::Auto;
app.last_effective_reasoning_effort = None;
app.update_model_compaction_budget();
app.session.last_prompt_tokens = None;
app.session.last_completion_tokens = None;
return CommandResult::with_message_and_action(
tr(app.ui_locale, MessageId::ModelChanged)
.replace("{old}", &old_model)
.replace("{new}", "auto"),
AppAction::UpdateCompaction(app.compaction_config()),
);
}
let Some(model_id) = normalize_model_name(name) else {
return CommandResult::error(format!(
"Invalid model '{name}'. Expected a DeepSeek model ID. Common models: {}",
"Invalid model '{name}'. Expected auto or a DeepSeek model ID. Common models: {}",
COMMON_DEEPSEEK_MODELS.join(", ")
));
};
let old_model = app.model.clone();
let old_model = app.model_display_label();
app.auto_model = false;
app.model = model_id.clone();
app.last_effective_model = None;
app.update_model_compaction_budget();
app.session.last_prompt_tokens = None;
app.session.last_completion_tokens = None;
@@ -427,6 +446,21 @@ mod tests {
assert_eq!(app.session.last_completion_tokens, None);
}
// Calling `model` with "auto" must switch on auto model routing, force
// reasoning effort to `Auto` (even if it was `Off` before), and clear both
// last-effective values.
#[test]
fn test_model_auto_enables_auto_thinking() {
let mut app = create_test_app();
app.reasoning_effort = ReasoningEffort::Off;
let result = model(&mut app, Some("auto"));
assert!(result.message.is_some());
assert!(app.auto_model);
assert_eq!(app.model, "auto");
assert_eq!(app.reasoning_effort, ReasoningEffort::Auto);
assert!(app.last_effective_model.is_none());
assert!(app.last_effective_reasoning_effort.is_none());
}
#[test]
fn test_model_change_accepts_future_deepseek_model() {
let mut app = create_test_app();
+5
View File
@@ -614,6 +614,11 @@ pub fn auto_model_heuristic(input: &str, current_model: &str) -> String {
config::auto_model_heuristic(input, current_model)
}
pub use config::{
AutoRouteRecommendation, AutoRouteSelection, normalize_auto_route_effort,
parse_auto_route_recommendation, resolve_auto_route_with_flash,
};
/// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from
/// Zhang et al. (arXiv:2512.24601).
///
+18 -1
View File
@@ -977,10 +977,11 @@ impl Config {
}
}
if let Some(model) = self.default_text_model.as_deref()
&& !model.trim().eq_ignore_ascii_case("auto")
&& normalize_model_name(model).is_none()
{
anyhow::bail!(
"Invalid default_text_model '{model}': expected a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)."
"Invalid default_text_model '{model}': expected auto or a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)."
);
}
if let Some(policy) = self.approval_policy.as_deref() {
@@ -1095,6 +1096,11 @@ impl Config {
{
return normalized;
}
if let Some(model) = self.default_text_model.as_deref()
&& model.trim().eq_ignore_ascii_case("auto")
{
return "auto".to_string();
}
if let Some(model) = self.default_text_model.as_deref()
&& let Some(normalized) = normalize_model_name(model)
{
@@ -3420,6 +3426,17 @@ api_key = "old-openrouter-key"
Ok(())
}
// `default_text_model = "auto"` must pass validation and be reported
// verbatim by `default_model()`.
#[test]
fn validate_accepts_auto_default_text_model() -> Result<()> {
let config = Config {
default_text_model: Some("auto".to_string()),
..Default::default()
};
config.validate()?;
assert_eq!(config.default_model(), "auto");
Ok(())
}
#[test]
fn deepseek_model_env_overrides_default_text_model() -> Result<()> {
let _lock = lock_test_env();
+9 -2
View File
@@ -539,7 +539,9 @@ pub fn open_browser(url: &str) -> Result<()> {
}
fn validate_document(doc: &ConfigUiDocument) -> Result<()> {
if normalize_model_name(&doc.runtime.model).is_none() {
if !doc.runtime.model.trim().eq_ignore_ascii_case("auto")
&& normalize_model_name(&doc.runtime.model).is_none()
{
bail!("invalid model '{}'", doc.runtime.model);
}
if doc.config.mcp_config_path.trim().is_empty() {
@@ -557,6 +559,7 @@ fn reload_runtime_config(app: &mut App, config: &mut Config) -> Result<()> {
.reasoning_effort()
.unwrap_or_else(|| app.reasoning_effort.as_setting()),
);
app.last_effective_reasoning_effort = None;
app.update_model_compaction_budget();
app.mcp_config_path = reloaded.mcp_config_path();
app.skills_dir = reloaded.skills_dir();
@@ -584,6 +587,7 @@ fn apply_reasoning_effort(
) -> Result<()> {
let effort: ReasoningEffort = value.into();
app.reasoning_effort = effort;
app.last_effective_reasoning_effort = None;
app.update_model_compaction_budget();
if persist {
commands::persist_root_string_key("reasoning_effort", effort.as_setting())?;
@@ -844,7 +848,10 @@ mod tests {
#[test]
fn build_document_reflects_app_state() {
let app = app();
let mut app = app();
app.auto_model = false;
app.model = "deepseek-v4-pro".to_string();
app.reasoning_effort = ReasoningEffort::Max;
let config = Config::default();
let doc = build_document(&app, &config).expect("document");
assert_eq!(doc.runtime.model, app.model);
+28 -1
View File
@@ -48,6 +48,7 @@ use crate::tools::spec::RuntimeToolServices;
use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult};
use crate::tools::subagent::{
Mailbox, SharedSubAgentManager, SubAgentRuntime, SubAgentType, new_shared_subagent_manager,
resolve_subagent_assignment_route,
};
use crate::tools::todo::{SharedTodoList, new_shared_todo_list};
use crate::tools::user_input::{UserInputRequest, UserInputResponse};
@@ -515,6 +516,8 @@ impl Engine {
model,
goal_objective,
reasoning_effort,
reasoning_effort_auto,
auto_model,
allow_shell,
trust_mode,
auto_approve,
@@ -525,6 +528,8 @@ impl Engine {
model,
goal_objective,
reasoning_effort,
reasoning_effort_auto,
auto_model,
allow_shell,
trust_mode,
auto_approve,
@@ -564,7 +569,7 @@ impl Engine {
continue;
};
let runtime = SubAgentRuntime::new(
let mut runtime = SubAgentRuntime::new(
client,
self.session.model.clone(),
// Sub-agents don't inherit YOLO mode - use Agent mode defaults
@@ -574,8 +579,17 @@ impl Engine {
Arc::clone(&self.subagent_manager),
)
.with_role_models(self.config.subagent_model_overrides.clone())
.with_auto_model(self.session.auto_model)
.with_reasoning_effort(
self.session.reasoning_effort.clone(),
self.session.reasoning_effort_auto,
)
.with_max_spawn_depth(self.config.max_spawn_depth)
.background_runtime();
let route = resolve_subagent_assignment_route(&runtime, None, &prompt).await;
runtime.model = route.model;
runtime.reasoning_effort = route.reasoning_effort;
runtime.reasoning_effort_auto = false;
let result = {
let mut manager = self.subagent_manager.write().await;
@@ -623,6 +637,7 @@ impl Engine {
.await;
}
Op::SetModel { model } => {
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
self.session.model = model;
self.config.model.clone_from(&self.session.model);
let _ = self
@@ -654,6 +669,7 @@ impl Engine {
self.session.compaction_summary_prompt =
extract_compaction_summary_prompt(system_prompt.clone());
self.session.system_prompt = system_prompt;
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
self.session.model = model;
self.session.workspace = workspace.clone();
self.config.model.clone_from(&self.session.model);
@@ -709,6 +725,8 @@ impl Engine {
self.session.model.clone(),
self.config.goal_objective.clone(),
self.session.reasoning_effort.clone(),
self.session.reasoning_effort_auto,
self.session.auto_model,
self.session.allow_shell,
self.session.trust_mode,
self.session.auto_approve,
@@ -758,6 +776,8 @@ impl Engine {
model: String,
goal_objective: Option<String>,
reasoning_effort: Option<String>,
reasoning_effort_auto: bool,
auto_model: bool,
allow_shell: bool,
trust_mode: bool,
auto_approve: bool,
@@ -838,6 +858,8 @@ impl Engine {
self.config.model.clone_from(&self.session.model);
self.config.goal_objective = goal_objective;
self.session.reasoning_effort = reasoning_effort;
self.session.reasoning_effort_auto = reasoning_effort_auto;
self.session.auto_model = auto_model;
self.session.allow_shell = allow_shell;
self.config.allow_shell = allow_shell;
self.session.trust_mode = trust_mode;
@@ -900,6 +922,11 @@ impl Engine {
Arc::clone(&self.subagent_manager),
)
.with_role_models(self.config.subagent_model_overrides.clone())
.with_auto_model(self.session.auto_model)
.with_reasoning_effort(
self.session.reasoning_effort.clone(),
self.session.reasoning_effort_auto,
)
.with_max_spawn_depth(self.config.max_spawn_depth);
if let Some((mailbox, cancel_token)) = mailbox_for_runtime.as_ref() {
rt = rt
+5 -24
View File
@@ -20,6 +20,11 @@ pub enum Op {
/// Reasoning-effort tier: `"off" | "low" | "medium" | "high" | "max"`.
/// `None` lets the provider apply its default.
reasoning_effort: Option<String>,
/// True when the user selected auto thinking, even though the UI sends
/// a concrete per-turn value to the model API.
reasoning_effort_auto: bool,
/// True when the user selected auto model routing.
auto_model: bool,
allow_shell: bool,
trust_mode: bool,
auto_approve: bool,
@@ -89,27 +94,3 @@ pub enum Op {
/// Shutdown the engine
Shutdown,
}
impl Op {
/// Create a send message operation
pub fn send(
content: impl Into<String>,
mode: AppMode,
model: impl Into<String>,
reasoning_effort: Option<String>,
allow_shell: bool,
trust_mode: bool,
auto_approve: bool,
) -> Self {
Op::SendMessage {
content: content.into(),
mode,
model: model.into(),
goal_objective: None,
reasoning_effort,
allow_shell,
trust_mode,
auto_approve,
}
}
}
+7
View File
@@ -19,6 +19,11 @@ pub struct Session {
/// `"off" | "low" | "medium" | "high" | "max"`. `None` lets the provider
/// apply its own defaults.
pub reasoning_effort: Option<String>,
/// Whether the user selected automatic reasoning effort.
pub reasoning_effort_auto: bool,
/// Whether the user selected automatic model routing.
pub auto_model: bool,
/// Workspace directory
pub workspace: PathBuf,
@@ -118,6 +123,8 @@ impl Session {
Self {
model,
reasoning_effort: None,
reasoning_effort_auto: false,
auto_model: false,
workspace,
system_prompt: None,
compaction_summary_prompt: None,
+63 -15
View File
@@ -2504,6 +2504,11 @@ async fn run_review(config: &Config, args: ReviewArgs) -> Result<()> {
.model
.or_else(|| config.default_text_model.clone())
.unwrap_or_else(|| config.default_model());
let route = resolve_cli_auto_route(config, &model, &diff).await;
let model = route.model;
let reasoning_effort = route
.reasoning_effort
.map(|effort| effort.as_setting().to_string());
let system = SystemPrompt::Text(
"You are a senior code reviewer. Focus on bugs, risks, behavioral regressions, and missing tests. \
@@ -2529,7 +2534,7 @@ Provide findings ordered by severity with file references, then open questions,
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
reasoning_effort,
stream: Some(false),
temperature: Some(0.2),
top_p: Some(0.9),
@@ -3620,14 +3625,42 @@ async fn run_interactive(
.await
}
/// Model and thinking settings resolved for a CLI request.
struct CliAutoRoute {
/// Concrete model ID to send to the API.
model: String,
/// Thinking tier chosen by auto routing; `None` when routing did not run
/// or produced no tier.
reasoning_effort: Option<crate::tui::app::ReasoningEffort>,
/// True when the requested model was `auto`.
auto_model: bool,
}
/// Resolve the model to use for a CLI request.
///
/// A concrete `model` is passed through unchanged with no thinking tier. When
/// `model` is `auto` (ignoring case and surrounding whitespace), the
/// auto-router picks a model and tier from `prompt`, with no recent context.
async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> CliAutoRoute {
    let wants_auto = model.trim().eq_ignore_ascii_case("auto");
    if !wants_auto {
        return CliAutoRoute {
            model: model.to_string(),
            reasoning_effort: None,
            auto_model: false,
        };
    }

    let selection =
        commands::resolve_auto_route_with_flash(config, prompt, "", "auto", "auto").await;
    CliAutoRoute {
        model: selection.model,
        reasoning_effort: selection.reasoning_effort,
        auto_model: true,
    }
}
async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()> {
use crate::client::DeepSeekClient;
use crate::models::{ContentBlock, Message, MessageRequest};
let client = DeepSeekClient::new(config)?;
let route = resolve_cli_auto_route(config, model, prompt).await;
let reasoning_effort = route
.reasoning_effort
.map(|effort| effort.as_setting().to_string());
let request = MessageRequest {
model: model.to_string(),
model: route.model,
messages: vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
@@ -3641,7 +3674,7 @@ async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()>
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
reasoning_effort,
stream: Some(false),
temperature: None,
top_p: None,
@@ -3663,8 +3696,13 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result
use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt};
let client = DeepSeekClient::new(config)?;
let route = resolve_cli_auto_route(config, model, prompt).await;
let model = route.model;
let reasoning_effort = route
.reasoning_effort
.map(|effort| effort.as_setting().to_string());
let request = MessageRequest {
model: model.to_string(),
model: model.clone(),
messages: vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
@@ -3680,7 +3718,7 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
reasoning_effort,
stream: Some(false),
temperature: Some(0.2),
top_p: Some(0.9),
@@ -3725,6 +3763,13 @@ async fn run_exec_agent(
use crate::tools::todo::new_shared_todo_list;
use crate::tui::app::AppMode;
let route = resolve_cli_auto_route(config, model, prompt).await;
let auto_model = route.auto_model;
let effective_model = route.model;
let effective_reasoning_effort = route
.reasoning_effort
.map(|effort| effort.as_setting().to_string());
// Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle
// architecture (issue #124) handles long-context resets via fresh contexts
// rather than progressive summarization. The compaction config is still
@@ -3732,8 +3777,8 @@ async fn run_exec_agent(
// or direct engine config keep their old behavior.
let compaction = CompactionConfig {
enabled: false,
model: model.to_string(),
token_threshold: compaction_threshold_for_model(model),
model: effective_model.clone(),
token_threshold: compaction_threshold_for_model(&effective_model),
..Default::default()
};
@@ -3747,7 +3792,7 @@ async fn run_exec_agent(
.map(crate::config::LspConfigToml::into_runtime);
let engine_config = EngineConfig {
model: model.to_string(),
model: effective_model.clone(),
workspace: workspace.clone(),
allow_shell: auto_approve || config.allow_shell(),
trust_mode,
@@ -3784,15 +3829,18 @@ async fn run_exec_agent(
};
engine_handle
.send(Op::send(
prompt,
.send(Op::SendMessage {
content: prompt.to_string(),
mode,
model,
None,
auto_approve || config.allow_shell(),
model: effective_model.clone(),
goal_objective: None,
reasoning_effort: effective_reasoning_effort,
reasoning_effort_auto: auto_model,
auto_model,
allow_shell: auto_approve || config.allow_shell(),
trust_mode,
auto_approve,
))
})
.await?;
#[derive(serde::Serialize)]
@@ -3813,7 +3861,7 @@ async fn run_exec_agent(
}
let mut summary = ExecSummary {
mode: "agent".to_string(),
model: model.to_string(),
model: effective_model,
prompt: prompt.to_string(),
..ExecSummary::default()
};
+28 -6
View File
@@ -1457,21 +1457,43 @@ impl RuntimeThreadManager {
}
let mode = parse_mode(req.mode.as_deref().unwrap_or(&thread.mode));
let model = req.model.unwrap_or_else(|| thread.model.clone());
let requested_model = req.model.unwrap_or_else(|| thread.model.clone());
let auto_model = requested_model.trim().eq_ignore_ascii_case("auto");
let (model, reasoning_effort) = if auto_model {
let selection = crate::commands::resolve_auto_route_with_flash(
&self.config,
&prompt,
"",
"auto",
"auto",
)
.await;
(
selection.model,
selection
.reasoning_effort
.map(|effort| effort.as_setting().to_string()),
)
} else {
(requested_model, None)
};
let allow_shell = req.allow_shell.unwrap_or(thread.allow_shell);
let trust_mode = req.trust_mode.unwrap_or(thread.trust_mode);
let auto_approve = req.auto_approve.unwrap_or(thread.auto_approve);
engine
.send(Op::send(
prompt,
.send(Op::SendMessage {
content: prompt,
mode,
model.clone(),
None,
model: model.clone(),
goal_objective: None,
reasoning_effort,
reasoning_effort_auto: auto_model,
auto_model,
allow_shell,
trust_mode,
auto_approve,
))
})
.await
.map_err(|e| anyhow!("Failed to start turn: {e}"))?;
+13 -4
View File
@@ -284,7 +284,7 @@ impl Settings {
s.locale = normalize_configured_locale(&s.locale)
.unwrap_or("en")
.to_string();
s.default_model = s.default_model.as_deref().and_then(normalize_model_name);
s.default_model = s.default_model.as_deref().and_then(normalize_default_model);
s
};
settings.apply_env_overrides();
@@ -444,9 +444,9 @@ impl Settings {
return Ok(());
}
let Some(model) = normalize_model_name(trimmed) else {
let Some(model) = normalize_default_model(trimmed) else {
anyhow::bail!(
"Failed to update setting: invalid model '{value}'. Expected: a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default."
"Failed to update setting: invalid model '{value}'. Expected: auto, a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default."
);
};
self.default_model = Some(model);
@@ -549,12 +549,21 @@ impl Settings {
("max_history", "Max input history entries"),
(
"default_model",
"Default model: any DeepSeek model ID (e.g. deepseek-v4-pro)",
"Default model: auto or any DeepSeek model ID (e.g. deepseek-v4-pro)",
),
]
}
}
/// Normalize a `default_model` setting value.
///
/// `auto` (any ASCII case, surrounding whitespace ignored) is kept as the
/// literal `"auto"`; anything else is delegated to `normalize_model_name`.
fn normalize_default_model(value: &str) -> Option<String> {
    match value.trim() {
        candidate if candidate.eq_ignore_ascii_case("auto") => Some("auto".to_string()),
        candidate => normalize_model_name(candidate),
    }
}
/// Parse a boolean value from various formats
fn parse_bool(value: &str) -> Result<bool> {
match value.to_lowercase().as_str() {
+203 -6
View File
@@ -557,6 +557,9 @@ pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3;
pub struct SubAgentRuntime {
pub client: DeepSeekClient,
pub model: String,
pub auto_model: bool,
pub reasoning_effort: Option<String>,
pub reasoning_effort_auto: bool,
pub role_models: HashMap<String, String>,
pub context: ToolContext,
pub allow_shell: bool,
@@ -597,6 +600,9 @@ impl SubAgentRuntime {
Self {
client,
model,
auto_model: false,
reasoning_effort: None,
reasoning_effort_auto: false,
role_models: HashMap::new(),
context,
allow_shell,
@@ -646,6 +652,27 @@ impl SubAgentRuntime {
self
}
/// Preserve whether the parent session is using per-turn model routing.
///
/// Builder-style setter: stores `auto_model` on the runtime and returns the
/// runtime for chaining.
#[must_use]
pub fn with_auto_model(mut self, auto_model: bool) -> Self {
self.auto_model = auto_model;
self
}
/// Preserve the parent's thinking configuration. `reasoning_effort_auto`
/// stays true even when the parent turn itself was sent with a concrete
/// flash-router recommendation, so children can resolve their own tier.
///
/// Builder-style setter: stores both values on the runtime and returns the
/// runtime for chaining.
#[must_use]
pub fn with_reasoning_effort(
mut self,
reasoning_effort: Option<String>,
reasoning_effort_auto: bool,
) -> Self {
self.reasoning_effort = reasoning_effort;
self.reasoning_effort_auto = reasoning_effort_auto;
self
}
/// Return a child runtime that is deliberately detached from the parent
/// turn cancellation token. Background sub-agents should keep running when
/// the parent turn is cancelled; explicit agent cancellation still
@@ -675,6 +702,9 @@ impl SubAgentRuntime {
Self {
client: self.client.clone(),
model: self.model.clone(),
auto_model: self.auto_model,
reasoning_effort: self.reasoning_effort.clone(),
reasoning_effort_auto: self.reasoning_effort_auto,
role_models: self.role_models.clone(),
context: child_context,
allow_shell: self.allow_shell,
@@ -1621,16 +1651,14 @@ impl ToolSpec for AgentSpawnTool {
if let Some(cwd) = validated_cwd {
child_runtime.context.workspace = cwd;
}
let effective_model = match spawn_request.model.clone() {
Some(model) => model,
let configured_model = match spawn_request.model.clone() {
Some(model) => Some(model),
None => configured_model_for_role_or_type(
&self.runtime,
spawn_request.assignment.role.as_deref(),
&spawn_request.agent_type,
)?
.unwrap_or_else(|| self.runtime.model.clone()),
)?,
};
child_runtime.model = effective_model.clone();
// Cache-aware resident mode (#529): prepend file contents to the prompt
// so the child's prefix is byte-stable for DeepSeek prefix caching.
@@ -1666,6 +1694,14 @@ impl ToolSpec for AgentSpawnTool {
(spawn_request.prompt, None)
};
let route =
resolve_subagent_assignment_route(&self.runtime, configured_model, &effective_prompt)
.await;
child_runtime.model = route.model.clone();
child_runtime.reasoning_effort = route.reasoning_effort.clone();
child_runtime.reasoning_effort_auto = false;
let effective_model = route.model;
let mut manager = self.manager.write().await;
let result = manager
@@ -2743,7 +2779,7 @@ async fn run_subagent(
tool_choice: Some(json!({ "type": "auto" })),
metadata: None,
thinking: None,
reasoning_effort: None,
reasoning_effort: runtime.reasoning_effort.clone(),
stream: Some(false),
temperature: None,
top_p: None,
@@ -3272,6 +3308,167 @@ pub(crate) fn configured_model_for_role_or_type(
Ok(None)
}
/// Concrete model and thinking effort resolved for one sub-agent assignment.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SubAgentResolvedRoute {
// Model id chosen for the assignment.
pub(crate) model: String,
// Thinking-effort setting string for the assignment, if one was resolved.
pub(crate) reasoning_effort: Option<String>,
}
/// Resolve the model and thinking effort for a sub-agent assignment.
///
/// The local fallback route is always computed first. When the runtime has
/// auto model routing or auto thinking enabled, the flash router is asked for
/// a recommendation; a router error or an empty answer leaves the fallback
/// untouched. A recommendation only replaces the model when auto routing is
/// on and no model was configured for the assignment, and only replaces the
/// thinking effort when auto thinking is on and the router named one.
pub(crate) async fn resolve_subagent_assignment_route(
    runtime: &SubAgentRuntime,
    configured_model: Option<String>,
    prompt: &str,
) -> SubAgentResolvedRoute {
    let model_is_pinned = configured_model.is_some();
    let mut resolved = fallback_subagent_assignment_route(runtime, configured_model, prompt);

    let wants_router = runtime.auto_model || runtime.reasoning_effort_auto;
    if !wants_router {
        return resolved;
    }

    // Best effort: any router failure falls back to the heuristic route.
    let Ok(Some(advice)) = subagent_flash_router(runtime, prompt).await else {
        return resolved;
    };

    if runtime.auto_model && !model_is_pinned {
        resolved.model = advice.model;
    }
    if runtime.reasoning_effort_auto
        && let Some(effort) = advice.reasoning_effort
    {
        resolved.reasoning_effort = Some(effort.as_setting().to_string());
    }
    resolved
}
/// Compute a sub-agent route locally, without calling the flash router.
///
/// Model: a configured model wins; otherwise the auto-model heuristic is used
/// when auto routing is on, else the runtime's own model.
/// Thinking: with auto thinking on, the tier comes from
/// `auto_reasoning::select`, with `Low` and `Medium` raised to `High`;
/// otherwise the runtime's stored effort is copied as-is.
fn fallback_subagent_assignment_route(
    runtime: &SubAgentRuntime,
    configured_model: Option<String>,
    prompt: &str,
) -> SubAgentResolvedRoute {
    use crate::tui::app::ReasoningEffort;

    let model = configured_model.unwrap_or_else(|| {
        if runtime.auto_model {
            crate::commands::auto_model_heuristic(prompt, &runtime.model)
        } else {
            runtime.model.clone()
        }
    });

    let reasoning_effort = if !runtime.reasoning_effort_auto {
        runtime.reasoning_effort.clone()
    } else {
        let picked = crate::auto_reasoning::select(false, prompt);
        // Low and Medium are never emitted for sub-agents; both become High.
        let tier = if matches!(picked, ReasoningEffort::Low | ReasoningEffort::Medium) {
            ReasoningEffort::High
        } else {
            picked
        };
        Some(tier.as_setting().to_string())
    };

    SubAgentResolvedRoute {
        model,
        reasoning_effort,
    }
}
/// Ask the flash model to recommend a model and thinking tier for a
/// sub-agent assignment.
///
/// Returns `Ok(None)` immediately in test builds. Otherwise sends a single
/// non-streaming request with a 4 second timeout and parses the reply text
/// with `parse_auto_route_recommendation`.
///
/// # Errors
/// Propagates a timeout or a client error from the request.
async fn subagent_flash_router(
    runtime: &SubAgentRuntime,
    prompt: &str,
) -> Result<Option<crate::commands::AutoRouteRecommendation>> {
    if cfg!(test) {
        return Ok(None);
    }

    let user_turn = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: subagent_router_prompt(runtime, prompt),
            cache_control: None,
        }],
    };
    let system = SystemPrompt::Text(SUBAGENT_ROUTER_SYSTEM_PROMPT.to_string());

    let request = MessageRequest {
        model: "deepseek-v4-flash".to_string(),
        messages: vec![user_turn],
        max_tokens: 96,
        system: Some(system),
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        reasoning_effort: Some("off".to_string()),
        stream: Some(false),
        temperature: Some(0.0),
        top_p: None,
    };

    // First `?` is the timeout, second is the client call itself.
    let pending = runtime.client.create_message(request);
    let response = tokio::time::timeout(Duration::from_secs(4), pending).await??;

    let reply = message_response_text(&response.content);
    Ok(crate::commands::parse_auto_route_recommendation(&reply))
}
/// System prompt sent with the sub-agent routing request built in
/// `subagent_flash_router`. It asks for a compact JSON object naming a model
/// and a thinking tier for the assignment.
const SUBAGENT_ROUTER_SYSTEM_PROMPT: &str = "\
You are the DeepSeek TUI sub-agent routing manager. Return only compact JSON: \
{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
Treat each child assignment like a customer request entering a team queue: decide the least \
sufficient worker and thinking budget for that assignment. Do not treat being a sub-agent as \
important by itself. Use Flash for trivial, read-only, status, lookup, or single-step work. \
Use Pro for coding, debugging, release work, multi-file changes, security, architecture, \
high-risk decisions, ambiguous requests, or work likely to need tool-call judgment. Use thinking \
off for trivial no-tool work, high for ordinary reasoning, and max only for hard, risky, \
multi-step, uncertain, or tool-heavy work.";
/// Build the user message for the sub-agent routing request.
///
/// The message states the parent's model mode (`auto` or `fixed`), the
/// parent's thinking mode (`auto`, the stored effort, or `provider-default`
/// when none is stored), and the assignment text cut to 4,000 characters.
fn subagent_router_prompt(runtime: &SubAgentRuntime, prompt: &str) -> String {
    let model_mode = if runtime.auto_model { "auto" } else { "fixed" };
    let thinking_mode = if runtime.reasoning_effort_auto {
        "auto"
    } else {
        runtime
            .reasoning_effort
            .as_deref()
            .unwrap_or("provider-default")
    };
    let assignment = truncate_subagent_router_prompt(prompt, 4_000);

    format!(
        "Parent selected model mode: {}\nParent selected thinking mode: {}\n\nSub-agent assignment:\n{}\n\nReturn JSON only.",
        model_mode, thinking_mode, assignment
    )
}
/// Cut `text` to at most `max_chars` Unicode scalar values.
///
/// Text that already fits is returned unchanged. Longer text is cut on a
/// character boundary and `"\n[truncated]"` is appended.
///
/// The scan stops at the first character past the limit, so the cost is
/// bounded by `max_chars` rather than by the full length of `text`.
fn truncate_subagent_router_prompt(text: &str, max_chars: usize) -> String {
    const MARKER: &str = "\n[truncated]";

    // `nth(max_chars)` yields the byte offset of the first character that
    // does not fit, or `None` when the whole text is within the limit.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut out = String::with_capacity(cut + MARKER.len());
            out.push_str(&text[..cut]);
            out.push_str(MARKER);
            out
        }
    }
}
/// Flatten the text-bearing blocks of a response into one string.
///
/// `Text` and `Thinking` blocks contribute their text in order; every other
/// block kind is ignored. A newline is inserted before a piece only when the
/// accumulated output is already non-empty.
fn message_response_text(blocks: &[ContentBlock]) -> String {
    let mut joined = String::new();
    for block in blocks {
        let piece: &str = match block {
            ContentBlock::Text { text, .. } => text,
            ContentBlock::Thinking { thinking } => thinking,
            _ => continue,
        };
        if !joined.is_empty() {
            joined.push('\n');
        }
        joined.push_str(piece);
    }
    joined
}
fn parse_optional_subagent_model(input: &Value, key: &str) -> Result<Option<String>, ToolError> {
match input.get(key) {
None | Some(Value::Null) => Ok(None),
+59
View File
@@ -398,6 +398,62 @@ fn test_build_assignment_prompt_includes_metadata() {
assert!(prompt.contains("role: explorer"));
}
// With auto model routing on and no configured model, the fallback route
// chooses the model from the assignment prompt.
#[test]
fn subagent_auto_model_routes_unconfigured_assignments() {
let runtime = stub_runtime().with_auto_model(true);
assert_eq!(
fallback_subagent_assignment_route(&runtime, None, "implement the release fix").model,
"deepseek-v4-pro"
);
assert_eq!(
fallback_subagent_assignment_route(&runtime, None, "say hello").model,
"deepseek-v4-flash"
);
}
// A configured model is kept by the fallback route even when auto model
// routing is on.
#[test]
fn subagent_auto_route_respects_explicit_or_role_model() {
let runtime = stub_runtime().with_auto_model(true);
assert_eq!(
fallback_subagent_assignment_route(
&runtime,
Some("deepseek-v4-flash".to_string()),
"implement the release fix"
)
.model,
"deepseek-v4-flash"
);
}
// With auto thinking on, the fallback route resolves a concrete tier per
// prompt: "high" for the simple lookup, "max" for the debugging prompt.
#[test]
fn subagent_auto_reasoning_resolves_to_distinct_v4_tiers() {
let runtime = stub_runtime().with_reasoning_effort(Some("high".to_string()), true);
assert_eq!(
fallback_subagent_assignment_route(&runtime, None, "quick lookup").reasoning_effort,
Some("high".to_string())
);
assert_eq!(
fallback_subagent_assignment_route(&runtime, None, "debug this release failure")
.reasoning_effort,
Some("max".to_string())
);
}
// The router prompt reports both parent modes as "auto" and embeds the
// assignment text.
#[test]
fn subagent_router_prompt_frames_assignment_as_auto_routing() {
let runtime = stub_runtime()
.with_auto_model(true)
.with_reasoning_effort(Some("high".to_string()), true);
let prompt = subagent_router_prompt(&runtime, "inspect one file");
assert!(prompt.contains("Parent selected model mode: auto"));
assert!(prompt.contains("Parent selected thinking mode: auto"));
assert!(prompt.contains("inspect one file"));
}
#[test]
fn test_subagent_tool_registry_reports_unavailable_tools() {
let tmp = tempdir().expect("tempdir");
@@ -1102,6 +1158,9 @@ fn stub_runtime() -> SubAgentRuntime {
SubAgentRuntime {
client: stub_client(),
model: "deepseek-v4-flash".to_string(),
auto_model: false,
reasoning_effort: None,
reasoning_effort_auto: false,
role_models: std::collections::HashMap::new(),
context,
allow_shell: true,
+64 -12
View File
@@ -9,7 +9,9 @@ use serde_json::Value;
use thiserror::Error;
use crate::compaction::CompactionConfig;
use crate::config::{ApiProvider, Config, SavedCredential, has_api_key, save_api_key};
use crate::config::{
ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, save_api_key,
};
use crate::config_ui::ConfigUiMode;
use crate::core::coherence::CoherenceState;
use crate::cycle_manager::{CycleBriefing, CycleConfig};
@@ -628,6 +630,8 @@ pub struct App {
/// `dispatch_user_message` calls `auto_model_heuristic` to resolve the
/// effective model for each outbound message.
pub auto_model: bool,
/// Last concrete model chosen while `auto_model` is active.
pub last_effective_model: Option<String>,
/// Current API provider (mirrors `Config::api_provider`).
/// Updated by `/provider` switches so the UI/commands can read the
/// active backend without re-deriving it from the live config.
@@ -635,6 +639,8 @@ pub struct App {
/// Current reasoning-effort tier for DeepSeek thinking mode.
/// Cycled via Shift+Tab; initialized from config at startup.
pub reasoning_effort: ReasoningEffort,
/// Last concrete thinking tier chosen while `reasoning_effort` is auto.
pub last_effective_reasoning_effort: Option<ReasoningEffort>,
pub workspace: PathBuf,
pub config_path: Option<PathBuf>,
pub config_profile: Option<String>,
@@ -1080,8 +1086,23 @@ impl App {
let use_paste_burst_detection = settings.paste_burst_detection;
let ui_theme = palette::UI_THEME;
let model = settings.default_model.clone().unwrap_or(model);
let auto_model = model.trim().eq_ignore_ascii_case("auto");
let threshold_model = if auto_model {
DEFAULT_TEXT_MODEL
} else {
model.as_str()
};
let compact_threshold =
compaction_threshold_for_model_and_effort(&model, config.reasoning_effort());
compaction_threshold_for_model_and_effort(threshold_model, config.reasoning_effort());
let reasoning_effort = if auto_model {
ReasoningEffort::Auto
} else {
config
.reasoning_effort()
.map_or_else(ReasoningEffort::default, |s| {
ReasoningEffort::from_setting(s)
})
};
// Start in YOLO mode if --yolo flag was passed
let preferred_mode = AppMode::from_setting(&settings.default_mode);
@@ -1170,13 +1191,11 @@ impl App {
sticky_status: None,
last_status_message_seen: None,
model,
auto_model: false,
auto_model,
last_effective_model: None,
api_provider: provider,
reasoning_effort: config
.reasoning_effort()
.map_or_else(ReasoningEffort::default, |s| {
ReasoningEffort::from_setting(s)
}),
reasoning_effort,
last_effective_reasoning_effort: None,
workspace,
config_path,
config_profile,
@@ -1435,6 +1454,7 @@ impl App {
/// `Off` → `High` → `Max` → `Off`.
pub fn cycle_effort(&mut self) {
self.reasoning_effort = self.reasoning_effort.cycle_next();
self.last_effective_reasoning_effort = None;
self.needs_redraw = true;
self.push_status_toast(
format!("Thinking: {}", self.reasoning_effort.short_label()),
@@ -3464,10 +3484,42 @@ impl App {
}
pub fn update_model_compaction_budget(&mut self) {
self.compact_threshold = compaction_threshold_for_model_and_effort(
&self.model,
self.reasoning_effort.api_value(),
);
let model = self.effective_model_for_budget().to_string();
self.compact_threshold =
compaction_threshold_for_model_and_effort(&model, self.reasoning_effort.api_value());
}
/// Model id to use for budget calculations.
///
/// Outside auto mode this is the configured model. In auto mode it is the
/// last concrete model chosen, falling back to `DEFAULT_TEXT_MODEL` when no
/// model has been chosen yet or the stored value is the literal `"auto"`.
pub fn effective_model_for_budget(&self) -> &str {
    if !self.auto_model {
        return &self.model;
    }
    match self.last_effective_model.as_deref() {
        Some(resolved) if resolved != "auto" => resolved,
        _ => DEFAULT_TEXT_MODEL,
    }
}
pub fn model_display_label(&self) -> String {
if self.auto_model {
if let Some(effective) = self.last_effective_model.as_deref()
&& effective != "auto"
{
return format!("auto: {effective}");
}
return "auto".to_string();
}
self.model.clone()
}
/// Human-readable thinking label for the UI.
///
/// When thinking is auto-routed (auto model mode, or the effort itself is
/// `Auto`) this is `auto: <tier>` once a tier has been resolved, else plain
/// `auto`. Otherwise it is the short label of the selected effort.
pub fn reasoning_effort_display_label(&self) -> String {
    let auto_routed = self.auto_model || self.reasoning_effort == ReasoningEffort::Auto;
    if !auto_routed {
        return self.reasoning_effort.short_label().to_string();
    }
    self.last_effective_reasoning_effort.map_or_else(
        || "auto".to_string(),
        |effective| format!("auto: {}", effective.short_label()),
    )
}
pub fn compaction_config(&self) -> CompactionConfig {
+61 -5
View File
@@ -34,6 +34,7 @@ use crate::tui::views::{ModalKind, ModalView, ViewAction, ViewEvent};
/// Models the picker exposes by default. Kept short on purpose — power
/// users can still type `/model <id>` for anything else.
///
/// Each entry pairs a model id with a short description. `auto` is listed
/// first, ahead of the concrete models.
const PICKER_MODELS: &[(&str, &str)] = &[
("auto", "select per turn"),
("deepseek-v4-pro", "flagship"),
("deepseek-v4-flash", "fast / cheap"),
];
@@ -41,6 +42,7 @@ const PICKER_MODELS: &[(&str, &str)] = &[
/// Thinking-effort rows shown in the picker, in the order DeepSeek
/// behaviorally distinguishes them.
const PICKER_EFFORTS: &[ReasoningEffort] = &[
ReasoningEffort::Auto,
ReasoningEffort::Off,
ReasoningEffort::High,
ReasoningEffort::Max,
@@ -68,7 +70,11 @@ pub struct ModelPickerView {
impl ModelPickerView {
#[must_use]
pub fn new(app: &App) -> Self {
let initial_model = app.model.clone();
let initial_model = if app.auto_model {
"auto".to_string()
} else {
app.model.clone()
};
let mut selected_model_idx = PICKER_MODELS
.iter()
.position(|(id, _)| *id == initial_model);
@@ -88,7 +94,7 @@ impl ModelPickerView {
let selected_effort_idx = PICKER_EFFORTS
.iter()
.position(|e| *e == normalized)
.unwrap_or(1); // default to High if somehow unknown
.unwrap_or(2); // default to High if somehow unknown
Self {
initial_model,
@@ -116,6 +122,9 @@ impl ModelPickerView {
}
/// Thinking effort the picker would apply for the current selection.
///
/// Selecting the `auto` model always yields `ReasoningEffort::Auto`,
/// whatever effort row is highlighted; otherwise the highlighted row wins.
fn resolved_effort(&self) -> ReasoningEffort {
    let auto_model_selected = self.resolved_model().trim().eq_ignore_ascii_case("auto");
    if auto_model_selected {
        ReasoningEffort::Auto
    } else {
        PICKER_EFFORTS[self.selected_effort_idx]
    }
}
@@ -318,6 +327,7 @@ impl ModalView for ModelPickerView {
.map(|effort| {
let label = effort.short_label().to_string();
let hint = match effort {
ReasoningEffort::Auto => "auto-select per turn".to_string(),
ReasoningEffort::Off => "thinking disabled".to_string(),
ReasoningEffort::High => "thinking enabled (default)".to_string(),
ReasoningEffort::Max => "thinking enabled, max effort".to_string(),
@@ -387,6 +397,37 @@ mod tests {
assert_eq!(view.resolved_effort(), ReasoningEffort::Max);
}
// Opening the picker on an app in auto mode starts on the `auto` model and
// the `Auto` effort.
#[test]
fn picker_initial_selection_matches_auto_state() {
let mut app = create_test_app();
app.model = "auto".to_string();
app.auto_model = true;
app.reasoning_effort = ReasoningEffort::Auto;
let view = ModelPickerView::new(&app);
assert_eq!(view.resolved_model(), "auto");
assert_eq!(view.resolved_effort(), ReasoningEffort::Auto);
}
// With the `auto` model row selected, the resolved effort is `Auto` even
// though the `Max` effort row is highlighted.
#[test]
fn picker_auto_model_forces_auto_effort_on_apply() {
let mut app = create_test_app();
app.model = "auto".to_string();
app.auto_model = true;
app.reasoning_effort = ReasoningEffort::Off;
let mut view = ModelPickerView::new(&app);
// Row 0 is asserted below to resolve to the `auto` model.
view.selected_model_idx = 0;
view.selected_effort_idx = PICKER_EFFORTS
.iter()
.position(|effort| *effort == ReasoningEffort::Max)
.expect("max effort row");
assert_eq!(view.resolved_model(), "auto");
assert_eq!(view.resolved_effort(), ReasoningEffort::Auto);
}
#[test]
fn picker_normalizes_low_medium_to_high() {
let mut app = create_test_app();
@@ -399,6 +440,21 @@ mod tests {
);
}
// Pins the exact rows and their order for both picker lists: model ids and
// effort setting strings.
#[test]
fn picker_exposes_auto_and_distinct_thinking_tiers() {
let model_labels: Vec<_> = PICKER_MODELS.iter().map(|(id, _)| *id).collect();
assert_eq!(
model_labels,
vec!["auto", "deepseek-v4-pro", "deepseek-v4-flash"]
);
let effort_labels: Vec<_> = PICKER_EFFORTS
.iter()
.map(|effort| effort.as_setting())
.collect();
assert_eq!(effort_labels, vec!["auto", "off", "high", "max"]);
}
#[test]
fn picker_preserves_unknown_model_via_custom_row() {
let mut app = create_test_app();
@@ -429,7 +485,7 @@ mod tests {
#[test]
fn tab_switches_focus_and_arrow_now_moves_effort() {
let mut app = create_test_app();
// Default is Max (index 2 = last); pin to Off so the Down arrow has
// Default is Max; pin to Off so the Down arrow has
// somewhere to go.
app.reasoning_effort = ReasoningEffort::Off;
let mut view = ModelPickerView::new(&app);
@@ -490,11 +546,11 @@ mod tests {
}
#[test]
fn picker_only_exposes_off_high_max() {
fn picker_only_exposes_auto_off_high_max() {
let labels: Vec<&str> = PICKER_EFFORTS
.iter()
.map(|effort| effort.short_label())
.collect();
assert_eq!(labels, vec!["off", "high", "max"]);
assert_eq!(labels, vec!["auto", "off", "high", "max"]);
}
}
+195 -33
View File
@@ -85,8 +85,8 @@ use crate::tui::user_input::UserInputView;
use super::active_cell::ActiveCell;
use super::app::{
App, AppAction, AppMode, OnboardingState, QueuedMessage, SidebarFocus, StatusToastLevel,
SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions,
App, AppAction, AppMode, OnboardingState, QueuedMessage, ReasoningEffort, SidebarFocus,
StatusToastLevel, SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions,
};
use super::approval::{
ApprovalMode, ApprovalRequest, ApprovalView, ElevationRequest, ElevationView, ReviewDecision,
@@ -924,8 +924,13 @@ async fn run_event_loop(
}
// Update session cost
let pricing_model = if app.auto_model {
app.last_effective_model.as_deref().unwrap_or(&app.model)
} else {
&app.model
};
let turn_cost =
crate::pricing::calculate_turn_cost_from_usage(&app.model, &usage);
crate::pricing::calculate_turn_cost_from_usage(pricing_model, &usage);
if let Some(cost) = turn_cost {
app.accrue_session_cost(cost);
}
@@ -1031,7 +1036,12 @@ async fn run_event_loop(
} => {
app.api_messages = messages;
app.system_prompt = system_prompt;
app.model = model;
if app.auto_model {
app.last_effective_model = Some(model);
} else {
app.model = model;
app.last_effective_model = None;
}
app.update_model_compaction_budget();
app.workspace = workspace;
if (app.is_loading || app.is_compacting)
@@ -1317,7 +1327,8 @@ async fn run_event_loop(
}
if let Some(next) = queued_to_send {
if let Err(err) = dispatch_user_message(app, &engine_handle, next.clone()).await {
if let Err(err) = dispatch_user_message(app, config, &engine_handle, next.clone()).await
{
app.queue_message(next);
app.status_message = Some(format!(
"Dispatch failed ({err}); kept {} queued message(s)",
@@ -2422,7 +2433,7 @@ async fn run_event_loop(
app.close_slash_menu();
}
if let Some(input) = app.submit_input() {
if handle_plan_choice(app, &engine_handle, &input).await? {
if handle_plan_choice(app, config, &engine_handle, &input).await? {
continue;
}
// `# foo` quick-add (#492) — when memory is enabled,
@@ -2473,7 +2484,7 @@ async fn run_event_loop(
})
.await;
}
submit_or_steer_message(app, &engine_handle, queued).await?;
submit_or_steer_message(app, config, &engine_handle, queued).await?;
}
}
}
@@ -3229,6 +3240,7 @@ fn queued_message_content_for_app(
async fn dispatch_user_message(
app: &mut App,
config: &Config,
engine_handle: &EngineHandle,
message: QueuedMessage,
) -> Result<()> {
@@ -3300,21 +3312,61 @@ async fn dispatch_user_message(
persistence_actor::persist(PersistRequest::Checkpoint(session));
}
// Resolve the effective model: when auto_model is active, use the
// heuristic to pick between Pro and Flash based on the user's input.
let auto_selection = if app.auto_model || app.reasoning_effort == ReasoningEffort::Auto {
Some(resolve_auto_model_selection(app, config, &message, &content).await)
} else {
None
};
let effective_model = if app.auto_model {
commands::auto_model_heuristic(&message.display, &app.model)
auto_selection
.as_ref()
.map(|selection| selection.model.clone())
.unwrap_or_else(|| commands::auto_model_heuristic(&message.display, &app.model))
} else {
app.model.clone()
};
let auto_controls_reasoning = app.auto_model || app.reasoning_effort == ReasoningEffort::Auto;
let effective_reasoning_effort = if auto_controls_reasoning {
let effort = auto_selection
.as_ref()
.and_then(|selection| selection.reasoning_effort)
.unwrap_or_else(|| {
normalize_auto_routed_effort(crate::auto_reasoning::select(false, &message.display))
});
app.last_effective_reasoning_effort = Some(effort);
Some(effort.as_setting().to_string())
} else {
app.last_effective_reasoning_effort = None;
app.reasoning_effort.api_value().map(str::to_string)
};
if let Some(selection) = auto_selection.as_ref() {
if app.auto_model {
app.last_effective_model = Some(effective_model.clone());
let mut status = format!(
"Auto model selected: {effective_model} via {}",
selection.source.label()
);
if let Some(effort) = app.last_effective_reasoning_effort {
status.push_str(&format!("; thinking auto: {}", effort.as_setting()));
}
app.status_message = Some(status);
}
} else {
app.last_effective_model = None;
}
if let Err(err) = engine_handle
.send(Op::SendMessage {
content,
mode: app.mode,
model: effective_model,
goal_objective: app.goal.goal_objective.clone(),
reasoning_effort: app.reasoning_effort.api_value().map(str::to_string),
reasoning_effort: effective_reasoning_effort,
reasoning_effort_auto: auto_controls_reasoning,
auto_model: app.auto_model,
allow_shell: app.allow_shell,
trust_mode: app.trust_mode,
auto_approve: app.mode == AppMode::Yolo,
@@ -3329,6 +3381,95 @@ async fn dispatch_user_message(
Ok(())
}
/// Ask the auto router for a model/thinking selection for the outgoing turn.
///
/// The request text is `latest_content`, or the message's display text when
/// `latest_content` is blank. Recent conversation context and the current
/// model/thinking modes are passed along to
/// `commands::resolve_auto_route_with_flash`.
async fn resolve_auto_model_selection(
    app: &App,
    config: &Config,
    message: &QueuedMessage,
    latest_content: &str,
) -> commands::AutoRouteSelection {
    let latest_request = match latest_content.trim() {
        "" => message.display.as_str(),
        _ => latest_content,
    };
    let history = recent_auto_router_context(&app.api_messages);
    let model_mode = if app.auto_model { "auto" } else { "fixed" };
    let thinking_mode = app.reasoning_effort.as_setting();

    commands::resolve_auto_route_with_flash(
        config,
        latest_request,
        &history,
        model_mode,
        thinking_mode,
    )
    .await
}
/// Thin local alias that forwards to `commands::normalize_auto_route_effort`
/// and returns its result unchanged.
fn normalize_auto_routed_effort(effort: ReasoningEffort) -> ReasoningEffort {
commands::normalize_auto_route_effort(effort)
}
/// Summarise recent conversation history for the auto router.
///
/// Walks the messages newest-first, skipping the most recent one, and keeps
/// up to six messages that have non-blank text. Each kept message becomes a
/// `role: text` row with the text cut to 900 characters. Rows are returned
/// oldest-first, joined by newlines; `"No prior context."` is returned when
/// nothing qualifies.
fn recent_auto_router_context(messages: &[Message]) -> String {
    let mut rows: Vec<String> = messages
        .iter()
        .rev()
        .skip(1)
        .filter_map(|message| {
            let flattened = content_blocks_text(&message.content);
            let body = flattened.trim();
            (!body.is_empty()).then(|| {
                format!(
                    "{}: {}",
                    message.role,
                    truncate_for_auto_router(body, 900)
                )
            })
        })
        .take(6)
        .collect();

    if rows.is_empty() {
        return "No prior context.".to_string();
    }
    // Collected newest-first; present in chronological order.
    rows.reverse();
    rows.join("\n")
}
/// Flatten a message's content blocks into newline-separated router text.
///
/// `Text` and `Thinking` blocks contribute their text, `ToolUse` becomes
/// `[tool call: <name>]`, and `ToolResult` becomes `[tool result] <content>`.
/// Other block kinds are ignored.
fn content_blocks_text(blocks: &[ContentBlock]) -> String {
    blocks.iter().fold(String::new(), |mut acc, block| {
        match block {
            ContentBlock::Text { text, .. } => append_router_text(&mut acc, text),
            ContentBlock::Thinking { thinking } => append_router_text(&mut acc, thinking),
            ContentBlock::ToolUse { name, .. } => {
                append_router_text(&mut acc, &format!("[tool call: {name}]"));
            }
            ContentBlock::ToolResult { content, .. } => {
                append_router_text(&mut acc, &format!("[tool result] {content}"));
            }
            _ => {}
        }
        acc
    })
}
/// Append `text` to `out`, preceded by a newline unless `out` is still empty.
fn append_router_text(out: &mut String, text: &str) {
    if out.is_empty() {
        out.push_str(text);
        return;
    }
    out.push('\n');
    out.push_str(text);
}
/// Cut `text` to at most `max_chars` characters, appending `...` when
/// anything was dropped. Text that already fits is returned unchanged.
fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
    // Byte offset of the first character past the limit, if there is one.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
async fn apply_model_and_compaction_update(
engine_handle: &EngineHandle,
compaction: crate::compaction::CompactionConfig,
@@ -3423,11 +3564,15 @@ async fn apply_model_picker_choice(
app: &mut App,
engine_handle: &EngineHandle,
model: String,
effort: crate::tui::app::ReasoningEffort,
mut effort: crate::tui::app::ReasoningEffort,
previous_model: String,
previous_effort: crate::tui::app::ReasoningEffort,
) {
let model_changed = model != previous_model;
let model_is_auto = model.trim().eq_ignore_ascii_case("auto");
if model_is_auto {
effort = ReasoningEffort::Auto;
}
let model_changed = model != previous_model || app.auto_model != model_is_auto;
let effort_changed = effort != previous_effort;
if !model_changed && !effort_changed {
app.status_message = Some(format!(
@@ -3438,6 +3583,8 @@ async fn apply_model_picker_choice(
}
if model_changed {
app.auto_model = model_is_auto;
app.last_effective_model = None;
app.model = model.clone();
app.update_model_compaction_budget();
app.session.last_prompt_tokens = None;
@@ -3448,6 +3595,7 @@ async fn apply_model_picker_choice(
}
if effort_changed {
app.reasoning_effort = effort;
app.last_effective_reasoning_effort = None;
}
// Best-effort persist; surface a status warning if the settings file
@@ -3474,20 +3622,27 @@ async fn apply_model_picker_choice(
apply_model_and_compaction_update(engine_handle, app.compaction_config()).await;
}
let model_summary = if model_is_auto {
"auto (per-turn model)".to_string()
} else {
model.clone()
};
let previous_effort_summary = previous_effort.short_label();
let effort_summary = if effort == ReasoningEffort::Auto {
"auto (per-turn thinking)".to_string()
} else {
effort.short_label().to_string()
};
let mut summary = match (model_changed, effort_changed) {
(true, true) => format!(
"Model: {previous_model} → {model} · thinking: {} → {}",
previous_effort.short_label(),
effort.short_label()
),
(true, false) => format!(
"Model: {previous_model} → {model} · thinking {}",
effort.short_label()
"Model: {previous_model} → {model_summary} · thinking: {previous_effort_summary} → {effort_summary}"
),
(true, false) => {
format!("Model: {previous_model}{model_summary} · thinking {effort_summary}")
}
(false, true) => format!(
"Thinking: {} → {} · model {model}",
previous_effort.short_label(),
effort.short_label()
"Thinking: {previous_effort_summary} → {effort_summary} · model {model_summary}"
),
(false, false) => unreachable!(),
};
@@ -3896,7 +4051,7 @@ async fn apply_command_result(
}
AppAction::SendMessage(content) => {
let queued = build_queued_message(app, content);
submit_or_steer_message(app, engine_handle, queued).await?;
submit_or_steer_message(app, config, engine_handle, queued).await?;
}
AppAction::Rlm {
prompt,
@@ -4417,11 +4572,14 @@ async fn queue_follow_up(app: &mut App, message: QueuedMessage) -> Result<()> {
async fn submit_or_steer_message(
app: &mut App,
config: &Config,
engine_handle: &EngineHandle,
message: QueuedMessage,
) -> Result<()> {
match app.decide_submit_disposition() {
SubmitDisposition::Immediate => dispatch_user_message(app, engine_handle, message).await,
SubmitDisposition::Immediate => {
dispatch_user_message(app, config, engine_handle, message).await
}
SubmitDisposition::Queue => {
let count = app.queued_message_count().saturating_add(1);
app.queue_message(message);
@@ -4523,6 +4681,7 @@ fn parse_plan_choice(input: &str) -> Option<PlanChoice> {
async fn apply_plan_choice(
app: &mut App,
config: &Config,
engine_handle: &EngineHandle,
choice: PlanChoice,
) -> Result<()> {
@@ -4539,7 +4698,7 @@ async fn apply_plan_choice(
app.status_message =
Some("Queued accepted plan execution (agent mode).".to_string());
} else {
dispatch_user_message(app, engine_handle, followup).await?;
dispatch_user_message(app, config, engine_handle, followup).await?;
}
}
PlanChoice::AcceptYolo => {
@@ -4554,7 +4713,7 @@ async fn apply_plan_choice(
app.status_message =
Some("Queued accepted plan execution (YOLO mode).".to_string());
} else {
dispatch_user_message(app, engine_handle, followup).await?;
dispatch_user_message(app, config, engine_handle, followup).await?;
}
}
PlanChoice::RevisePlan => {
@@ -4576,6 +4735,7 @@ async fn apply_plan_choice(
async fn handle_plan_choice(
app: &mut App,
config: &Config,
engine_handle: &EngineHandle,
input: &str,
) -> Result<bool> {
@@ -4590,7 +4750,7 @@ async fn handle_plan_choice(
return Ok(false);
};
apply_plan_choice(app, engine_handle, choice).await?;
apply_plan_choice(app, config, engine_handle, choice).await?;
Ok(true)
}
@@ -4714,7 +4874,8 @@ fn render(f: &mut Frame, app: &mut App) {
.and_then(|value| value.to_str())
.filter(|value| !value.is_empty())
.unwrap_or("workspace");
let effort_label = app.reasoning_effort.short_label();
let model_label = app.model_display_label();
let effort_label = app.reasoning_effort_display_label();
let provider_label = match app.api_provider {
crate::config::ApiProvider::Deepseek => None,
crate::config::ApiProvider::DeepseekCN => None,
@@ -4726,7 +4887,7 @@ fn render(f: &mut Frame, app: &mut App) {
};
let header_data = HeaderData::new(
app.mode,
&app.model,
&model_label,
workspace_name,
app.is_loading,
app.ui_theme.header_bg,
@@ -4737,7 +4898,7 @@ fn render(f: &mut Frame, app: &mut App) {
app.session.session_cost,
sanitized_prompt_tokens,
)
.with_reasoning_effort(Some(effort_label))
.with_reasoning_effort(Some(&effort_label))
.with_provider(provider_label);
let header_widget = HeaderWidget::new(header_data);
let buf = f.buffer_mut();
@@ -5016,7 +5177,8 @@ async fn handle_view_events(
if app.plan_prompt_pending {
app.plan_prompt_pending = false;
if let Some(choice) = plan_choice_from_option(option)
&& let Err(err) = apply_plan_choice(app, engine_handle, choice).await
&& let Err(err) =
apply_plan_choice(app, config, engine_handle, choice).await
{
app.status_message = Some(format!("Failed to apply plan selection: {err}"));
}
@@ -6496,7 +6658,7 @@ fn estimated_context_tokens(app: &App) -> Option<i64> {
}
fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
let max = context_window_for_model(&app.model)?;
let max = context_window_for_model(app.effective_model_for_budget())?;
let max_i64 = i64::from(max);
let reported = app
.session
+7 -3
View File
@@ -699,10 +699,12 @@ async fn model_change_update_syncs_engine_model_before_compaction() {
async fn dispatch_user_message_failed_send_clears_loading_state() {
let mut app = create_test_app();
let engine = mock_engine_handle();
let config = Config::default();
drop(engine.rx_op);
let result = dispatch_user_message(
&mut app,
&config,
&engine.handle,
QueuedMessage::new("hello".to_string(), None),
)
@@ -1469,8 +1471,9 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() {
app.offline_mode = true;
let engine = crate::core::engine::mock_engine_handle();
let config = Config::default();
let handled = handle_plan_choice(&mut app, &engine.handle, "yolo")
let handled = handle_plan_choice(&mut app, &config, &engine.handle, "yolo")
.await
.expect("plan choice");
@@ -1479,7 +1482,7 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() {
assert_eq!(app.mode, AppMode::Plan);
let queued = build_queued_message(&mut app, "yolo".to_string());
submit_or_steer_message(&mut app, &engine.handle, queued)
submit_or_steer_message(&mut app, &config, &engine.handle, queued)
.await
.expect("submit normal message");
@@ -1504,8 +1507,9 @@ async fn numeric_plan_choice_still_queues_follow_up_when_busy() {
app.is_loading = true;
let engine = crate::core::engine::mock_engine_handle();
let config = Config::default();
let handled = handle_plan_choice(&mut app, &engine.handle, "2")
let handled = handle_plan_choice(&mut app, &config, &engine.handle, "2")
.await
.expect("plan choice");
+1 -1
View File
@@ -275,7 +275,7 @@ impl FooterProps {
// to cross the 60s threshold inside `footer_worked_chip`.
let worked = footer_worked_chip(app.cumulative_turn_duration);
Self {
model: app.model.clone(),
model: app.model_display_label(),
mode_label,
mode_color,
text_dim_color: app.ui_theme.text_dim,