fix: restore auto model routing (#772)

Keep auto as a local routing mode, resolve concrete model/thinking before API requests, and wire auto routing through CLI, TUI, runtime threads, and subagents.
2026-05-05 21:22:03 -05:00
parent 63e2234c6b
commit 50780a5289
18 changed files with 1120 additions and 127 deletions
@@ -1,14 +1,19 @@
 //! Config commands: config, settings, mode switches, trust, logout

 use std::path::{Path, PathBuf};
+use std::time::Duration;

 use super::CommandResult;
+use crate::client::DeepSeekClient;
 use crate::config::{COMMON_DEEPSEEK_MODELS, clear_api_key, normalize_model_name};
 use crate::config_ui::{ConfigUiMode, parse_mode};
+use crate::llm_client::LlmClient;
 use crate::localization::resolve_locale;
+use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
 use crate::settings::Settings;
-use crate::tui::app::{App, AppAction, AppMode, OnboardingState, SidebarFocus};
+use crate::tui::app::{App, AppAction, AppMode, OnboardingState, ReasoningEffort, SidebarFocus};
 use crate::tui::approval::ApprovalMode;
+use anyhow::Result;

 /// Open the interactive config editor.
 ///
@@ -91,7 +96,13 @@ fn show_single_setting(app: &App, key: &str) -> CommandResult {
    let value = match key.as_str() {
        "model" => {
            if app.auto_model {
-                Some("auto (auto-select by request complexity)".to_string())
+                let mut label = "auto (auto-select model per turn)".to_string();
+                if let Some(effective) = app.last_effective_model.as_deref()
+                    && effective != "auto"
+                {
+                    label.push_str(&format!("; last: {effective}"));
+                }
+                Some(label)
            } else {
                Some(app.model.clone())
            }
@@ -247,16 +258,20 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
            if value.trim().eq_ignore_ascii_case("auto") {
                app.auto_model = true;
                app.model = "auto".to_string();
+                app.last_effective_model = None;
+                app.reasoning_effort = ReasoningEffort::Auto;
+                app.last_effective_reasoning_effort = None;
                app.update_model_compaction_budget();
                app.session.last_prompt_tokens = None;
                app.session.last_completion_tokens = None;
                return CommandResult::with_message_and_action(
-                    "model = auto (auto-select by request complexity)".to_string(),
+                    "model = auto (auto-select model and thinking per turn)".to_string(),
                    AppAction::UpdateCompaction(app.compaction_config()),
                );
            }
            // Clear auto mode when a specific model is set
            app.auto_model = false;
+            app.last_effective_model = None;
            let Some(model) = normalize_model_name(value) else {
                return CommandResult::error(format!(
                    "Invalid model '{value}'. Expected a DeepSeek model ID. Common models: {}",
@@ -385,7 +400,13 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
        }
        "default_model" => {
            if let Some(ref model) = settings.default_model {
+                app.auto_model = model.trim().eq_ignore_ascii_case("auto");
                app.model.clone_from(model);
+                app.last_effective_model = None;
+                if app.auto_model {
+                    app.reasoning_effort = ReasoningEffort::Auto;
+                    app.last_effective_reasoning_effort = None;
+                }
                app.update_model_compaction_budget();
                app.session.last_prompt_tokens = None;
                app.session.last_completion_tokens = None;
@@ -604,14 +625,6 @@ fn expand_tilde(raw: &str) -> String {
 /// Default → Flash (cost savings).
 pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
    let len = input.chars().count();
-    // Short messages → Flash
-    if len < 100 {
-        return "deepseek-v4-flash".to_string();
-    }
-    // Long complex requests → Pro
-    if len > 500 {
-        return "deepseek-v4-pro".to_string();
-    }
    let lower = input.to_lowercase();
    let complex_keywords = [
        "refactor",
@@ -630,10 +643,249 @@ pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
    if complex_keywords.iter().any(|kw| lower.contains(kw)) {
        return "deepseek-v4-pro".to_string();
    }
+    // Short messages → Flash
+    if len < 100 {
+        return "deepseek-v4-flash".to_string();
+    }
+    // Long complex requests → Pro
+    if len > 500 {
+        return "deepseek-v4-pro".to_string();
+    }
    // Default to Flash for cost savings
    "deepseek-v4-flash".to_string()
 }

+/// Model and reasoning tier extracted from the Flash router's JSON reply.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct AutoRouteRecommendation {
+    /// Canonical model ID the router picked.
+    pub model: String,
+    /// Reasoning tier the router picked; `None` when the reply carried no
+    /// recognized tier.
+    pub reasoning_effort: Option<ReasoningEffort>,
+}
+
+/// Which mechanism produced an auto-routing decision.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum AutoRouteSource {
+    /// The Flash classifier call returned a usable recommendation.
+    FlashRouter,
+    /// The local deterministic heuristic was used instead.
+    Heuristic,
+}
+
+impl AutoRouteSource {
+    /// Short, stable identifier for this source.
+    #[must_use]
+    pub fn label(self) -> &'static str {
+        match self {
+            Self::FlashRouter => "flash-router",
+            Self::Heuristic => "heuristic",
+        }
+    }
+}
+
+/// Final auto-routing decision together with the mechanism that produced it.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct AutoRouteSelection {
+    /// Concrete model ID to use for the request.
+    pub model: String,
+    /// Reasoning tier to use, if one was determined.
+    pub reasoning_effort: Option<ReasoningEffort>,
+    /// Whether the Flash router or the heuristic made the choice.
+    pub source: AutoRouteSource,
+}
+
+/// System prompt sent with the Flash classifier request.
+///
+/// It instructs the classifier to answer with a compact JSON object holding
+/// a `model` and a `thinking` field.
+pub const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = "\
+You are the DeepSeek TUI auto-routing classifier. Return only compact JSON: \
+{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
+Use deepseek-v4-flash for trivial, conversational, status, or single-step work. \
+Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, \
+tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. \
+Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \
+agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.";
+
+/// Parse the Flash router's JSON-only response.
+///
+/// The runtime treats classifier output as untrusted: only known V4 model IDs
+/// and supported reasoning tiers are accepted.
+///
+/// Returns `None` when no JSON object can be extracted, the text is not valid
+/// JSON, or the `model` field is missing, not a string, or not a known model
+/// alias. A missing or unrecognized reasoning tier does NOT reject the
+/// response: the recommendation is still returned with
+/// `reasoning_effort: None`.
+pub fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteRecommendation> {
+    let json = extract_first_json_object(raw)?;
+    let value: serde_json::Value = serde_json::from_str(json).ok()?;
+    let model = value.get("model").and_then(serde_json::Value::as_str)?;
+    let model = normalize_auto_route_model(model)?;
+    // The tier is read from the first key that is present among `thinking`,
+    // `reasoning_effort`, and `effort`. If that key holds a non-string value,
+    // the later aliases are not consulted and the tier stays `None`.
+    let reasoning_effort = value
+        .get("thinking")
+        .or_else(|| value.get("reasoning_effort"))
+        .or_else(|| value.get("effort"))
+        .and_then(serde_json::Value::as_str)
+        .and_then(parse_auto_route_reasoning_effort);
+
+    Some(AutoRouteRecommendation {
+        model: model.to_string(),
+        reasoning_effort,
+    })
+}
+
+/// Return the first balanced `{ ... }` span in `raw`, if any.
+///
+/// Scanning starts at the first `{` and tracks brace depth, ignoring braces
+/// that appear inside double-quoted strings (including after `\` escapes).
+/// This keeps a valid object usable even when the model appends trailing text
+/// or a second object that contains `}`; slicing to the last `}` would turn
+/// such replies into unparsable JSON.
+///
+/// Returns `None` when there is no `{` or the object opened by the first `{`
+/// is never closed.
+fn extract_first_json_object(raw: &str) -> Option<&str> {
+    let start = raw.find('{')?;
+    let mut depth = 0usize;
+    let mut in_string = false;
+    let mut escaped = false;
+
+    // Every delimiter of interest is ASCII, so byte offsets found here always
+    // fall on UTF-8 character boundaries and are safe to slice with.
+    for (offset, byte) in raw[start..].bytes().enumerate() {
+        if in_string {
+            if escaped {
+                escaped = false;
+            } else if byte == b'\\' {
+                escaped = true;
+            } else if byte == b'"' {
+                in_string = false;
+            }
+            continue;
+        }
+        match byte {
+            b'"' => in_string = true,
+            b'{' => depth += 1,
+            b'}' => {
+                // `depth` is at least 1 here: the scan starts on `{` and
+                // returns as soon as the depth drops back to zero.
+                depth -= 1;
+                if depth == 0 {
+                    return Some(&raw[start..=start + offset]);
+                }
+            }
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Map a router-supplied model name or alias to its canonical V4 model ID.
+///
+/// Matching ignores surrounding whitespace and ASCII case. Unknown names
+/// yield `None`.
+fn normalize_auto_route_model(model: &str) -> Option<&'static str> {
+    const PRO: &str = "deepseek-v4-pro";
+    const FLASH: &str = "deepseek-v4-flash";
+
+    let candidate = model.trim();
+    let matches_any = |aliases: &[&str]| {
+        aliases
+            .iter()
+            .any(|alias| candidate.eq_ignore_ascii_case(alias))
+    };
+
+    if matches_any(&[PRO, "v4-pro", "pro"]) {
+        Some(PRO)
+    } else if matches_any(&[FLASH, "v4-flash", "flash"]) {
+        Some(FLASH)
+    } else {
+        None
+    }
+}
+
+/// Translate a router-supplied thinking tier into a [`ReasoningEffort`].
+///
+/// Matching ignores surrounding whitespace and ASCII case. The legacy
+/// low/medium spellings are accepted and mapped to `High`; unknown values
+/// yield `None`.
+fn parse_auto_route_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
+    let tier = effort.trim().to_ascii_lowercase();
+    let parsed = match tier.as_str() {
+        "off" | "disabled" | "none" | "false" => ReasoningEffort::Off,
+        "low" | "minimal" | "medium" | "mid" | "high" => ReasoningEffort::High,
+        "max" | "maximum" | "xhigh" => ReasoningEffort::Max,
+        _ => return None,
+    };
+    Some(parsed)
+}
+
+/// Raise `Low` and `Medium` to `High`; every other tier is returned as is.
+#[must_use]
+pub fn normalize_auto_route_effort(effort: ReasoningEffort) -> ReasoningEffort {
+    if matches!(effort, ReasoningEffort::Low | ReasoningEffort::Medium) {
+        ReasoningEffort::High
+    } else {
+        effort
+    }
+}
+
+/// Resolve a concrete model and reasoning tier for an auto-routed request.
+///
+/// Asks the Flash router first. If that call fails or yields no usable
+/// recommendation, the local heuristic decides instead, so this function
+/// always returns a selection. Router errors are intentionally discarded.
+pub async fn resolve_auto_route_with_flash(
+    config: &crate::config::Config,
+    latest_request: &str,
+    recent_context: &str,
+    selected_model_mode: &str,
+    selected_thinking_mode: &str,
+) -> AutoRouteSelection {
+    let outcome = auto_route_flash_recommendation(
+        config,
+        latest_request,
+        recent_context,
+        selected_model_mode,
+        selected_thinking_mode,
+    )
+    .await;
+
+    // Best-effort: both `Ok(None)` and `Err(_)` take the heuristic path.
+    let Ok(Some(recommendation)) = outcome else {
+        return fallback_auto_route(latest_request, selected_model_mode);
+    };
+
+    AutoRouteSelection {
+        model: recommendation.model,
+        reasoning_effort: recommendation.reasoning_effort,
+        source: AutoRouteSource::FlashRouter,
+    }
+}
+
+/// Build a selection from the local heuristics, without calling the router.
+///
+/// The model comes from `auto_model_heuristic`; the reasoning tier comes from
+/// `auto_reasoning::select`, passed through `normalize_auto_route_effort`.
+fn fallback_auto_route(latest_request: &str, selected_model_mode: &str) -> AutoRouteSelection {
+    let model = auto_model_heuristic(latest_request, selected_model_mode);
+    let heuristic_effort = crate::auto_reasoning::select(false, latest_request);
+    let reasoning_effort = Some(normalize_auto_route_effort(heuristic_effort));
+
+    AutoRouteSelection {
+        model,
+        reasoning_effort,
+        source: AutoRouteSource::Heuristic,
+    }
+}
+
+/// Ask `deepseek-v4-flash` to classify the request and parse its reply.
+///
+/// Returns `Ok(None)` when the reply cannot be parsed into a recommendation,
+/// and always in test builds, where no request is sent.
+///
+/// # Errors
+///
+/// Returns an error if the client cannot be constructed, the request fails,
+/// or the request does not complete within the four-second timeout.
+async fn auto_route_flash_recommendation(
+    config: &crate::config::Config,
+    latest_request: &str,
+    recent_context: &str,
+    selected_model_mode: &str,
+    selected_thinking_mode: &str,
+) -> Result<Option<AutoRouteRecommendation>> {
+    // Test builds skip the network call; callers then take their fallback path.
+    if cfg!(test) {
+        return Ok(None);
+    }
+
+    let client = DeepSeekClient::new(config)?;
+    // Small, deterministic, non-streaming classifier request: short output
+    // budget, temperature 0, reasoning off.
+    let request = MessageRequest {
+        model: "deepseek-v4-flash".to_string(),
+        messages: vec![Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: auto_route_prompt(
+                    latest_request,
+                    recent_context,
+                    selected_model_mode,
+                    selected_thinking_mode,
+                ),
+                cache_control: None,
+            }],
+        }],
+        max_tokens: 96,
+        system: Some(SystemPrompt::Text(
+            AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string(),
+        )),
+        tools: None,
+        tool_choice: None,
+        metadata: None,
+        thinking: None,
+        reasoning_effort: Some("off".to_string()),
+        stream: Some(false),
+        temperature: Some(0.0),
+        top_p: None,
+    };
+
+    // The first `?` surfaces the timeout, the second the client error.
+    let response =
+        tokio::time::timeout(Duration::from_secs(4), client.create_message(request)).await??;
+    Ok(parse_auto_route_recommendation(&message_response_text(
+        &response,
+    )))
+}
+
+/// Render the user message sent to the Flash router.
+///
+/// A blank `recent_context` is replaced with a placeholder line, and
+/// `latest_request` is capped at 4,000 characters. `recent_context` itself is
+/// inserted as given.
+fn auto_route_prompt(
+    latest_request: &str,
+    recent_context: &str,
+    selected_model_mode: &str,
+    selected_thinking_mode: &str,
+) -> String {
+    let context_section = match recent_context.trim() {
+        "" => "No prior context.",
+        _ => recent_context,
+    };
+    let request_section = truncate_for_auto_router(latest_request, 4_000);
+
+    format!(
+        "Session mode: agent\nSelected model mode: {}\nSelected thinking mode: {}\n\nRecent context:\n{}\n\nLatest user request:\n{}\n\nReturn JSON only.",
+        selected_model_mode, selected_thinking_mode, context_section, request_section
+    )
+}
+
+/// Flatten a response's content blocks into newline-separated text.
+///
+/// Text, tool-result, and thinking blocks contribute their text; tool-use
+/// blocks contribute a `[tool call: <name>]` marker; any other block kind is
+/// skipped.
+// NOTE(review): thinking text is included ahead of the parsed JSON search, so
+// braces inside it could be picked up by the router parser — confirm whether
+// only `Text` blocks should feed the router.
+fn message_response_text(response: &MessageResponse) -> String {
+    let mut out = String::new();
+    for block in &response.content {
+        match block {
+            ContentBlock::Text { text, .. } | ContentBlock::ToolResult { content: text, .. } => {
+                append_router_text(&mut out, text);
+            }
+            ContentBlock::Thinking { thinking } => {
+                append_router_text(&mut out, thinking);
+            }
+            ContentBlock::ToolUse { name, .. } => {
+                append_router_text(&mut out, &format!("[tool call: {name}]"));
+            }
+            _ => {}
+        }
+    }
+    out
+}
+
+/// Append `text` to `out`, inserting a newline first unless `out` is empty.
+fn append_router_text(out: &mut String, text: &str) {
+    let separator = if out.is_empty() { "" } else { "\n" };
+    out.push_str(separator);
+    out.push_str(text);
+}
+
+/// Keep at most `max_chars` characters of `text`, appending `...` when
+/// anything was cut off.
+///
+/// Counts Unicode scalar values, not bytes, so multi-byte characters are
+/// never split.
+fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
+    // The character at index `max_chars` exists only if the text is too long;
+    // its byte offset is where the kept prefix ends.
+    match text.char_indices().nth(max_chars) {
+        Some((cut, _)) => {
+            let truncated = &text[..cut];
+            format!("{truncated}...")
+        }
+        None => text.to_owned(),
+    }
+}
+
 /// Toggle LSP diagnostics on/off or show status.
 ///
 /// - `/lsp on` — enable inline LSP diagnostics
@@ -864,6 +1116,21 @@ mod tests {
        ));
    }

+    /// `model auto` via the config command turns on auto routing, switches
+    /// thinking to `Auto`, and clears the last effective model/effort.
+    #[test]
+    fn test_set_model_auto_enables_auto_thinking() {
+        let mut app = create_test_app();
+        app.reasoning_effort = ReasoningEffort::Off;
+
+        let result = set_config(&mut app, Some("model auto"));
+
+        assert!(result.message.is_some());
+        assert!(app.auto_model);
+        assert_eq!(app.model, "auto");
+        assert_eq!(app.reasoning_effort, ReasoningEffort::Auto);
+        assert!(app.last_effective_model.is_none());
+        assert!(app.last_effective_reasoning_effort.is_none());
+    }
+
    #[test]
    fn test_set_model_accepts_future_deepseek_model_id() {
        let mut app = create_test_app();
@@ -883,6 +1150,45 @@ mod tests {
        assert_eq!(app.model, "deepseek-v4-flash");
    }

+    /// A bare JSON object with canonical field names parses as is.
+    #[test]
+    fn auto_route_recommendation_parses_strict_json() {
+        let rec =
+            parse_auto_route_recommendation(r#"{"model":"deepseek-v4-pro","thinking":"max"}"#)
+                .expect("valid router response should parse");
+
+        assert_eq!(rec.model, "deepseek-v4-pro");
+        assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Max));
+    }
+
+    /// Leading prose, the `flash` model alias, and the `reasoning_effort`
+    /// key alias are all tolerated.
+    #[test]
+    fn auto_route_recommendation_accepts_wrapped_json_aliases() {
+        let rec =
+            parse_auto_route_recommendation(r#"route: {"model":"flash","reasoning_effort":"off"}"#)
+                .expect("wrapped router response should parse");
+
+        assert_eq!(rec.model, "deepseek-v4-flash");
+        assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Off));
+    }
+
+    /// The legacy `medium` tier is accepted and mapped to `High`.
+    #[test]
+    fn auto_route_recommendation_normalizes_legacy_low_medium_to_high() {
+        let rec = parse_auto_route_recommendation(
+            r#"{"model":"deepseek-v4-pro","reasoning_effort":"medium"}"#,
+        )
+        .expect("medium should parse for back-compat");
+
+        assert_eq!(rec.model, "deepseek-v4-pro");
+        assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::High));
+    }
+
+    /// A model name outside the known aliases rejects the whole response.
+    #[test]
+    fn auto_route_recommendation_rejects_unknown_model() {
+        assert!(
+            parse_auto_route_recommendation(r#"{"model":"some-other-model","thinking":"max"}"#,)
+                .is_none()
+        );
+    }
+
    #[test]
    fn test_set_default_mode_normal_save_reports_normalized_value() {
        let _lock = lock_test_env();
@@ -4,7 +4,7 @@ use std::fmt::Write;

 use crate::config::{COMMON_DEEPSEEK_MODELS, normalize_model_name};
 use crate::localization::{MessageId, tr};
-use crate::tui::app::{App, AppAction, AppMode};
+use crate::tui::app::{App, AppAction, AppMode, ReasoningEffort};
 use crate::tui::views::{HelpView, ModalKind, SubAgentsView};

 use super::CommandResult;
@@ -91,14 +91,33 @@ pub fn exit() -> CommandResult {
 /// way to flip both knobs without memorising the docs.
 pub fn model(app: &mut App, model_name: Option<&str>) -> CommandResult {
    if let Some(name) = model_name {
+        if name.trim().eq_ignore_ascii_case("auto") {
+            let old_model = app.model_display_label();
+            app.auto_model = true;
+            app.model = "auto".to_string();
+            app.last_effective_model = None;
+            app.reasoning_effort = ReasoningEffort::Auto;
+            app.last_effective_reasoning_effort = None;
+            app.update_model_compaction_budget();
+            app.session.last_prompt_tokens = None;
+            app.session.last_completion_tokens = None;
+            return CommandResult::with_message_and_action(
+                tr(app.ui_locale, MessageId::ModelChanged)
+                    .replace("{old}", &old_model)
+                    .replace("{new}", "auto"),
+                AppAction::UpdateCompaction(app.compaction_config()),
+            );
+        }
        let Some(model_id) = normalize_model_name(name) else {
            return CommandResult::error(format!(
-                "Invalid model '{name}'. Expected a DeepSeek model ID. Common models: {}",
+                "Invalid model '{name}'. Expected auto or a DeepSeek model ID. Common models: {}",
                COMMON_DEEPSEEK_MODELS.join(", ")
            ));
        };
-        let old_model = app.model.clone();
+        let old_model = app.model_display_label();
+        app.auto_model = false;
        app.model = model_id.clone();
+        app.last_effective_model = None;
        app.update_model_compaction_budget();
        app.session.last_prompt_tokens = None;
        app.session.last_completion_tokens = None;
@@ -427,6 +446,21 @@ mod tests {
        assert_eq!(app.session.last_completion_tokens, None);
    }

+    /// `/model auto` turns on auto routing, switches thinking to `Auto`, and
+    /// clears the last effective model/effort.
+    #[test]
+    fn test_model_auto_enables_auto_thinking() {
+        let mut app = create_test_app();
+        app.reasoning_effort = ReasoningEffort::Off;
+
+        let result = model(&mut app, Some("auto"));
+
+        assert!(result.message.is_some());
+        assert!(app.auto_model);
+        assert_eq!(app.model, "auto");
+        assert_eq!(app.reasoning_effort, ReasoningEffort::Auto);
+        assert!(app.last_effective_model.is_none());
+        assert!(app.last_effective_reasoning_effort.is_none());
+    }
+
    #[test]
    fn test_model_change_accepts_future_deepseek_model() {
        let mut app = create_test_app();
@@ -614,6 +614,11 @@ pub fn auto_model_heuristic(input: &str, current_model: &str) -> String {
    config::auto_model_heuristic(input, current_model)
 }

+pub use config::{
+    AutoRouteRecommendation, AutoRouteSelection, normalize_auto_route_effort,
+    parse_auto_route_recommendation, resolve_auto_route_with_flash,
+};
+
 /// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from
 /// Zhang et al. (arXiv:2512.24601).
 ///
@@ -977,10 +977,11 @@ impl Config {
            }
        }
        if let Some(model) = self.default_text_model.as_deref()
+            && !model.trim().eq_ignore_ascii_case("auto")
            && normalize_model_name(model).is_none()
        {
            anyhow::bail!(
-                "Invalid default_text_model '{model}': expected a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)."
+                "Invalid default_text_model '{model}': expected auto or a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)."
            );
        }
        if let Some(policy) = self.approval_policy.as_deref() {
@@ -1095,6 +1096,11 @@ impl Config {
        {
            return normalized;
        }
+        if let Some(model) = self.default_text_model.as_deref()
+            && model.trim().eq_ignore_ascii_case("auto")
+        {
+            return "auto".to_string();
+        }
        if let Some(model) = self.default_text_model.as_deref()
            && let Some(normalized) = normalize_model_name(model)
        {
@@ -3420,6 +3426,17 @@ api_key = "old-openrouter-key"
        Ok(())
    }

+    /// `default_text_model = "auto"` passes validation and is reported
+    /// unchanged by `default_model()`.
+    #[test]
+    fn validate_accepts_auto_default_text_model() -> Result<()> {
+        let config = Config {
+            default_text_model: Some("auto".to_string()),
+            ..Default::default()
+        };
+        config.validate()?;
+        assert_eq!(config.default_model(), "auto");
+        Ok(())
+    }
+
    #[test]
    fn deepseek_model_env_overrides_default_text_model() -> Result<()> {
        let _lock = lock_test_env();
@@ -539,7 +539,9 @@ pub fn open_browser(url: &str) -> Result<()> {
 }

 fn validate_document(doc: &ConfigUiDocument) -> Result<()> {
-    if normalize_model_name(&doc.runtime.model).is_none() {
+    if !doc.runtime.model.trim().eq_ignore_ascii_case("auto")
+        && normalize_model_name(&doc.runtime.model).is_none()
+    {
        bail!("invalid model '{}'", doc.runtime.model);
    }
    if doc.config.mcp_config_path.trim().is_empty() {
@@ -557,6 +559,7 @@ fn reload_runtime_config(app: &mut App, config: &mut Config) -> Result<()> {
            .reasoning_effort()
            .unwrap_or_else(|| app.reasoning_effort.as_setting()),
    );
+    app.last_effective_reasoning_effort = None;
    app.update_model_compaction_budget();
    app.mcp_config_path = reloaded.mcp_config_path();
    app.skills_dir = reloaded.skills_dir();
@@ -584,6 +587,7 @@ fn apply_reasoning_effort(
 ) -> Result<()> {
    let effort: ReasoningEffort = value.into();
    app.reasoning_effort = effort;
+    app.last_effective_reasoning_effort = None;
    app.update_model_compaction_budget();
    if persist {
        commands::persist_root_string_key("reasoning_effort", effort.as_setting())?;
@@ -844,7 +848,10 @@ mod tests {

    #[test]
    fn build_document_reflects_app_state() {
-        let app = app();
+        let mut app = app();
+        app.auto_model = false;
+        app.model = "deepseek-v4-pro".to_string();
+        app.reasoning_effort = ReasoningEffort::Max;
        let config = Config::default();
        let doc = build_document(&app, &config).expect("document");
        assert_eq!(doc.runtime.model, app.model);
@@ -48,6 +48,7 @@ use crate::tools::spec::RuntimeToolServices;
 use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult};
 use crate::tools::subagent::{
    Mailbox, SharedSubAgentManager, SubAgentRuntime, SubAgentType, new_shared_subagent_manager,
+    resolve_subagent_assignment_route,
 };
 use crate::tools::todo::{SharedTodoList, new_shared_todo_list};
 use crate::tools::user_input::{UserInputRequest, UserInputResponse};
@@ -515,6 +516,8 @@ impl Engine {
                    model,
                    goal_objective,
                    reasoning_effort,
+                    reasoning_effort_auto,
+                    auto_model,
                    allow_shell,
                    trust_mode,
                    auto_approve,
@@ -525,6 +528,8 @@ impl Engine {
                        model,
                        goal_objective,
                        reasoning_effort,
+                        reasoning_effort_auto,
+                        auto_model,
                        allow_shell,
                        trust_mode,
                        auto_approve,
@@ -564,7 +569,7 @@ impl Engine {
                        continue;
                    };

-                    let runtime = SubAgentRuntime::new(
+                    let mut runtime = SubAgentRuntime::new(
                        client,
                        self.session.model.clone(),
                        // Sub-agents don't inherit YOLO mode - use Agent mode defaults
@@ -574,8 +579,17 @@ impl Engine {
                        Arc::clone(&self.subagent_manager),
                    )
                    .with_role_models(self.config.subagent_model_overrides.clone())
+                    .with_auto_model(self.session.auto_model)
+                    .with_reasoning_effort(
+                        self.session.reasoning_effort.clone(),
+                        self.session.reasoning_effort_auto,
+                    )
                    .with_max_spawn_depth(self.config.max_spawn_depth)
                    .background_runtime();
+                    let route = resolve_subagent_assignment_route(&runtime, None, &prompt).await;
+                    runtime.model = route.model;
+                    runtime.reasoning_effort = route.reasoning_effort;
+                    runtime.reasoning_effort_auto = false;

                    let result = {
                        let mut manager = self.subagent_manager.write().await;
@@ -623,6 +637,7 @@ impl Engine {
                        .await;
                }
                Op::SetModel { model } => {
+                    self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
                    self.session.model = model;
                    self.config.model.clone_from(&self.session.model);
                    let _ = self
@@ -654,6 +669,7 @@ impl Engine {
                    self.session.compaction_summary_prompt =
                        extract_compaction_summary_prompt(system_prompt.clone());
                    self.session.system_prompt = system_prompt;
+                    self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
                    self.session.model = model;
                    self.session.workspace = workspace.clone();
                    self.config.model.clone_from(&self.session.model);
@@ -709,6 +725,8 @@ impl Engine {
                        self.session.model.clone(),
                        self.config.goal_objective.clone(),
                        self.session.reasoning_effort.clone(),
+                        self.session.reasoning_effort_auto,
+                        self.session.auto_model,
                        self.session.allow_shell,
                        self.session.trust_mode,
                        self.session.auto_approve,
@@ -758,6 +776,8 @@ impl Engine {
        model: String,
        goal_objective: Option<String>,
        reasoning_effort: Option<String>,
+        reasoning_effort_auto: bool,
+        auto_model: bool,
        allow_shell: bool,
        trust_mode: bool,
        auto_approve: bool,
@@ -838,6 +858,8 @@ impl Engine {
        self.config.model.clone_from(&self.session.model);
        self.config.goal_objective = goal_objective;
        self.session.reasoning_effort = reasoning_effort;
+        self.session.reasoning_effort_auto = reasoning_effort_auto;
+        self.session.auto_model = auto_model;
        self.session.allow_shell = allow_shell;
        self.config.allow_shell = allow_shell;
        self.session.trust_mode = trust_mode;
@@ -900,6 +922,11 @@ impl Engine {
                            Arc::clone(&self.subagent_manager),
                        )
                        .with_role_models(self.config.subagent_model_overrides.clone())
+                        .with_auto_model(self.session.auto_model)
+                        .with_reasoning_effort(
+                            self.session.reasoning_effort.clone(),
+                            self.session.reasoning_effort_auto,
+                        )
                        .with_max_spawn_depth(self.config.max_spawn_depth);
                        if let Some((mailbox, cancel_token)) = mailbox_for_runtime.as_ref() {
                            rt = rt
@@ -20,6 +20,11 @@ pub enum Op {
        /// Reasoning-effort tier: `"off" | "low" | "medium" | "high" | "max"`.
        /// `None` lets the provider apply its default.
        reasoning_effort: Option<String>,
+        /// True when the user selected auto thinking, even though the UI sends
+        /// a concrete per-turn value to the model API.
+        reasoning_effort_auto: bool,
+        /// True when the user selected auto model routing.
+        auto_model: bool,
        allow_shell: bool,
        trust_mode: bool,
        auto_approve: bool,
@@ -89,27 +94,3 @@ pub enum Op {
    /// Shutdown the engine
    Shutdown,
 }
-
-impl Op {
-    /// Create a send message operation
-    pub fn send(
-        content: impl Into<String>,
-        mode: AppMode,
-        model: impl Into<String>,
-        reasoning_effort: Option<String>,
-        allow_shell: bool,
-        trust_mode: bool,
-        auto_approve: bool,
-    ) -> Self {
-        Op::SendMessage {
-            content: content.into(),
-            mode,
-            model: model.into(),
-            goal_objective: None,
-            reasoning_effort,
-            allow_shell,
-            trust_mode,
-            auto_approve,
-        }
-    }
-}
@@ -19,6 +19,11 @@ pub struct Session {
    /// `"off" | "low" | "medium" | "high" | "max"`. `None` lets the provider
    /// apply its own defaults.
    pub reasoning_effort: Option<String>,
+    /// Whether the user selected automatic reasoning effort.
+    pub reasoning_effort_auto: bool,
+
+    /// Whether the user selected automatic model routing.
+    pub auto_model: bool,

    /// Workspace directory
    pub workspace: PathBuf,
@@ -118,6 +123,8 @@ impl Session {
        Self {
            model,
            reasoning_effort: None,
+            reasoning_effort_auto: false,
+            auto_model: false,
            workspace,
            system_prompt: None,
            compaction_summary_prompt: None,
@@ -2504,6 +2504,11 @@ async fn run_review(config: &Config, args: ReviewArgs) -> Result<()> {
        .model
        .or_else(|| config.default_text_model.clone())
        .unwrap_or_else(|| config.default_model());
+    let route = resolve_cli_auto_route(config, &model, &diff).await;
+    let model = route.model;
+    let reasoning_effort = route
+        .reasoning_effort
+        .map(|effort| effort.as_setting().to_string());

    let system = SystemPrompt::Text(
        "You are a senior code reviewer. Focus on bugs, risks, behavioral regressions, and missing tests. \
@@ -2529,7 +2534,7 @@ Provide findings ordered by severity with file references, then open questions,
        tool_choice: None,
        metadata: None,
        thinking: None,
-        reasoning_effort: None,
+        reasoning_effort,
        stream: Some(false),
        temperature: Some(0.2),
        top_p: Some(0.9),
@@ -3620,14 +3625,42 @@ async fn run_interactive(
    .await
 }

+/// Model routing outcome for a CLI invocation.
+struct CliAutoRoute {
+    /// Model ID to send to the API: the routed model in auto mode, otherwise
+    /// the caller's model unchanged.
+    model: String,
+    /// Reasoning tier chosen by auto routing; `None` when routing was not used.
+    reasoning_effort: Option<crate::tui::app::ReasoningEffort>,
+    /// True when the requested model was `auto`.
+    auto_model: bool,
+}
+
+/// Resolve the model to use for a CLI request.
+///
+/// When `model` is `auto` (ignoring case and surrounding whitespace), the
+/// auto router picks a model and reasoning tier from `prompt`, with no recent
+/// context. Any other model is passed through untouched with no reasoning
+/// tier.
+async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> CliAutoRoute {
+    let auto_model = model.trim().eq_ignore_ascii_case("auto");
+    if !auto_model {
+        return CliAutoRoute {
+            model: model.to_string(),
+            reasoning_effort: None,
+            auto_model,
+        };
+    }
+
+    let selection =
+        commands::resolve_auto_route_with_flash(config, prompt, "", "auto", "auto").await;
+    CliAutoRoute {
+        model: selection.model,
+        reasoning_effort: selection.reasoning_effort,
+        auto_model,
+    }
+}
+
 async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()> {
    use crate::client::DeepSeekClient;
    use crate::models::{ContentBlock, Message, MessageRequest};

    let client = DeepSeekClient::new(config)?;
+    let route = resolve_cli_auto_route(config, model, prompt).await;
+    let reasoning_effort = route
+        .reasoning_effort
+        .map(|effort| effort.as_setting().to_string());

    let request = MessageRequest {
-        model: model.to_string(),
+        model: route.model,
        messages: vec![Message {
            role: "user".to_string(),
            content: vec![ContentBlock::Text {
@@ -3641,7 +3674,7 @@ async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()>
        tool_choice: None,
        metadata: None,
        thinking: None,
-        reasoning_effort: None,
+        reasoning_effort,
        stream: Some(false),
        temperature: None,
        top_p: None,
@@ -3663,8 +3696,13 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result
    use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt};

    let client = DeepSeekClient::new(config)?;
+    let route = resolve_cli_auto_route(config, model, prompt).await;
+    let model = route.model;
+    let reasoning_effort = route
+        .reasoning_effort
+        .map(|effort| effort.as_setting().to_string());
    let request = MessageRequest {
-        model: model.to_string(),
+        model: model.clone(),
        messages: vec![Message {
            role: "user".to_string(),
            content: vec![ContentBlock::Text {
@@ -3680,7 +3718,7 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result
        tool_choice: None,
        metadata: None,
        thinking: None,
-        reasoning_effort: None,
+        reasoning_effort,
        stream: Some(false),
        temperature: Some(0.2),
        top_p: Some(0.9),
@@ -3725,6 +3763,13 @@ async fn run_exec_agent(
    use crate::tools::todo::new_shared_todo_list;
    use crate::tui::app::AppMode;

+    let route = resolve_cli_auto_route(config, model, prompt).await;
+    let auto_model = route.auto_model;
+    let effective_model = route.model;
+    let effective_reasoning_effort = route
+        .reasoning_effort
+        .map(|effort| effort.as_setting().to_string());
+
    // Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle
    // architecture (issue #124) handles long-context resets via fresh contexts
    // rather than progressive summarization. The compaction config is still
@@ -3732,8 +3777,8 @@ async fn run_exec_agent(
    // or direct engine config keep their old behavior.
    let compaction = CompactionConfig {
        enabled: false,
-        model: model.to_string(),
-        token_threshold: compaction_threshold_for_model(model),
+        model: effective_model.clone(),
+        token_threshold: compaction_threshold_for_model(&effective_model),
        ..Default::default()
    };

@@ -3747,7 +3792,7 @@ async fn run_exec_agent(
        .map(crate::config::LspConfigToml::into_runtime);

    let engine_config = EngineConfig {
-        model: model.to_string(),
+        model: effective_model.clone(),
        workspace: workspace.clone(),
        allow_shell: auto_approve || config.allow_shell(),
        trust_mode,
@@ -3784,15 +3829,18 @@ async fn run_exec_agent(
    };

    engine_handle
-        .send(Op::send(
-            prompt,
+        .send(Op::SendMessage {
+            content: prompt.to_string(),
            mode,
-            model,
-            None,
-            auto_approve || config.allow_shell(),
+            model: effective_model.clone(),
+            goal_objective: None,
+            reasoning_effort: effective_reasoning_effort,
+            reasoning_effort_auto: auto_model,
+            auto_model,
+            allow_shell: auto_approve || config.allow_shell(),
            trust_mode,
            auto_approve,
-        ))
+        })
        .await?;

    #[derive(serde::Serialize)]
@@ -3813,7 +3861,7 @@ async fn run_exec_agent(
    }
    let mut summary = ExecSummary {
        mode: "agent".to_string(),
-        model: model.to_string(),
+        model: effective_model,
        prompt: prompt.to_string(),
        ..ExecSummary::default()
    };
@@ -1457,21 +1457,43 @@ impl RuntimeThreadManager {
        }

        let mode = parse_mode(req.mode.as_deref().unwrap_or(&thread.mode));
-        let model = req.model.unwrap_or_else(|| thread.model.clone());
+        let requested_model = req.model.unwrap_or_else(|| thread.model.clone());
+        let auto_model = requested_model.trim().eq_ignore_ascii_case("auto");
+        let (model, reasoning_effort) = if auto_model {
+            let selection = crate::commands::resolve_auto_route_with_flash(
+                &self.config,
+                &prompt,
+                "",
+                "auto",
+                "auto",
+            )
+            .await;
+            (
+                selection.model,
+                selection
+                    .reasoning_effort
+                    .map(|effort| effort.as_setting().to_string()),
+            )
+        } else {
+            (requested_model, None)
+        };
        let allow_shell = req.allow_shell.unwrap_or(thread.allow_shell);
        let trust_mode = req.trust_mode.unwrap_or(thread.trust_mode);
        let auto_approve = req.auto_approve.unwrap_or(thread.auto_approve);

        engine
-            .send(Op::send(
-                prompt,
+            .send(Op::SendMessage {
+                content: prompt,
                mode,
-                model.clone(),
-                None,
+                model: model.clone(),
+                goal_objective: None,
+                reasoning_effort,
+                reasoning_effort_auto: auto_model,
+                auto_model,
                allow_shell,
                trust_mode,
                auto_approve,
-            ))
+            })
            .await
            .map_err(|e| anyhow!("Failed to start turn: {e}"))?;

@@ -284,7 +284,7 @@ impl Settings {
            s.locale = normalize_configured_locale(&s.locale)
                .unwrap_or("en")
                .to_string();
-            s.default_model = s.default_model.as_deref().and_then(normalize_model_name);
+            s.default_model = s.default_model.as_deref().and_then(normalize_default_model);
            s
        };
        settings.apply_env_overrides();
@@ -444,9 +444,9 @@ impl Settings {
                    return Ok(());
                }

-                let Some(model) = normalize_model_name(trimmed) else {
+                let Some(model) = normalize_default_model(trimmed) else {
                    anyhow::bail!(
-                        "Failed to update setting: invalid model '{value}'. Expected: a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default."
+                        "Failed to update setting: invalid model '{value}'. Expected: auto, a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default."
                    );
                };
                self.default_model = Some(model);
@@ -549,12 +549,21 @@ impl Settings {
            ("max_history", "Max input history entries"),
            (
                "default_model",
-                "Default model: any DeepSeek model ID (e.g. deepseek-v4-pro)",
+                "Default model: auto or any DeepSeek model ID (e.g. deepseek-v4-pro)",
            ),
        ]
    }
 }

+/// Normalize a `default_model` setting value.
+///
+/// The literal `auto` (any ASCII case, surrounding whitespace ignored) is kept
+/// as the canonical string `"auto"`; every other value is handed to
+/// `normalize_model_name`.
+fn normalize_default_model(value: &str) -> Option<String> {
+    let candidate = value.trim();
+    if !candidate.eq_ignore_ascii_case("auto") {
+        return normalize_model_name(candidate);
+    }
+    Some(String::from("auto"))
+}
+
 /// Parse a boolean value from various formats
 fn parse_bool(value: &str) -> Result<bool> {
    match value.to_lowercase().as_str() {
@@ -557,6 +557,9 @@ pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3;
 pub struct SubAgentRuntime {
    pub client: DeepSeekClient,
    pub model: String,
+    pub auto_model: bool,
+    pub reasoning_effort: Option<String>,
+    pub reasoning_effort_auto: bool,
    pub role_models: HashMap<String, String>,
    pub context: ToolContext,
    pub allow_shell: bool,
@@ -597,6 +600,9 @@ impl SubAgentRuntime {
        Self {
            client,
            model,
+            auto_model: false,
+            reasoning_effort: None,
+            reasoning_effort_auto: false,
            role_models: HashMap::new(),
            context,
            allow_shell,
@@ -646,6 +652,27 @@ impl SubAgentRuntime {
        self
    }

+    /// Carry over whether the parent session picks its model per turn.
+    #[must_use]
+    pub fn with_auto_model(self, auto_model: bool) -> Self {
+        let mut runtime = self;
+        runtime.auto_model = auto_model;
+        runtime
+    }
+
+    /// Carry over the parent's thinking configuration.
+    ///
+    /// `reasoning_effort_auto` remains true even if the parent turn itself went
+    /// out with a concrete flash-router recommendation, which lets each child
+    /// resolve its own tier.
+    #[must_use]
+    pub fn with_reasoning_effort(
+        self,
+        reasoning_effort: Option<String>,
+        reasoning_effort_auto: bool,
+    ) -> Self {
+        let mut runtime = self;
+        runtime.reasoning_effort = reasoning_effort;
+        runtime.reasoning_effort_auto = reasoning_effort_auto;
+        runtime
+    }
+
    /// Return a child runtime that is deliberately detached from the parent
    /// turn cancellation token. Background sub-agents should keep running when
    /// the parent turn is cancelled; explicit agent cancellation still
@@ -675,6 +702,9 @@ impl SubAgentRuntime {
        Self {
            client: self.client.clone(),
            model: self.model.clone(),
+            auto_model: self.auto_model,
+            reasoning_effort: self.reasoning_effort.clone(),
+            reasoning_effort_auto: self.reasoning_effort_auto,
            role_models: self.role_models.clone(),
            context: child_context,
            allow_shell: self.allow_shell,
@@ -1621,16 +1651,14 @@ impl ToolSpec for AgentSpawnTool {
        if let Some(cwd) = validated_cwd {
            child_runtime.context.workspace = cwd;
        }
-        let effective_model = match spawn_request.model.clone() {
-            Some(model) => model,
+        let configured_model = match spawn_request.model.clone() {
+            Some(model) => Some(model),
            None => configured_model_for_role_or_type(
                &self.runtime,
                spawn_request.assignment.role.as_deref(),
                &spawn_request.agent_type,
-            )?
-            .unwrap_or_else(|| self.runtime.model.clone()),
+            )?,
        };
-        child_runtime.model = effective_model.clone();

        // Cache-aware resident mode (#529): prepend file contents to the prompt
        // so the child's prefix is byte-stable for DeepSeek prefix caching.
@@ -1666,6 +1694,14 @@ impl ToolSpec for AgentSpawnTool {
                (spawn_request.prompt, None)
            };

+        let route =
+            resolve_subagent_assignment_route(&self.runtime, configured_model, &effective_prompt)
+                .await;
+        child_runtime.model = route.model.clone();
+        child_runtime.reasoning_effort = route.reasoning_effort.clone();
+        child_runtime.reasoning_effort_auto = false;
+        let effective_model = route.model;
+
        let mut manager = self.manager.write().await;

        let result = manager
@@ -2743,7 +2779,7 @@ async fn run_subagent(
            tool_choice: Some(json!({ "type": "auto" })),
            metadata: None,
            thinking: None,
-            reasoning_effort: None,
+            reasoning_effort: runtime.reasoning_effort.clone(),
            stream: Some(false),
            temperature: None,
            top_p: None,
@@ -3272,6 +3308,167 @@ pub(crate) fn configured_model_for_role_or_type(
    Ok(None)
 }

+/// Concrete model and thinking tier resolved for a single sub-agent assignment.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct SubAgentResolvedRoute {
+    /// Model id the child will run with.
+    pub(crate) model: String,
+    /// Thinking tier sent with the child's requests; `None` leaves it unset.
+    pub(crate) reasoning_effort: Option<String>,
+}
+
+/// Resolve the model and thinking tier for a child assignment.
+///
+/// Starts from the local heuristic route and, when the parent runs in an auto
+/// mode, lets the flash router override the parts that are still open:
+///
+/// * the model, only when the parent is in auto-model mode and no model was
+///   configured for this assignment (`configured_model` is `None`);
+/// * the thinking tier, only when the parent's thinking mode is auto and the
+///   router actually returned a tier.
+///
+/// A router error, a timeout, or a reply without a recommendation leaves the
+/// heuristic route untouched.
+pub(crate) async fn resolve_subagent_assignment_route(
+    runtime: &SubAgentRuntime,
+    configured_model: Option<String>,
+    prompt: &str,
+) -> SubAgentResolvedRoute {
+    let model_is_open = runtime.auto_model && configured_model.is_none();
+    let effort_is_open = runtime.reasoning_effort_auto;
+    let mut route = fallback_subagent_assignment_route(runtime, configured_model, prompt);
+
+    // Skip the router round-trip entirely when nothing it returns could be
+    // applied, e.g. auto-model with an explicitly configured model and a
+    // fixed thinking tier. Previously that case paid for a discarded call.
+    if (model_is_open || effort_is_open)
+        && let Ok(Some(recommendation)) = subagent_flash_router(runtime, prompt).await
+    {
+        if model_is_open {
+            route.model = recommendation.model;
+        }
+        if effort_is_open && let Some(effort) = recommendation.reasoning_effort {
+            route.reasoning_effort = Some(effort.as_setting().to_string());
+        }
+    }
+
+    route
+}
+
+/// Heuristic route computed locally, without calling the router model.
+///
+/// Model precedence: `configured_model`, then `auto_model_heuristic` when the
+/// parent is in auto-model mode, then the parent's own model. In auto thinking
+/// mode the tier comes from `auto_reasoning::select`, with `Low` and `Medium`
+/// raised to `High`; otherwise the parent's configured effort is passed on.
+fn fallback_subagent_assignment_route(
+    runtime: &SubAgentRuntime,
+    configured_model: Option<String>,
+    prompt: &str,
+) -> SubAgentResolvedRoute {
+    use crate::tui::app::ReasoningEffort;
+
+    let model = match configured_model {
+        Some(explicit) => explicit,
+        None if runtime.auto_model => {
+            crate::commands::auto_model_heuristic(prompt, &runtime.model)
+        }
+        None => runtime.model.clone(),
+    };
+
+    let reasoning_effort = if !runtime.reasoning_effort_auto {
+        runtime.reasoning_effort.clone()
+    } else {
+        let picked = crate::auto_reasoning::select(false, prompt);
+        let tier = if matches!(picked, ReasoningEffort::Low | ReasoningEffort::Medium) {
+            ReasoningEffort::High
+        } else {
+            picked
+        };
+        Some(tier.as_setting().to_string())
+    };
+
+    SubAgentResolvedRoute {
+        model,
+        reasoning_effort,
+    }
+}
+
+/// Ask the flash model for a routing recommendation for one assignment.
+///
+/// Returns `Ok(None)` in test builds before any request is built. Otherwise
+/// returns whatever `parse_auto_route_recommendation` extracts from the reply
+/// text. The request is given four seconds before it is abandoned with an
+/// error.
+async fn subagent_flash_router(
+    runtime: &SubAgentRuntime,
+    prompt: &str,
+) -> Result<Option<crate::commands::AutoRouteRecommendation>> {
+    if cfg!(test) {
+        return Ok(None);
+    }
+
+    let assignment = Message {
+        role: "user".to_string(),
+        content: vec![ContentBlock::Text {
+            text: subagent_router_prompt(runtime, prompt),
+            cache_control: None,
+        }],
+    };
+    let system_prompt = SystemPrompt::Text(SUBAGENT_ROUTER_SYSTEM_PROMPT.to_string());
+    let request = MessageRequest {
+        model: "deepseek-v4-flash".to_string(),
+        messages: vec![assignment],
+        max_tokens: 96,
+        system: Some(system_prompt),
+        tools: None,
+        tool_choice: None,
+        metadata: None,
+        thinking: None,
+        reasoning_effort: Some("off".to_string()),
+        stream: Some(false),
+        temperature: Some(0.0),
+        top_p: None,
+    };
+
+    // The first `?` surfaces the timeout, the second the client error.
+    let pending = runtime.client.create_message(request);
+    let response = tokio::time::timeout(Duration::from_secs(4), pending).await??;
+    let reply = message_response_text(&response.content);
+    Ok(crate::commands::parse_auto_route_recommendation(&reply))
+}
+
+/// System prompt for the router call: asks for a compact JSON object naming a
+/// model and a thinking tier for the assignment.
+const SUBAGENT_ROUTER_SYSTEM_PROMPT: &str = "\
+You are the DeepSeek TUI sub-agent routing manager. Return only compact JSON: \
+{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
+Treat each child assignment like a customer request entering a team queue: decide the least \
+sufficient worker and thinking budget for that assignment. Do not treat being a sub-agent as \
+important by itself. Use Flash for trivial, read-only, status, lookup, or single-step work. \
+Use Pro for coding, debugging, release work, multi-file changes, security, architecture, \
+high-risk decisions, ambiguous requests, or work likely to need tool-call judgment. Use thinking \
+off for trivial no-tool work, high for ordinary reasoning, and max only for hard, risky, \
+multi-step, uncertain, or tool-heavy work.";
+
+/// Build the user message for the router: the parent's model mode, the
+/// parent's thinking mode, and the assignment text capped at 4,000 characters.
+fn subagent_router_prompt(runtime: &SubAgentRuntime, prompt: &str) -> String {
+    let model_mode = if runtime.auto_model { "auto" } else { "fixed" };
+    // Auto wins over any stored tier; a missing tier reads as provider-default.
+    let thinking_mode = match runtime.reasoning_effort.as_deref() {
+        _ if runtime.reasoning_effort_auto => "auto",
+        Some(fixed_tier) => fixed_tier,
+        None => "provider-default",
+    };
+
+    format!(
+        "Parent selected model mode: {}\nParent selected thinking mode: {}\n\nSub-agent assignment:\n{}\n\nReturn JSON only.",
+        model_mode,
+        thinking_mode,
+        truncate_subagent_router_prompt(prompt, 4_000)
+    )
+}
+
+/// Cap `text` at `max_chars` characters (not bytes), appending a
+/// `[truncated]` marker line when anything was cut.
+fn truncate_subagent_router_prompt(text: &str, max_chars: usize) -> String {
+    // Byte offset of the first character past the limit, if there is one.
+    match text.char_indices().nth(max_chars) {
+        None => text.to_string(),
+        Some((cut, _)) => {
+            let mut clipped = String::from(&text[..cut]);
+            clipped.push_str("\n[truncated]");
+            clipped
+        }
+    }
+}
+
+/// Concatenate the text and thinking blocks of a response, in order.
+///
+/// A newline is inserted before a fragment only when the buffer already holds
+/// something; every other block kind is skipped.
+fn message_response_text(blocks: &[ContentBlock]) -> String {
+    let mut joined = String::new();
+    for block in blocks {
+        let fragment: &str = match block {
+            ContentBlock::Text { text, .. } => text,
+            ContentBlock::Thinking { thinking } => thinking,
+            _ => continue,
+        };
+        if !joined.is_empty() {
+            joined.push('\n');
+        }
+        joined.push_str(fragment);
+    }
+    joined
+}
+
 fn parse_optional_subagent_model(input: &Value, key: &str) -> Result<Option<String>, ToolError> {
    match input.get(key) {
        None | Some(Value::Null) => Ok(None),
@@ -398,6 +398,62 @@ fn test_build_assignment_prompt_includes_metadata() {
    assert!(prompt.contains("role: explorer"));
 }

+#[test]
+fn subagent_auto_model_routes_unconfigured_assignments() {
+    let runtime = stub_runtime().with_auto_model(true);
+    // (assignment prompt, model the heuristic is expected to pick)
+    let expectations = [
+        ("implement the release fix", "deepseek-v4-pro"),
+        ("say hello", "deepseek-v4-flash"),
+    ];
+
+    for (prompt, expected_model) in expectations {
+        let route = fallback_subagent_assignment_route(&runtime, None, prompt);
+        assert_eq!(route.model, expected_model);
+    }
+}
+
+#[test]
+fn subagent_auto_route_respects_explicit_or_role_model() {
+    let runtime = stub_runtime().with_auto_model(true);
+    let configured = Some("deepseek-v4-flash".to_string());
+
+    // A configured model must win even for a prompt the heuristic would
+    // otherwise send to Pro.
+    let route = fallback_subagent_assignment_route(
+        &runtime,
+        configured,
+        "implement the release fix",
+    );
+
+    assert_eq!(route.model, "deepseek-v4-flash");
+}
+
+#[test]
+fn subagent_auto_reasoning_resolves_to_distinct_v4_tiers() {
+    let runtime = stub_runtime().with_reasoning_effort(Some("high".to_string()), true);
+    let effort_for = |prompt: &str| {
+        fallback_subagent_assignment_route(&runtime, None, prompt).reasoning_effort
+    };
+
+    assert_eq!(effort_for("quick lookup"), Some("high".to_string()));
+    assert_eq!(
+        effort_for("debug this release failure"),
+        Some("max".to_string())
+    );
+}
+
+#[test]
+fn subagent_router_prompt_frames_assignment_as_auto_routing() {
+    let runtime = stub_runtime()
+        .with_auto_model(true)
+        .with_reasoning_effort(Some("high".to_string()), true);
+    let rendered = subagent_router_prompt(&runtime, "inspect one file");
+
+    let expected_fragments = [
+        "Parent selected model mode: auto",
+        "Parent selected thinking mode: auto",
+        "inspect one file",
+    ];
+    for fragment in expected_fragments {
+        assert!(rendered.contains(fragment));
+    }
+}
+
 #[test]
 fn test_subagent_tool_registry_reports_unavailable_tools() {
    let tmp = tempdir().expect("tempdir");
@@ -1102,6 +1158,9 @@ fn stub_runtime() -> SubAgentRuntime {
    SubAgentRuntime {
        client: stub_client(),
        model: "deepseek-v4-flash".to_string(),
+        auto_model: false,
+        reasoning_effort: None,
+        reasoning_effort_auto: false,
        role_models: std::collections::HashMap::new(),
        context,
        allow_shell: true,
@@ -9,7 +9,9 @@ use serde_json::Value;
 use thiserror::Error;

 use crate::compaction::CompactionConfig;
-use crate::config::{ApiProvider, Config, SavedCredential, has_api_key, save_api_key};
+use crate::config::{
+    ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, save_api_key,
+};
 use crate::config_ui::ConfigUiMode;
 use crate::core::coherence::CoherenceState;
 use crate::cycle_manager::{CycleBriefing, CycleConfig};
@@ -628,6 +630,8 @@ pub struct App {
    /// `dispatch_user_message` calls `auto_model_heuristic` to resolve the
    /// effective model for each outbound message.
    pub auto_model: bool,
+    /// Last concrete model chosen while `auto_model` is active.
+    pub last_effective_model: Option<String>,
    /// Current API provider (mirrors `Config::api_provider`).
    /// Updated by `/provider` switches so the UI/commands can read the
    /// active backend without re-deriving it from the live config.
@@ -635,6 +639,8 @@ pub struct App {
    /// Current reasoning-effort tier for DeepSeek thinking mode.
    /// Cycled via Shift+Tab; initialized from config at startup.
    pub reasoning_effort: ReasoningEffort,
+    /// Last concrete thinking tier chosen while `reasoning_effort` is auto.
+    pub last_effective_reasoning_effort: Option<ReasoningEffort>,
    pub workspace: PathBuf,
    pub config_path: Option<PathBuf>,
    pub config_profile: Option<String>,
@@ -1080,8 +1086,23 @@ impl App {
        let use_paste_burst_detection = settings.paste_burst_detection;
        let ui_theme = palette::UI_THEME;
        let model = settings.default_model.clone().unwrap_or(model);
+        let auto_model = model.trim().eq_ignore_ascii_case("auto");
+        let threshold_model = if auto_model {
+            DEFAULT_TEXT_MODEL
+        } else {
+            model.as_str()
+        };
        let compact_threshold =
-            compaction_threshold_for_model_and_effort(&model, config.reasoning_effort());
+            compaction_threshold_for_model_and_effort(threshold_model, config.reasoning_effort());
+        let reasoning_effort = if auto_model {
+            ReasoningEffort::Auto
+        } else {
+            config
+                .reasoning_effort()
+                .map_or_else(ReasoningEffort::default, |s| {
+                    ReasoningEffort::from_setting(s)
+                })
+        };

        // Start in YOLO mode if --yolo flag was passed
        let preferred_mode = AppMode::from_setting(&settings.default_mode);
@@ -1170,13 +1191,11 @@ impl App {
            sticky_status: None,
            last_status_message_seen: None,
            model,
-            auto_model: false,
+            auto_model,
+            last_effective_model: None,
            api_provider: provider,
-            reasoning_effort: config
-                .reasoning_effort()
-                .map_or_else(ReasoningEffort::default, |s| {
-                    ReasoningEffort::from_setting(s)
-                }),
+            reasoning_effort,
+            last_effective_reasoning_effort: None,
            workspace,
            config_path,
            config_profile,
@@ -1435,6 +1454,7 @@ impl App {
    /// `Off` → `High` → `Max` → `Off`.
    pub fn cycle_effort(&mut self) {
        self.reasoning_effort = self.reasoning_effort.cycle_next();
+        self.last_effective_reasoning_effort = None;
        self.needs_redraw = true;
        self.push_status_toast(
            format!("Thinking: {}", self.reasoning_effort.short_label()),
@@ -3464,10 +3484,42 @@ impl App {
    }

    pub fn update_model_compaction_budget(&mut self) {
-        self.compact_threshold = compaction_threshold_for_model_and_effort(
-            &self.model,
-            self.reasoning_effort.api_value(),
-        );
+        let model = self.effective_model_for_budget().to_string();
+        self.compact_threshold =
+            compaction_threshold_for_model_and_effort(&model, self.reasoning_effort.api_value());
+    }
+
+    /// Model id used when sizing the compaction budget.
+    ///
+    /// In auto mode this is the last concretely routed model, falling back to
+    /// `DEFAULT_TEXT_MODEL` when none is recorded or the recorded value is the
+    /// literal `auto`. Otherwise it is the configured model.
+    pub fn effective_model_for_budget(&self) -> &str {
+        if !self.auto_model {
+            return &self.model;
+        }
+        match self.last_effective_model.as_deref() {
+            Some(routed) if routed != "auto" => routed,
+            _ => DEFAULT_TEXT_MODEL,
+        }
+    }
+
+    /// Model label for display: the configured model, or in auto mode `auto`
+    /// followed by the last concretely routed model once one is recorded.
+    pub fn model_display_label(&self) -> String {
+        if !self.auto_model {
+            return self.model.clone();
+        }
+        match self.last_effective_model.as_deref() {
+            Some(routed) if routed != "auto" => format!("auto: {routed}"),
+            _ => "auto".to_string(),
+        }
+    }
+
+    /// Thinking label for display: the configured tier, or, when either the
+    /// model or the thinking tier is auto, `auto` followed by the last
+    /// concretely chosen tier once one is recorded.
+    pub fn reasoning_effort_display_label(&self) -> String {
+        let auto_routed = self.auto_model || self.reasoning_effort == ReasoningEffort::Auto;
+        if !auto_routed {
+            return self.reasoning_effort.short_label().to_string();
+        }
+        match self.last_effective_reasoning_effort {
+            Some(tier) => format!("auto: {}", tier.short_label()),
+            None => "auto".to_string(),
+        }
    }

    pub fn compaction_config(&self) -> CompactionConfig {
@@ -34,6 +34,7 @@ use crate::tui::views::{ModalKind, ModalView, ViewAction, ViewEvent};
 /// Models the picker exposes by default. Kept short on purpose — power
 /// users can still type `/model <id>` for anything else.
 const PICKER_MODELS: &[(&str, &str)] = &[
+    ("auto", "select per turn"),
    ("deepseek-v4-pro", "flagship"),
    ("deepseek-v4-flash", "fast / cheap"),
 ];
@@ -41,6 +42,7 @@ const PICKER_MODELS: &[(&str, &str)] = &[
 /// Thinking-effort rows shown in the picker, in the order DeepSeek
 /// behaviorally distinguishes them.
 const PICKER_EFFORTS: &[ReasoningEffort] = &[
+    ReasoningEffort::Auto,
    ReasoningEffort::Off,
    ReasoningEffort::High,
    ReasoningEffort::Max,
@@ -68,7 +70,11 @@ pub struct ModelPickerView {
 impl ModelPickerView {
    #[must_use]
    pub fn new(app: &App) -> Self {
-        let initial_model = app.model.clone();
+        let initial_model = if app.auto_model {
+            "auto".to_string()
+        } else {
+            app.model.clone()
+        };
        let mut selected_model_idx = PICKER_MODELS
            .iter()
            .position(|(id, _)| *id == initial_model);
@@ -88,7 +94,7 @@ impl ModelPickerView {
        let selected_effort_idx = PICKER_EFFORTS
            .iter()
            .position(|e| *e == normalized)
-            .unwrap_or(1); // default to High if somehow unknown
+            .unwrap_or(2); // default to High if somehow unknown

        Self {
            initial_model,
@@ -116,6 +122,9 @@ impl ModelPickerView {
    }

    fn resolved_effort(&self) -> ReasoningEffort {
+        // An auto model always carries auto thinking, whichever effort row is
+        // currently highlighted.
+        let model_is_auto = self.resolved_model().trim().eq_ignore_ascii_case("auto");
+        if model_is_auto {
+            return ReasoningEffort::Auto;
+        }
        PICKER_EFFORTS[self.selected_effort_idx]
    }

@@ -318,6 +327,7 @@ impl ModalView for ModelPickerView {
            .map(|effort| {
                let label = effort.short_label().to_string();
                let hint = match effort {
+                    ReasoningEffort::Auto => "auto-select per turn".to_string(),
                    ReasoningEffort::Off => "thinking disabled".to_string(),
                    ReasoningEffort::High => "thinking enabled (default)".to_string(),
                    ReasoningEffort::Max => "thinking enabled, max effort".to_string(),
@@ -387,6 +397,37 @@ mod tests {
        assert_eq!(view.resolved_effort(), ReasoningEffort::Max);
    }

+    #[test]
+    fn picker_initial_selection_matches_auto_state() {
+        let mut app = create_test_app();
+        app.model = String::from("auto");
+        app.auto_model = true;
+        app.reasoning_effort = ReasoningEffort::Auto;
+
+        let picker = ModelPickerView::new(&app);
+        let model = picker.resolved_model();
+        let effort = picker.resolved_effort();
+
+        assert_eq!(model, "auto");
+        assert_eq!(effort, ReasoningEffort::Auto);
+    }
+
+    #[test]
+    fn picker_auto_model_forces_auto_effort_on_apply() {
+        let mut app = create_test_app();
+        app.model = String::from("auto");
+        app.auto_model = true;
+        app.reasoning_effort = ReasoningEffort::Off;
+
+        let max_row = PICKER_EFFORTS
+            .iter()
+            .position(|effort| *effort == ReasoningEffort::Max)
+            .expect("max effort row");
+
+        // Highlight the auto model together with the Max effort row.
+        let mut picker = ModelPickerView::new(&app);
+        picker.selected_model_idx = 0;
+        picker.selected_effort_idx = max_row;
+
+        assert_eq!(picker.resolved_model(), "auto");
+        assert_eq!(picker.resolved_effort(), ReasoningEffort::Auto);
+    }
+
    #[test]
    fn picker_normalizes_low_medium_to_high() {
        let mut app = create_test_app();
@@ -399,6 +440,21 @@ mod tests {
        );
    }

+    #[test]
+    fn picker_exposes_auto_and_distinct_thinking_tiers() {
+        let model_ids: Vec<_> = PICKER_MODELS.iter().map(|&(id, _)| id).collect();
+        let expected_models = vec!["auto", "deepseek-v4-pro", "deepseek-v4-flash"];
+        assert_eq!(model_ids, expected_models);
+
+        let effort_settings: Vec<_> = PICKER_EFFORTS
+            .iter()
+            .map(|effort| effort.as_setting())
+            .collect();
+        let expected_efforts = vec!["auto", "off", "high", "max"];
+        assert_eq!(effort_settings, expected_efforts);
+    }
+
    #[test]
    fn picker_preserves_unknown_model_via_custom_row() {
        let mut app = create_test_app();
@@ -429,7 +485,7 @@ mod tests {
    #[test]
    fn tab_switches_focus_and_arrow_now_moves_effort() {
        let mut app = create_test_app();
-        // Default is Max (index 2 = last); pin to Off so the Down arrow has
+        // Default is Max; pin to Off so the Down arrow has
        // somewhere to go.
        app.reasoning_effort = ReasoningEffort::Off;
        let mut view = ModelPickerView::new(&app);
@@ -490,11 +546,11 @@ mod tests {
    }

    #[test]
-    fn picker_only_exposes_off_high_max() {
+    fn picker_only_exposes_auto_off_high_max() {
        let labels: Vec<&str> = PICKER_EFFORTS
            .iter()
            .map(|effort| effort.short_label())
            .collect();
-        assert_eq!(labels, vec!["off", "high", "max"]);
+        assert_eq!(labels, vec!["auto", "off", "high", "max"]);
    }
 }
@@ -85,8 +85,8 @@ use crate::tui::user_input::UserInputView;

 use super::active_cell::ActiveCell;
 use super::app::{
-    App, AppAction, AppMode, OnboardingState, QueuedMessage, SidebarFocus, StatusToastLevel,
-    SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions,
+    App, AppAction, AppMode, OnboardingState, QueuedMessage, ReasoningEffort, SidebarFocus,
+    StatusToastLevel, SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions,
 };
 use super::approval::{
    ApprovalMode, ApprovalRequest, ApprovalView, ElevationRequest, ElevationView, ReviewDecision,
@@ -924,8 +924,13 @@ async fn run_event_loop(
                        }

                        // Update session cost
+                        let pricing_model = if app.auto_model {
+                            app.last_effective_model.as_deref().unwrap_or(&app.model)
+                        } else {
+                            &app.model
+                        };
                        let turn_cost =
-                            crate::pricing::calculate_turn_cost_from_usage(&app.model, &usage);
+                            crate::pricing::calculate_turn_cost_from_usage(pricing_model, &usage);
                        if let Some(cost) = turn_cost {
                            app.accrue_session_cost(cost);
                        }
@@ -1031,7 +1036,12 @@ async fn run_event_loop(
                    } => {
                        app.api_messages = messages;
                        app.system_prompt = system_prompt;
-                        app.model = model;
+                        if app.auto_model {
+                            app.last_effective_model = Some(model);
+                        } else {
+                            app.model = model;
+                            app.last_effective_model = None;
+                        }
                        app.update_model_compaction_budget();
                        app.workspace = workspace;
                        if (app.is_loading || app.is_compacting)
@@ -1317,7 +1327,8 @@ async fn run_event_loop(
        }

        if let Some(next) = queued_to_send {
-            if let Err(err) = dispatch_user_message(app, &engine_handle, next.clone()).await {
+            if let Err(err) = dispatch_user_message(app, config, &engine_handle, next.clone()).await
+            {
                app.queue_message(next);
                app.status_message = Some(format!(
                    "Dispatch failed ({err}); kept {} queued message(s)",
@@ -2422,7 +2433,7 @@ async fn run_event_loop(
                        app.close_slash_menu();
                    }
                    if let Some(input) = app.submit_input() {
-                        if handle_plan_choice(app, &engine_handle, &input).await? {
+                        if handle_plan_choice(app, config, &engine_handle, &input).await? {
                            continue;
                        }
                        // `# foo` quick-add (#492) — when memory is enabled,
@@ -2473,7 +2484,7 @@ async fn run_event_loop(
                                    })
                                    .await;
                            }
-                            submit_or_steer_message(app, &engine_handle, queued).await?;
+                            submit_or_steer_message(app, config, &engine_handle, queued).await?;
                        }
                    }
                }
@@ -3229,6 +3240,7 @@ fn queued_message_content_for_app(

 async fn dispatch_user_message(
    app: &mut App,
+    config: &Config,
    engine_handle: &EngineHandle,
    message: QueuedMessage,
 ) -> Result<()> {
@@ -3300,21 +3312,61 @@ async fn dispatch_user_message(
        persistence_actor::persist(PersistRequest::Checkpoint(session));
    }

-    // Resolve the effective model: when auto_model is active, use the
-    // heuristic to pick between Pro and Flash based on the user's input.
+    let auto_selection = if app.auto_model || app.reasoning_effort == ReasoningEffort::Auto {
+        Some(resolve_auto_model_selection(app, config, &message, &content).await)
+    } else {
+        None
+    };
+
    let effective_model = if app.auto_model {
-        commands::auto_model_heuristic(&message.display, &app.model)
+        auto_selection
+            .as_ref()
+            .map(|selection| selection.model.clone())
+            .unwrap_or_else(|| commands::auto_model_heuristic(&message.display, &app.model))
    } else {
        app.model.clone()
    };

+    let auto_controls_reasoning = app.auto_model || app.reasoning_effort == ReasoningEffort::Auto;
+    let effective_reasoning_effort = if auto_controls_reasoning {
+        let effort = auto_selection
+            .as_ref()
+            .and_then(|selection| selection.reasoning_effort)
+            .unwrap_or_else(|| {
+                normalize_auto_routed_effort(crate::auto_reasoning::select(false, &message.display))
+            });
+        app.last_effective_reasoning_effort = Some(effort);
+        Some(effort.as_setting().to_string())
+    } else {
+        app.last_effective_reasoning_effort = None;
+        app.reasoning_effort.api_value().map(str::to_string)
+    };
+
+    if let Some(selection) = auto_selection.as_ref() {
+        if app.auto_model {
+            app.last_effective_model = Some(effective_model.clone());
+            let mut status = format!(
+                "Auto model selected: {effective_model} via {}",
+                selection.source.label()
+            );
+            if let Some(effort) = app.last_effective_reasoning_effort {
+                status.push_str(&format!("; thinking auto: {}", effort.as_setting()));
+            }
+            app.status_message = Some(status);
+        }
+    } else {
+        app.last_effective_model = None;
+    }
+
    if let Err(err) = engine_handle
        .send(Op::SendMessage {
            content,
            mode: app.mode,
            model: effective_model,
            goal_objective: app.goal.goal_objective.clone(),
-            reasoning_effort: app.reasoning_effort.api_value().map(str::to_string),
+            reasoning_effort: effective_reasoning_effort,
+            reasoning_effort_auto: auto_controls_reasoning,
+            auto_model: app.auto_model,
            allow_shell: app.allow_shell,
            trust_mode: app.trust_mode,
            auto_approve: app.mode == AppMode::Yolo,
@@ -3329,6 +3381,95 @@ async fn dispatch_user_message(
    Ok(())
 }

+/// Resolve this turn's auto-routing selection by delegating to
+/// `commands::resolve_auto_route_with_flash`.
+///
+/// * `app` - supplies the prior `api_messages`, the `auto_model` flag and the
+///   configured reasoning effort.
+/// * `config` - forwarded unchanged to the router.
+/// * `message` - the queued message; its `display` text is the fallback request.
+/// * `latest_content` - the request text to route; used unless it is empty or
+///   whitespace-only.
+///
+/// Returns whatever `commands::AutoRouteSelection` the router produces.
+// NOTE(review): the router name suggests a call to a "flash" model, which
+// would add latency to every dispatch — confirm against `commands`.
+async fn resolve_auto_model_selection(
+    app: &App,
+    config: &Config,
+    message: &QueuedMessage,
+    latest_content: &str,
+) -> commands::AutoRouteSelection {
+    // Fall back to the display text when the content is blank.
+    let latest_request = if latest_content.trim().is_empty() {
+        message.display.as_str()
+    } else {
+        latest_content
+    };
+    commands::resolve_auto_route_with_flash(
+        config,
+        latest_request,
+        &recent_auto_router_context(&app.api_messages),
+        // Tell the router whether the model itself is being auto-selected or
+        // is pinned (only the reasoning effort is on auto in that case).
+        if app.auto_model { "auto" } else { "fixed" },
+        app.reasoning_effort.as_setting(),
+    )
+    .await
+}
+
+/// Thin local alias for `commands::normalize_auto_route_effort`; forwards
+/// `effort` and returns the result unchanged.
+fn normalize_auto_routed_effort(effort: ReasoningEffort) -> ReasoningEffort {
+    commands::normalize_auto_route_effort(effort)
+}
+
+/// Build a short, chronological transcript of recent messages for the auto
+/// router.
+///
+/// Walks `messages` from newest to oldest, skipping the newest entry, and
+/// collects up to six non-empty rows formatted as `"<role>: <text>"`, where the
+/// text is trimmed and truncated to 900 characters. Messages whose flattened
+/// text is empty do not count towards the six-row limit. Returns the literal
+/// `"No prior context."` when no rows were collected.
+fn recent_auto_router_context(messages: &[Message]) -> String {
+    let mut rows = Vec::new();
+    // NOTE(review): `skip(1)` presumably drops the request currently being
+    // routed; if the caller has not yet pushed it onto `api_messages`, this
+    // drops the latest prior reply instead — confirm against the caller.
+    for message in messages.iter().rev().skip(1) {
+        if rows.len() >= 6 {
+            break;
+        }
+        let text = content_blocks_text(&message.content);
+        let text = text.trim();
+        if text.is_empty() {
+            continue;
+        }
+        rows.push(format!(
+            "{}: {}",
+            message.role,
+            truncate_for_auto_router(text, 900)
+        ));
+    }
+    // Rows were gathered newest-first; restore oldest-first order.
+    rows.reverse();
+    if rows.is_empty() {
+        "No prior context.".to_string()
+    } else {
+        rows.join("\n")
+    }
+}
+
+/// Flatten a message's content blocks into newline-separated plain text.
+///
+/// `Text` and `Thinking` blocks contribute their text verbatim, `ToolUse`
+/// contributes `[tool call: <name>]`, and `ToolResult` contributes
+/// `[tool result] <content>`. Any other block variant is ignored.
+fn content_blocks_text(blocks: &[ContentBlock]) -> String {
+    let mut out = String::new();
+    for block in blocks {
+        match block {
+            ContentBlock::Text { text, .. } => {
+                append_router_text(&mut out, text);
+            }
+            ContentBlock::Thinking { thinking } => {
+                append_router_text(&mut out, thinking);
+            }
+            ContentBlock::ToolUse { name, .. } => {
+                // Only the tool name is recorded; other fields are dropped.
+                append_router_text(&mut out, &format!("[tool call: {name}]"));
+            }
+            ContentBlock::ToolResult { content, .. } => {
+                append_router_text(&mut out, &format!("[tool result] {content}"));
+            }
+            // Remaining variants contribute nothing to the output.
+            _ => {}
+        }
+    }
+    out
+}
+
+/// Append `text` to `out`, inserting a `'\n'` separator first when `out`
+/// already holds content.
+///
+/// The separator is added even if `text` itself is empty.
+fn append_router_text(out: &mut String, text: &str) {
+    if !out.is_empty() {
+        out.push('\n');
+    }
+    out.push_str(text);
+}
+
+/// Return at most the first `max_chars` characters of `text`, with `"..."`
+/// appended only when something was cut off.
+///
+/// Counting is per `char` rather than per byte, so a multi-byte character is
+/// never split. The ellipsis is added on top of `max_chars`, so a truncated
+/// result is `max_chars + 3` characters long.
+fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
+    let mut chars = text.chars();
+    // `by_ref` leaves the iterator usable so the leftover check below works.
+    let truncated: String = chars.by_ref().take(max_chars).collect();
+    if chars.next().is_some() {
+        format!("{truncated}...")
+    } else {
+        truncated
+    }
+}
+
 async fn apply_model_and_compaction_update(
    engine_handle: &EngineHandle,
    compaction: crate::compaction::CompactionConfig,
@@ -3423,11 +3564,15 @@ async fn apply_model_picker_choice(
    app: &mut App,
    engine_handle: &EngineHandle,
    model: String,
-    effort: crate::tui::app::ReasoningEffort,
+    mut effort: crate::tui::app::ReasoningEffort,
    previous_model: String,
    previous_effort: crate::tui::app::ReasoningEffort,
 ) {
-    let model_changed = model != previous_model;
+    let model_is_auto = model.trim().eq_ignore_ascii_case("auto");
+    if model_is_auto {
+        effort = ReasoningEffort::Auto;
+    }
+    let model_changed = model != previous_model || app.auto_model != model_is_auto;
    let effort_changed = effort != previous_effort;
    if !model_changed && !effort_changed {
        app.status_message = Some(format!(
@@ -3438,6 +3583,8 @@ async fn apply_model_picker_choice(
    }

    if model_changed {
+        app.auto_model = model_is_auto;
+        app.last_effective_model = None;
        app.model = model.clone();
        app.update_model_compaction_budget();
        app.session.last_prompt_tokens = None;
@@ -3448,6 +3595,7 @@ async fn apply_model_picker_choice(
    }
    if effort_changed {
        app.reasoning_effort = effort;
+        app.last_effective_reasoning_effort = None;
    }

    // Best-effort persist; surface a status warning if the settings file
@@ -3474,20 +3622,27 @@ async fn apply_model_picker_choice(
        apply_model_and_compaction_update(engine_handle, app.compaction_config()).await;
    }

+    let model_summary = if model_is_auto {
+        "auto (per-turn model)".to_string()
+    } else {
+        model.clone()
+    };
+    let previous_effort_summary = previous_effort.short_label();
+    let effort_summary = if effort == ReasoningEffort::Auto {
+        "auto (per-turn thinking)".to_string()
+    } else {
+        effort.short_label().to_string()
+    };
+
    let mut summary = match (model_changed, effort_changed) {
        (true, true) => format!(
-            "Model: {previous_model} → {model} · thinking: {} → {}",
-            previous_effort.short_label(),
-            effort.short_label()
-        ),
-        (true, false) => format!(
-            "Model: {previous_model} → {model} · thinking {}",
-            effort.short_label()
+            "Model: {previous_model} → {model_summary} · thinking: {previous_effort_summary} → {effort_summary}"
        ),
+        (true, false) => {
+            format!("Model: {previous_model} → {model_summary} · thinking {effort_summary}")
+        }
        (false, true) => format!(
-            "Thinking: {} → {} · model {model}",
-            previous_effort.short_label(),
-            effort.short_label()
+            "Thinking: {previous_effort_summary} → {effort_summary} · model {model_summary}"
        ),
        (false, false) => unreachable!(),
    };
@@ -3896,7 +4051,7 @@ async fn apply_command_result(
            }
            AppAction::SendMessage(content) => {
                let queued = build_queued_message(app, content);
-                submit_or_steer_message(app, engine_handle, queued).await?;
+                submit_or_steer_message(app, config, engine_handle, queued).await?;
            }
            AppAction::Rlm {
                prompt,
@@ -4417,11 +4572,14 @@ async fn queue_follow_up(app: &mut App, message: QueuedMessage) -> Result<()> {

 async fn submit_or_steer_message(
    app: &mut App,
+    config: &Config,
    engine_handle: &EngineHandle,
    message: QueuedMessage,
 ) -> Result<()> {
    match app.decide_submit_disposition() {
-        SubmitDisposition::Immediate => dispatch_user_message(app, engine_handle, message).await,
+        SubmitDisposition::Immediate => {
+            dispatch_user_message(app, config, engine_handle, message).await
+        }
        SubmitDisposition::Queue => {
            let count = app.queued_message_count().saturating_add(1);
            app.queue_message(message);
@@ -4523,6 +4681,7 @@ fn parse_plan_choice(input: &str) -> Option<PlanChoice> {

 async fn apply_plan_choice(
    app: &mut App,
+    config: &Config,
    engine_handle: &EngineHandle,
    choice: PlanChoice,
 ) -> Result<()> {
@@ -4539,7 +4698,7 @@ async fn apply_plan_choice(
                app.status_message =
                    Some("Queued accepted plan execution (agent mode).".to_string());
            } else {
-                dispatch_user_message(app, engine_handle, followup).await?;
+                dispatch_user_message(app, config, engine_handle, followup).await?;
            }
        }
        PlanChoice::AcceptYolo => {
@@ -4554,7 +4713,7 @@ async fn apply_plan_choice(
                app.status_message =
                    Some("Queued accepted plan execution (YOLO mode).".to_string());
            } else {
-                dispatch_user_message(app, engine_handle, followup).await?;
+                dispatch_user_message(app, config, engine_handle, followup).await?;
            }
        }
        PlanChoice::RevisePlan => {
@@ -4576,6 +4735,7 @@ async fn apply_plan_choice(

 async fn handle_plan_choice(
    app: &mut App,
+    config: &Config,
    engine_handle: &EngineHandle,
    input: &str,
 ) -> Result<bool> {
@@ -4590,7 +4750,7 @@ async fn handle_plan_choice(
        return Ok(false);
    };

-    apply_plan_choice(app, engine_handle, choice).await?;
+    apply_plan_choice(app, config, engine_handle, choice).await?;
    Ok(true)
 }

@@ -4714,7 +4874,8 @@ fn render(f: &mut Frame, app: &mut App) {
            .and_then(|value| value.to_str())
            .filter(|value| !value.is_empty())
            .unwrap_or("workspace");
-        let effort_label = app.reasoning_effort.short_label();
+        let model_label = app.model_display_label();
+        let effort_label = app.reasoning_effort_display_label();
        let provider_label = match app.api_provider {
            crate::config::ApiProvider::Deepseek => None,
            crate::config::ApiProvider::DeepseekCN => None,
@@ -4726,7 +4887,7 @@ fn render(f: &mut Frame, app: &mut App) {
        };
        let header_data = HeaderData::new(
            app.mode,
-            &app.model,
+            &model_label,
            workspace_name,
            app.is_loading,
            app.ui_theme.header_bg,
@@ -4737,7 +4898,7 @@ fn render(f: &mut Frame, app: &mut App) {
            app.session.session_cost,
            sanitized_prompt_tokens,
        )
-        .with_reasoning_effort(Some(effort_label))
+        .with_reasoning_effort(Some(&effort_label))
        .with_provider(provider_label);
        let header_widget = HeaderWidget::new(header_data);
        let buf = f.buffer_mut();
@@ -5016,7 +5177,8 @@ async fn handle_view_events(
                if app.plan_prompt_pending {
                    app.plan_prompt_pending = false;
                    if let Some(choice) = plan_choice_from_option(option)
-                        && let Err(err) = apply_plan_choice(app, engine_handle, choice).await
+                        && let Err(err) =
+                            apply_plan_choice(app, config, engine_handle, choice).await
                    {
                        app.status_message = Some(format!("Failed to apply plan selection: {err}"));
                    }
@@ -6496,7 +6658,7 @@ fn estimated_context_tokens(app: &App) -> Option<i64> {
 }

 fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
-    let max = context_window_for_model(&app.model)?;
+    let max = context_window_for_model(app.effective_model_for_budget())?;
    let max_i64 = i64::from(max);
    let reported = app
        .session
@@ -699,10 +699,12 @@ async fn model_change_update_syncs_engine_model_before_compaction() {
 async fn dispatch_user_message_failed_send_clears_loading_state() {
    let mut app = create_test_app();
    let engine = mock_engine_handle();
+    let config = Config::default();
    drop(engine.rx_op);

    let result = dispatch_user_message(
        &mut app,
+        &config,
        &engine.handle,
        QueuedMessage::new("hello".to_string(), None),
    )
@@ -1469,8 +1471,9 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() {
    app.offline_mode = true;

    let engine = crate::core::engine::mock_engine_handle();
+    let config = Config::default();

-    let handled = handle_plan_choice(&mut app, &engine.handle, "yolo")
+    let handled = handle_plan_choice(&mut app, &config, &engine.handle, "yolo")
        .await
        .expect("plan choice");

@@ -1479,7 +1482,7 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() {
    assert_eq!(app.mode, AppMode::Plan);

    let queued = build_queued_message(&mut app, "yolo".to_string());
-    submit_or_steer_message(&mut app, &engine.handle, queued)
+    submit_or_steer_message(&mut app, &config, &engine.handle, queued)
        .await
        .expect("submit normal message");

@@ -1504,8 +1507,9 @@ async fn numeric_plan_choice_still_queues_follow_up_when_busy() {
    app.is_loading = true;

    let engine = crate::core::engine::mock_engine_handle();
+    let config = Config::default();

-    let handled = handle_plan_choice(&mut app, &engine.handle, "2")
+    let handled = handle_plan_choice(&mut app, &config, &engine.handle, "2")
        .await
        .expect("plan choice");

@@ -275,7 +275,7 @@ impl FooterProps {
        // to cross the 60s threshold inside `footer_worked_chip`.
        let worked = footer_worked_chip(app.cumulative_turn_duration);
        Self {
-            model: app.model.clone(),
+            model: app.model_display_label(),
            mode_label,
            mode_color,
            text_dim_color: app.ui_theme.text_dim,