fix: restore auto model routing (#772)
Keep auto as a local routing mode, resolve concrete model/thinking before API requests, and wire auto routing through CLI, TUI, runtime threads, and subagents.
This commit is contained in:
@@ -1,14 +1,19 @@
|
||||
//! Config commands: config, settings, mode switches, trust, logout
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
use super::CommandResult;
|
||||
use crate::client::DeepSeekClient;
|
||||
use crate::config::{COMMON_DEEPSEEK_MODELS, clear_api_key, normalize_model_name};
|
||||
use crate::config_ui::{ConfigUiMode, parse_mode};
|
||||
use crate::llm_client::LlmClient;
|
||||
use crate::localization::resolve_locale;
|
||||
use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt};
|
||||
use crate::settings::Settings;
|
||||
use crate::tui::app::{App, AppAction, AppMode, OnboardingState, SidebarFocus};
|
||||
use crate::tui::app::{App, AppAction, AppMode, OnboardingState, ReasoningEffort, SidebarFocus};
|
||||
use crate::tui::approval::ApprovalMode;
|
||||
use anyhow::Result;
|
||||
|
||||
/// Open the interactive config editor.
|
||||
///
|
||||
@@ -91,7 +96,13 @@ fn show_single_setting(app: &App, key: &str) -> CommandResult {
|
||||
let value = match key.as_str() {
|
||||
"model" => {
|
||||
if app.auto_model {
|
||||
Some("auto (auto-select by request complexity)".to_string())
|
||||
let mut label = "auto (auto-select model per turn)".to_string();
|
||||
if let Some(effective) = app.last_effective_model.as_deref()
|
||||
&& effective != "auto"
|
||||
{
|
||||
label.push_str(&format!("; last: {effective}"));
|
||||
}
|
||||
Some(label)
|
||||
} else {
|
||||
Some(app.model.clone())
|
||||
}
|
||||
@@ -247,16 +258,20 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
|
||||
if value.trim().eq_ignore_ascii_case("auto") {
|
||||
app.auto_model = true;
|
||||
app.model = "auto".to_string();
|
||||
app.last_effective_model = None;
|
||||
app.reasoning_effort = ReasoningEffort::Auto;
|
||||
app.last_effective_reasoning_effort = None;
|
||||
app.update_model_compaction_budget();
|
||||
app.session.last_prompt_tokens = None;
|
||||
app.session.last_completion_tokens = None;
|
||||
return CommandResult::with_message_and_action(
|
||||
"model = auto (auto-select by request complexity)".to_string(),
|
||||
"model = auto (auto-select model and thinking per turn)".to_string(),
|
||||
AppAction::UpdateCompaction(app.compaction_config()),
|
||||
);
|
||||
}
|
||||
// Clear auto mode when a specific model is set
|
||||
app.auto_model = false;
|
||||
app.last_effective_model = None;
|
||||
let Some(model) = normalize_model_name(value) else {
|
||||
return CommandResult::error(format!(
|
||||
"Invalid model '{value}'. Expected a DeepSeek model ID. Common models: {}",
|
||||
@@ -385,7 +400,13 @@ pub fn set_config_value(app: &mut App, key: &str, value: &str, persist: bool) ->
|
||||
}
|
||||
"default_model" => {
|
||||
if let Some(ref model) = settings.default_model {
|
||||
app.auto_model = model.trim().eq_ignore_ascii_case("auto");
|
||||
app.model.clone_from(model);
|
||||
app.last_effective_model = None;
|
||||
if app.auto_model {
|
||||
app.reasoning_effort = ReasoningEffort::Auto;
|
||||
app.last_effective_reasoning_effort = None;
|
||||
}
|
||||
app.update_model_compaction_budget();
|
||||
app.session.last_prompt_tokens = None;
|
||||
app.session.last_completion_tokens = None;
|
||||
@@ -604,14 +625,6 @@ fn expand_tilde(raw: &str) -> String {
|
||||
/// Default → Flash (cost savings).
|
||||
pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
|
||||
let len = input.chars().count();
|
||||
// Short messages → Flash
|
||||
if len < 100 {
|
||||
return "deepseek-v4-flash".to_string();
|
||||
}
|
||||
// Long complex requests → Pro
|
||||
if len > 500 {
|
||||
return "deepseek-v4-pro".to_string();
|
||||
}
|
||||
let lower = input.to_lowercase();
|
||||
let complex_keywords = [
|
||||
"refactor",
|
||||
@@ -630,10 +643,249 @@ pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
|
||||
if complex_keywords.iter().any(|kw| lower.contains(kw)) {
|
||||
return "deepseek-v4-pro".to_string();
|
||||
}
|
||||
// Short messages → Flash
|
||||
if len < 100 {
|
||||
return "deepseek-v4-flash".to_string();
|
||||
}
|
||||
// Long complex requests → Pro
|
||||
if len > 500 {
|
||||
return "deepseek-v4-pro".to_string();
|
||||
}
|
||||
// Default to Flash for cost savings
|
||||
"deepseek-v4-flash".to_string()
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct AutoRouteRecommendation {
|
||||
pub model: String,
|
||||
pub reasoning_effort: Option<ReasoningEffort>,
|
||||
}
|
||||
|
||||
/// Which mechanism produced an auto-route decision.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AutoRouteSource {
    /// The decision came from the Flash router model's reply.
    FlashRouter,
    /// The decision came from the local heuristic fallback.
    Heuristic,
}

impl AutoRouteSource {
    /// Short, stable identifier for this source.
    #[must_use]
    pub fn label(self) -> &'static str {
        match self {
            Self::Heuristic => "heuristic",
            Self::FlashRouter => "flash-router",
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct AutoRouteSelection {
|
||||
pub model: String,
|
||||
pub reasoning_effort: Option<ReasoningEffort>,
|
||||
pub source: AutoRouteSource,
|
||||
}
|
||||
|
||||
/// System prompt sent to the router model.
///
/// It asks for a compact JSON object with a `model` and a `thinking` field.
/// The text is a single line; it is split into fragments here only for
/// readability.
pub const AUTO_MODEL_ROUTER_SYSTEM_PROMPT: &str = concat!(
    "You are the DeepSeek TUI auto-routing classifier. Return only compact JSON: ",
    r#"{"model":"deepseek-v4-flash|deepseek-v4-pro","thinking":"off|high|max"}. "#,
    "Use deepseek-v4-flash for trivial, conversational, status, or single-step work. ",
    "Use deepseek-v4-pro for coding, debugging, release work, multi-step tasks, high-risk decisions, ",
    "tool-heavy work, ambiguous requests, or anything that benefits from deeper reasoning. ",
    "Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for ",
    "agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.",
);
|
||||
|
||||
/// Parse the Flash router's JSON-only response.
|
||||
///
|
||||
/// The runtime treats classifier output as untrusted: only known V4 model IDs
|
||||
/// and supported reasoning tiers are accepted. Anything else falls back to the
|
||||
/// deterministic heuristic.
|
||||
pub fn parse_auto_route_recommendation(raw: &str) -> Option<AutoRouteRecommendation> {
|
||||
let json = extract_first_json_object(raw)?;
|
||||
let value: serde_json::Value = serde_json::from_str(json).ok()?;
|
||||
let model = value.get("model").and_then(serde_json::Value::as_str)?;
|
||||
let model = normalize_auto_route_model(model)?;
|
||||
let reasoning_effort = value
|
||||
.get("thinking")
|
||||
.or_else(|| value.get("reasoning_effort"))
|
||||
.or_else(|| value.get("effort"))
|
||||
.and_then(serde_json::Value::as_str)
|
||||
.and_then(parse_auto_route_reasoning_effort);
|
||||
|
||||
Some(AutoRouteRecommendation {
|
||||
model: model.to_string(),
|
||||
reasoning_effort,
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the first brace-balanced `{ ... }` span in `raw`, if any.
///
/// Scanning starts at the first `{` and stops at the `}` that closes it.
/// Braces inside double-quoted strings (including after backslash escapes)
/// are ignored, so trailing prose or a second object after the first one does
/// not end up in the returned slice. Returns `None` when `raw` has no `{` or
/// the first object is never closed.
fn extract_first_json_object(raw: &str) -> Option<&str> {
    let start = raw.find('{')?;

    let mut depth = 0usize;
    let mut in_string = false;
    let mut escaped = false;

    // Byte-wise scanning is safe: every delimiter we look for is ASCII, and
    // ASCII bytes never occur inside a multi-byte UTF-8 sequence.
    for (offset, &byte) in raw.as_bytes()[start..].iter().enumerate() {
        if in_string {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                in_string = false;
            }
            continue;
        }

        match byte {
            b'"' => in_string = true,
            b'{' => depth += 1,
            b'}' => {
                // `depth` is at least 1 here: the scan begins on a `{` and
                // returns as soon as the count drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return Some(&raw[start..=start + offset]);
                }
            }
            _ => {}
        }
    }

    None
}
|
||||
|
||||
/// Map a router-supplied model name onto one of the two accepted model ids.
///
/// Surrounding whitespace and ASCII case are ignored. The full id and the
/// short aliases `v4-pro` / `pro` and `v4-flash` / `flash` are accepted;
/// anything else yields `None`.
fn normalize_auto_route_model(model: &str) -> Option<&'static str> {
    const PRO: &str = "deepseek-v4-pro";
    const FLASH: &str = "deepseek-v4-flash";

    fn matches_any(candidate: &str, aliases: &[&str]) -> bool {
        aliases
            .iter()
            .any(|&alias| candidate.eq_ignore_ascii_case(alias))
    }

    let candidate = model.trim();
    if matches_any(candidate, &[PRO, "v4-pro", "pro"]) {
        Some(PRO)
    } else if matches_any(candidate, &[FLASH, "v4-flash", "flash"]) {
        Some(FLASH)
    } else {
        None
    }
}
|
||||
|
||||
fn parse_auto_route_reasoning_effort(effort: &str) -> Option<ReasoningEffort> {
|
||||
match effort.trim().to_ascii_lowercase().as_str() {
|
||||
"off" | "disabled" | "none" | "false" => Some(ReasoningEffort::Off),
|
||||
"low" | "minimal" | "medium" | "mid" => Some(ReasoningEffort::High),
|
||||
"high" => Some(ReasoningEffort::High),
|
||||
"max" | "maximum" | "xhigh" => Some(ReasoningEffort::Max),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn normalize_auto_route_effort(effort: ReasoningEffort) -> ReasoningEffort {
|
||||
match effort {
|
||||
ReasoningEffort::Low | ReasoningEffort::Medium => ReasoningEffort::High,
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn resolve_auto_route_with_flash(
|
||||
config: &crate::config::Config,
|
||||
latest_request: &str,
|
||||
recent_context: &str,
|
||||
selected_model_mode: &str,
|
||||
selected_thinking_mode: &str,
|
||||
) -> AutoRouteSelection {
|
||||
match auto_route_flash_recommendation(
|
||||
config,
|
||||
latest_request,
|
||||
recent_context,
|
||||
selected_model_mode,
|
||||
selected_thinking_mode,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(Some(recommendation)) => AutoRouteSelection {
|
||||
model: recommendation.model,
|
||||
reasoning_effort: recommendation.reasoning_effort,
|
||||
source: AutoRouteSource::FlashRouter,
|
||||
},
|
||||
Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode),
|
||||
}
|
||||
}
|
||||
|
||||
fn fallback_auto_route(latest_request: &str, selected_model_mode: &str) -> AutoRouteSelection {
|
||||
AutoRouteSelection {
|
||||
model: auto_model_heuristic(latest_request, selected_model_mode),
|
||||
reasoning_effort: Some(normalize_auto_route_effort(crate::auto_reasoning::select(
|
||||
false,
|
||||
latest_request,
|
||||
))),
|
||||
source: AutoRouteSource::Heuristic,
|
||||
}
|
||||
}
|
||||
|
||||
async fn auto_route_flash_recommendation(
|
||||
config: &crate::config::Config,
|
||||
latest_request: &str,
|
||||
recent_context: &str,
|
||||
selected_model_mode: &str,
|
||||
selected_thinking_mode: &str,
|
||||
) -> Result<Option<AutoRouteRecommendation>> {
|
||||
if cfg!(test) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let client = DeepSeekClient::new(config)?;
|
||||
let request = MessageRequest {
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
messages: vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: auto_route_prompt(
|
||||
latest_request,
|
||||
recent_context,
|
||||
selected_model_mode,
|
||||
selected_thinking_mode,
|
||||
),
|
||||
cache_control: None,
|
||||
}],
|
||||
}],
|
||||
max_tokens: 96,
|
||||
system: Some(SystemPrompt::Text(
|
||||
AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string(),
|
||||
)),
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: Some("off".to_string()),
|
||||
stream: Some(false),
|
||||
temperature: Some(0.0),
|
||||
top_p: None,
|
||||
};
|
||||
|
||||
let response =
|
||||
tokio::time::timeout(Duration::from_secs(4), client.create_message(request)).await??;
|
||||
Ok(parse_auto_route_recommendation(&message_response_text(
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
|
||||
fn auto_route_prompt(
|
||||
latest_request: &str,
|
||||
recent_context: &str,
|
||||
selected_model_mode: &str,
|
||||
selected_thinking_mode: &str,
|
||||
) -> String {
|
||||
format!(
|
||||
"Session mode: agent\nSelected model mode: {}\nSelected thinking mode: {}\n\nRecent context:\n{}\n\nLatest user request:\n{}\n\nReturn JSON only.",
|
||||
selected_model_mode,
|
||||
selected_thinking_mode,
|
||||
if recent_context.trim().is_empty() {
|
||||
"No prior context."
|
||||
} else {
|
||||
recent_context
|
||||
},
|
||||
truncate_for_auto_router(latest_request, 4_000)
|
||||
)
|
||||
}
|
||||
|
||||
fn message_response_text(response: &MessageResponse) -> String {
|
||||
let mut out = String::new();
|
||||
for block in &response.content {
|
||||
match block {
|
||||
ContentBlock::Text { text, .. } | ContentBlock::ToolResult { content: text, .. } => {
|
||||
append_router_text(&mut out, text);
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
append_router_text(&mut out, thinking);
|
||||
}
|
||||
ContentBlock::ToolUse { name, .. } => {
|
||||
append_router_text(&mut out, &format!("[tool call: {name}]"));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Append `text` to `out`, preceded by a newline when `out` already has
/// content.
fn append_router_text(out: &mut String, text: &str) {
    let separator = if out.is_empty() { "" } else { "\n" };
    out.push_str(separator);
    out.push_str(text);
}
|
||||
|
||||
/// Keep at most `max_chars` characters of `text`, appending `...` when
/// anything was cut.
///
/// Counts `char`s rather than bytes, so the cut never lands inside a
/// multi-byte character.
fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
    // The byte offset of the (max_chars + 1)-th char, if there is one, is
    // exactly where the kept prefix ends.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => {
            let truncated = &text[..cut];
            format!("{truncated}...")
        }
        None => text.to_owned(),
    }
}
|
||||
|
||||
/// Toggle LSP diagnostics on/off or show status.
|
||||
///
|
||||
/// - `/lsp on` — enable inline LSP diagnostics
|
||||
@@ -864,6 +1116,21 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
/// Setting `model auto` through `set_config` must switch the model and the
/// thinking tier to auto and clear the cached effective values.
#[test]
fn test_set_model_auto_enables_auto_thinking() {
    let mut app = create_test_app();
    app.reasoning_effort = ReasoningEffort::Off;

    let outcome = set_config(&mut app, Some("model auto"));

    assert!(outcome.message.is_some());
    assert_eq!(app.model, "auto");
    assert!(app.auto_model);
    assert_eq!(app.reasoning_effort, ReasoningEffort::Auto);
    assert!(app.last_effective_reasoning_effort.is_none());
    assert!(app.last_effective_model.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn test_set_model_accepts_future_deepseek_model_id() {
|
||||
let mut app = create_test_app();
|
||||
@@ -883,6 +1150,45 @@ mod tests {
|
||||
assert_eq!(app.model, "deepseek-v4-flash");
|
||||
}
|
||||
|
||||
/// A bare JSON object with canonical field names parses as-is.
#[test]
fn auto_route_recommendation_parses_strict_json() {
    let raw = r#"{"model":"deepseek-v4-pro","thinking":"max"}"#;

    let parsed = parse_auto_route_recommendation(raw).expect("valid router response should parse");

    assert_eq!(parsed.reasoning_effort, Some(ReasoningEffort::Max));
    assert_eq!(parsed.model, "deepseek-v4-pro");
}
|
||||
|
||||
/// JSON preceded by prose, a short model alias, and the `reasoning_effort`
/// key are all accepted.
#[test]
fn auto_route_recommendation_accepts_wrapped_json_aliases() {
    let raw = r#"route: {"model":"flash","reasoning_effort":"off"}"#;

    let parsed =
        parse_auto_route_recommendation(raw).expect("wrapped router response should parse");

    assert_eq!(parsed.reasoning_effort, Some(ReasoningEffort::Off));
    assert_eq!(parsed.model, "deepseek-v4-flash");
}
|
||||
|
||||
/// The `medium` tier label is still accepted and is folded into `High`.
#[test]
fn auto_route_recommendation_normalizes_legacy_low_medium_to_high() {
    let raw = r#"{"model":"deepseek-v4-pro","reasoning_effort":"medium"}"#;

    let parsed =
        parse_auto_route_recommendation(raw).expect("medium should parse for back-compat");

    assert_eq!(parsed.reasoning_effort, Some(ReasoningEffort::High));
    assert_eq!(parsed.model, "deepseek-v4-pro");
}
|
||||
|
||||
/// A model id outside the accepted set makes the whole recommendation
/// unusable.
#[test]
fn auto_route_recommendation_rejects_unknown_model() {
    let raw = r#"{"model":"some-other-model","thinking":"max"}"#;

    let parsed = parse_auto_route_recommendation(raw);

    assert!(parsed.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn test_set_default_mode_normal_save_reports_normalized_value() {
|
||||
let _lock = lock_test_env();
|
||||
|
||||
@@ -4,7 +4,7 @@ use std::fmt::Write;
|
||||
|
||||
use crate::config::{COMMON_DEEPSEEK_MODELS, normalize_model_name};
|
||||
use crate::localization::{MessageId, tr};
|
||||
use crate::tui::app::{App, AppAction, AppMode};
|
||||
use crate::tui::app::{App, AppAction, AppMode, ReasoningEffort};
|
||||
use crate::tui::views::{HelpView, ModalKind, SubAgentsView};
|
||||
|
||||
use super::CommandResult;
|
||||
@@ -91,14 +91,33 @@ pub fn exit() -> CommandResult {
|
||||
/// way to flip both knobs without memorising the docs.
|
||||
pub fn model(app: &mut App, model_name: Option<&str>) -> CommandResult {
|
||||
if let Some(name) = model_name {
|
||||
if name.trim().eq_ignore_ascii_case("auto") {
|
||||
let old_model = app.model_display_label();
|
||||
app.auto_model = true;
|
||||
app.model = "auto".to_string();
|
||||
app.last_effective_model = None;
|
||||
app.reasoning_effort = ReasoningEffort::Auto;
|
||||
app.last_effective_reasoning_effort = None;
|
||||
app.update_model_compaction_budget();
|
||||
app.session.last_prompt_tokens = None;
|
||||
app.session.last_completion_tokens = None;
|
||||
return CommandResult::with_message_and_action(
|
||||
tr(app.ui_locale, MessageId::ModelChanged)
|
||||
.replace("{old}", &old_model)
|
||||
.replace("{new}", "auto"),
|
||||
AppAction::UpdateCompaction(app.compaction_config()),
|
||||
);
|
||||
}
|
||||
let Some(model_id) = normalize_model_name(name) else {
|
||||
return CommandResult::error(format!(
|
||||
"Invalid model '{name}'. Expected a DeepSeek model ID. Common models: {}",
|
||||
"Invalid model '{name}'. Expected auto or a DeepSeek model ID. Common models: {}",
|
||||
COMMON_DEEPSEEK_MODELS.join(", ")
|
||||
));
|
||||
};
|
||||
let old_model = app.model.clone();
|
||||
let old_model = app.model_display_label();
|
||||
app.auto_model = false;
|
||||
app.model = model_id.clone();
|
||||
app.last_effective_model = None;
|
||||
app.update_model_compaction_budget();
|
||||
app.session.last_prompt_tokens = None;
|
||||
app.session.last_completion_tokens = None;
|
||||
@@ -427,6 +446,21 @@ mod tests {
|
||||
assert_eq!(app.session.last_completion_tokens, None);
|
||||
}
|
||||
|
||||
/// Calling `model` with `auto` must switch the model and the thinking tier to
/// auto and clear the cached effective values.
#[test]
fn test_model_auto_enables_auto_thinking() {
    let mut app = create_test_app();
    app.reasoning_effort = ReasoningEffort::Off;

    let outcome = model(&mut app, Some("auto"));

    assert!(outcome.message.is_some());
    assert_eq!(app.model, "auto");
    assert!(app.auto_model);
    assert_eq!(app.reasoning_effort, ReasoningEffort::Auto);
    assert!(app.last_effective_reasoning_effort.is_none());
    assert!(app.last_effective_model.is_none());
}
|
||||
|
||||
#[test]
|
||||
fn test_model_change_accepts_future_deepseek_model() {
|
||||
let mut app = create_test_app();
|
||||
|
||||
@@ -614,6 +614,11 @@ pub fn auto_model_heuristic(input: &str, current_model: &str) -> String {
|
||||
config::auto_model_heuristic(input, current_model)
|
||||
}
|
||||
|
||||
pub use config::{
|
||||
AutoRouteRecommendation, AutoRouteSelection, normalize_auto_route_effort,
|
||||
parse_auto_route_recommendation, resolve_auto_route_with_flash,
|
||||
};
|
||||
|
||||
/// Execute a Recursive Language Model (RLM) turn — Algorithm 1 from
|
||||
/// Zhang et al. (arXiv:2512.24601).
|
||||
///
|
||||
|
||||
@@ -977,10 +977,11 @@ impl Config {
|
||||
}
|
||||
}
|
||||
if let Some(model) = self.default_text_model.as_deref()
|
||||
&& !model.trim().eq_ignore_ascii_case("auto")
|
||||
&& normalize_model_name(model).is_none()
|
||||
{
|
||||
anyhow::bail!(
|
||||
"Invalid default_text_model '{model}': expected a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)."
|
||||
"Invalid default_text_model '{model}': expected auto or a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)."
|
||||
);
|
||||
}
|
||||
if let Some(policy) = self.approval_policy.as_deref() {
|
||||
@@ -1095,6 +1096,11 @@ impl Config {
|
||||
{
|
||||
return normalized;
|
||||
}
|
||||
if let Some(model) = self.default_text_model.as_deref()
|
||||
&& model.trim().eq_ignore_ascii_case("auto")
|
||||
{
|
||||
return "auto".to_string();
|
||||
}
|
||||
if let Some(model) = self.default_text_model.as_deref()
|
||||
&& let Some(normalized) = normalize_model_name(model)
|
||||
{
|
||||
@@ -3420,6 +3426,17 @@ api_key = "old-openrouter-key"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `default_text_model = "auto"` passes validation and is reported verbatim
/// by `default_model()`.
#[test]
fn validate_accepts_auto_default_text_model() -> Result<()> {
    let auto_config = Config {
        default_text_model: Some(String::from("auto")),
        ..Config::default()
    };

    auto_config.validate()?;

    assert_eq!(auto_config.default_model(), "auto");
    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn deepseek_model_env_overrides_default_text_model() -> Result<()> {
|
||||
let _lock = lock_test_env();
|
||||
|
||||
@@ -539,7 +539,9 @@ pub fn open_browser(url: &str) -> Result<()> {
|
||||
}
|
||||
|
||||
fn validate_document(doc: &ConfigUiDocument) -> Result<()> {
|
||||
if normalize_model_name(&doc.runtime.model).is_none() {
|
||||
if !doc.runtime.model.trim().eq_ignore_ascii_case("auto")
|
||||
&& normalize_model_name(&doc.runtime.model).is_none()
|
||||
{
|
||||
bail!("invalid model '{}'", doc.runtime.model);
|
||||
}
|
||||
if doc.config.mcp_config_path.trim().is_empty() {
|
||||
@@ -557,6 +559,7 @@ fn reload_runtime_config(app: &mut App, config: &mut Config) -> Result<()> {
|
||||
.reasoning_effort()
|
||||
.unwrap_or_else(|| app.reasoning_effort.as_setting()),
|
||||
);
|
||||
app.last_effective_reasoning_effort = None;
|
||||
app.update_model_compaction_budget();
|
||||
app.mcp_config_path = reloaded.mcp_config_path();
|
||||
app.skills_dir = reloaded.skills_dir();
|
||||
@@ -584,6 +587,7 @@ fn apply_reasoning_effort(
|
||||
) -> Result<()> {
|
||||
let effort: ReasoningEffort = value.into();
|
||||
app.reasoning_effort = effort;
|
||||
app.last_effective_reasoning_effort = None;
|
||||
app.update_model_compaction_budget();
|
||||
if persist {
|
||||
commands::persist_root_string_key("reasoning_effort", effort.as_setting())?;
|
||||
@@ -844,7 +848,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn build_document_reflects_app_state() {
|
||||
let app = app();
|
||||
let mut app = app();
|
||||
app.auto_model = false;
|
||||
app.model = "deepseek-v4-pro".to_string();
|
||||
app.reasoning_effort = ReasoningEffort::Max;
|
||||
let config = Config::default();
|
||||
let doc = build_document(&app, &config).expect("document");
|
||||
assert_eq!(doc.runtime.model, app.model);
|
||||
|
||||
@@ -48,6 +48,7 @@ use crate::tools::spec::RuntimeToolServices;
|
||||
use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult};
|
||||
use crate::tools::subagent::{
|
||||
Mailbox, SharedSubAgentManager, SubAgentRuntime, SubAgentType, new_shared_subagent_manager,
|
||||
resolve_subagent_assignment_route,
|
||||
};
|
||||
use crate::tools::todo::{SharedTodoList, new_shared_todo_list};
|
||||
use crate::tools::user_input::{UserInputRequest, UserInputResponse};
|
||||
@@ -515,6 +516,8 @@ impl Engine {
|
||||
model,
|
||||
goal_objective,
|
||||
reasoning_effort,
|
||||
reasoning_effort_auto,
|
||||
auto_model,
|
||||
allow_shell,
|
||||
trust_mode,
|
||||
auto_approve,
|
||||
@@ -525,6 +528,8 @@ impl Engine {
|
||||
model,
|
||||
goal_objective,
|
||||
reasoning_effort,
|
||||
reasoning_effort_auto,
|
||||
auto_model,
|
||||
allow_shell,
|
||||
trust_mode,
|
||||
auto_approve,
|
||||
@@ -564,7 +569,7 @@ impl Engine {
|
||||
continue;
|
||||
};
|
||||
|
||||
let runtime = SubAgentRuntime::new(
|
||||
let mut runtime = SubAgentRuntime::new(
|
||||
client,
|
||||
self.session.model.clone(),
|
||||
// Sub-agents don't inherit YOLO mode - use Agent mode defaults
|
||||
@@ -574,8 +579,17 @@ impl Engine {
|
||||
Arc::clone(&self.subagent_manager),
|
||||
)
|
||||
.with_role_models(self.config.subagent_model_overrides.clone())
|
||||
.with_auto_model(self.session.auto_model)
|
||||
.with_reasoning_effort(
|
||||
self.session.reasoning_effort.clone(),
|
||||
self.session.reasoning_effort_auto,
|
||||
)
|
||||
.with_max_spawn_depth(self.config.max_spawn_depth)
|
||||
.background_runtime();
|
||||
let route = resolve_subagent_assignment_route(&runtime, None, &prompt).await;
|
||||
runtime.model = route.model;
|
||||
runtime.reasoning_effort = route.reasoning_effort;
|
||||
runtime.reasoning_effort_auto = false;
|
||||
|
||||
let result = {
|
||||
let mut manager = self.subagent_manager.write().await;
|
||||
@@ -623,6 +637,7 @@ impl Engine {
|
||||
.await;
|
||||
}
|
||||
Op::SetModel { model } => {
|
||||
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
|
||||
self.session.model = model;
|
||||
self.config.model.clone_from(&self.session.model);
|
||||
let _ = self
|
||||
@@ -654,6 +669,7 @@ impl Engine {
|
||||
self.session.compaction_summary_prompt =
|
||||
extract_compaction_summary_prompt(system_prompt.clone());
|
||||
self.session.system_prompt = system_prompt;
|
||||
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
|
||||
self.session.model = model;
|
||||
self.session.workspace = workspace.clone();
|
||||
self.config.model.clone_from(&self.session.model);
|
||||
@@ -709,6 +725,8 @@ impl Engine {
|
||||
self.session.model.clone(),
|
||||
self.config.goal_objective.clone(),
|
||||
self.session.reasoning_effort.clone(),
|
||||
self.session.reasoning_effort_auto,
|
||||
self.session.auto_model,
|
||||
self.session.allow_shell,
|
||||
self.session.trust_mode,
|
||||
self.session.auto_approve,
|
||||
@@ -758,6 +776,8 @@ impl Engine {
|
||||
model: String,
|
||||
goal_objective: Option<String>,
|
||||
reasoning_effort: Option<String>,
|
||||
reasoning_effort_auto: bool,
|
||||
auto_model: bool,
|
||||
allow_shell: bool,
|
||||
trust_mode: bool,
|
||||
auto_approve: bool,
|
||||
@@ -838,6 +858,8 @@ impl Engine {
|
||||
self.config.model.clone_from(&self.session.model);
|
||||
self.config.goal_objective = goal_objective;
|
||||
self.session.reasoning_effort = reasoning_effort;
|
||||
self.session.reasoning_effort_auto = reasoning_effort_auto;
|
||||
self.session.auto_model = auto_model;
|
||||
self.session.allow_shell = allow_shell;
|
||||
self.config.allow_shell = allow_shell;
|
||||
self.session.trust_mode = trust_mode;
|
||||
@@ -900,6 +922,11 @@ impl Engine {
|
||||
Arc::clone(&self.subagent_manager),
|
||||
)
|
||||
.with_role_models(self.config.subagent_model_overrides.clone())
|
||||
.with_auto_model(self.session.auto_model)
|
||||
.with_reasoning_effort(
|
||||
self.session.reasoning_effort.clone(),
|
||||
self.session.reasoning_effort_auto,
|
||||
)
|
||||
.with_max_spawn_depth(self.config.max_spawn_depth);
|
||||
if let Some((mailbox, cancel_token)) = mailbox_for_runtime.as_ref() {
|
||||
rt = rt
|
||||
|
||||
@@ -20,6 +20,11 @@ pub enum Op {
|
||||
/// Reasoning-effort tier: `"off" | "low" | "medium" | "high" | "max"`.
|
||||
/// `None` lets the provider apply its default.
|
||||
reasoning_effort: Option<String>,
|
||||
/// True when the user selected auto thinking, even though the UI sends
|
||||
/// a concrete per-turn value to the model API.
|
||||
reasoning_effort_auto: bool,
|
||||
/// True when the user selected auto model routing.
|
||||
auto_model: bool,
|
||||
allow_shell: bool,
|
||||
trust_mode: bool,
|
||||
auto_approve: bool,
|
||||
@@ -89,27 +94,3 @@ pub enum Op {
|
||||
/// Shutdown the engine
|
||||
Shutdown,
|
||||
}
|
||||
|
||||
impl Op {
|
||||
/// Create a send message operation
|
||||
pub fn send(
|
||||
content: impl Into<String>,
|
||||
mode: AppMode,
|
||||
model: impl Into<String>,
|
||||
reasoning_effort: Option<String>,
|
||||
allow_shell: bool,
|
||||
trust_mode: bool,
|
||||
auto_approve: bool,
|
||||
) -> Self {
|
||||
Op::SendMessage {
|
||||
content: content.into(),
|
||||
mode,
|
||||
model: model.into(),
|
||||
goal_objective: None,
|
||||
reasoning_effort,
|
||||
allow_shell,
|
||||
trust_mode,
|
||||
auto_approve,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,6 +19,11 @@ pub struct Session {
|
||||
/// `"off" | "low" | "medium" | "high" | "max"`. `None` lets the provider
|
||||
/// apply its own defaults.
|
||||
pub reasoning_effort: Option<String>,
|
||||
/// Whether the user selected automatic reasoning effort.
|
||||
pub reasoning_effort_auto: bool,
|
||||
|
||||
/// Whether the user selected automatic model routing.
|
||||
pub auto_model: bool,
|
||||
|
||||
/// Workspace directory
|
||||
pub workspace: PathBuf,
|
||||
@@ -118,6 +123,8 @@ impl Session {
|
||||
Self {
|
||||
model,
|
||||
reasoning_effort: None,
|
||||
reasoning_effort_auto: false,
|
||||
auto_model: false,
|
||||
workspace,
|
||||
system_prompt: None,
|
||||
compaction_summary_prompt: None,
|
||||
|
||||
+63
-15
@@ -2504,6 +2504,11 @@ async fn run_review(config: &Config, args: ReviewArgs) -> Result<()> {
|
||||
.model
|
||||
.or_else(|| config.default_text_model.clone())
|
||||
.unwrap_or_else(|| config.default_model());
|
||||
let route = resolve_cli_auto_route(config, &model, &diff).await;
|
||||
let model = route.model;
|
||||
let reasoning_effort = route
|
||||
.reasoning_effort
|
||||
.map(|effort| effort.as_setting().to_string());
|
||||
|
||||
let system = SystemPrompt::Text(
|
||||
"You are a senior code reviewer. Focus on bugs, risks, behavioral regressions, and missing tests. \
|
||||
@@ -2529,7 +2534,7 @@ Provide findings ordered by severity with file references, then open questions,
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: None,
|
||||
reasoning_effort,
|
||||
stream: Some(false),
|
||||
temperature: Some(0.2),
|
||||
top_p: Some(0.9),
|
||||
@@ -3620,14 +3625,42 @@ async fn run_interactive(
|
||||
.await
|
||||
}
|
||||
|
||||
struct CliAutoRoute {
|
||||
model: String,
|
||||
reasoning_effort: Option<crate::tui::app::ReasoningEffort>,
|
||||
auto_model: bool,
|
||||
}
|
||||
|
||||
async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> CliAutoRoute {
|
||||
if model.trim().eq_ignore_ascii_case("auto") {
|
||||
let selection =
|
||||
commands::resolve_auto_route_with_flash(config, prompt, "", "auto", "auto").await;
|
||||
CliAutoRoute {
|
||||
model: selection.model,
|
||||
reasoning_effort: selection.reasoning_effort,
|
||||
auto_model: true,
|
||||
}
|
||||
} else {
|
||||
CliAutoRoute {
|
||||
model: model.to_string(),
|
||||
reasoning_effort: None,
|
||||
auto_model: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()> {
|
||||
use crate::client::DeepSeekClient;
|
||||
use crate::models::{ContentBlock, Message, MessageRequest};
|
||||
|
||||
let client = DeepSeekClient::new(config)?;
|
||||
let route = resolve_cli_auto_route(config, model, prompt).await;
|
||||
let reasoning_effort = route
|
||||
.reasoning_effort
|
||||
.map(|effort| effort.as_setting().to_string());
|
||||
|
||||
let request = MessageRequest {
|
||||
model: model.to_string(),
|
||||
model: route.model,
|
||||
messages: vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
@@ -3641,7 +3674,7 @@ async fn run_one_shot(config: &Config, model: &str, prompt: &str) -> Result<()>
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: None,
|
||||
reasoning_effort,
|
||||
stream: Some(false),
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
@@ -3663,8 +3696,13 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result
|
||||
use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt};
|
||||
|
||||
let client = DeepSeekClient::new(config)?;
|
||||
let route = resolve_cli_auto_route(config, model, prompt).await;
|
||||
let model = route.model;
|
||||
let reasoning_effort = route
|
||||
.reasoning_effort
|
||||
.map(|effort| effort.as_setting().to_string());
|
||||
let request = MessageRequest {
|
||||
model: model.to_string(),
|
||||
model: model.clone(),
|
||||
messages: vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
@@ -3680,7 +3718,7 @@ async fn run_one_shot_json(config: &Config, model: &str, prompt: &str) -> Result
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: None,
|
||||
reasoning_effort,
|
||||
stream: Some(false),
|
||||
temperature: Some(0.2),
|
||||
top_p: Some(0.9),
|
||||
@@ -3725,6 +3763,13 @@ async fn run_exec_agent(
|
||||
use crate::tools::todo::new_shared_todo_list;
|
||||
use crate::tui::app::AppMode;
|
||||
|
||||
let route = resolve_cli_auto_route(config, model, prompt).await;
|
||||
let auto_model = route.auto_model;
|
||||
let effective_model = route.model;
|
||||
let effective_reasoning_effort = route
|
||||
.reasoning_effort
|
||||
.map(|effort| effort.as_setting().to_string());
|
||||
|
||||
// Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle
|
||||
// architecture (issue #124) handles long-context resets via fresh contexts
|
||||
// rather than progressive summarization. The compaction config is still
|
||||
@@ -3732,8 +3777,8 @@ async fn run_exec_agent(
|
||||
// or direct engine config keep their old behavior.
|
||||
let compaction = CompactionConfig {
|
||||
enabled: false,
|
||||
model: model.to_string(),
|
||||
token_threshold: compaction_threshold_for_model(model),
|
||||
model: effective_model.clone(),
|
||||
token_threshold: compaction_threshold_for_model(&effective_model),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -3747,7 +3792,7 @@ async fn run_exec_agent(
|
||||
.map(crate::config::LspConfigToml::into_runtime);
|
||||
|
||||
let engine_config = EngineConfig {
|
||||
model: model.to_string(),
|
||||
model: effective_model.clone(),
|
||||
workspace: workspace.clone(),
|
||||
allow_shell: auto_approve || config.allow_shell(),
|
||||
trust_mode,
|
||||
@@ -3784,15 +3829,18 @@ async fn run_exec_agent(
|
||||
};
|
||||
|
||||
engine_handle
|
||||
.send(Op::send(
|
||||
prompt,
|
||||
.send(Op::SendMessage {
|
||||
content: prompt.to_string(),
|
||||
mode,
|
||||
model,
|
||||
None,
|
||||
auto_approve || config.allow_shell(),
|
||||
model: effective_model.clone(),
|
||||
goal_objective: None,
|
||||
reasoning_effort: effective_reasoning_effort,
|
||||
reasoning_effort_auto: auto_model,
|
||||
auto_model,
|
||||
allow_shell: auto_approve || config.allow_shell(),
|
||||
trust_mode,
|
||||
auto_approve,
|
||||
))
|
||||
})
|
||||
.await?;
|
||||
|
||||
#[derive(serde::Serialize)]
|
||||
@@ -3813,7 +3861,7 @@ async fn run_exec_agent(
|
||||
}
|
||||
let mut summary = ExecSummary {
|
||||
mode: "agent".to_string(),
|
||||
model: model.to_string(),
|
||||
model: effective_model,
|
||||
prompt: prompt.to_string(),
|
||||
..ExecSummary::default()
|
||||
};
|
||||
|
||||
@@ -1457,21 +1457,43 @@ impl RuntimeThreadManager {
|
||||
}
|
||||
|
||||
let mode = parse_mode(req.mode.as_deref().unwrap_or(&thread.mode));
|
||||
let model = req.model.unwrap_or_else(|| thread.model.clone());
|
||||
let requested_model = req.model.unwrap_or_else(|| thread.model.clone());
|
||||
let auto_model = requested_model.trim().eq_ignore_ascii_case("auto");
|
||||
let (model, reasoning_effort) = if auto_model {
|
||||
let selection = crate::commands::resolve_auto_route_with_flash(
|
||||
&self.config,
|
||||
&prompt,
|
||||
"",
|
||||
"auto",
|
||||
"auto",
|
||||
)
|
||||
.await;
|
||||
(
|
||||
selection.model,
|
||||
selection
|
||||
.reasoning_effort
|
||||
.map(|effort| effort.as_setting().to_string()),
|
||||
)
|
||||
} else {
|
||||
(requested_model, None)
|
||||
};
|
||||
let allow_shell = req.allow_shell.unwrap_or(thread.allow_shell);
|
||||
let trust_mode = req.trust_mode.unwrap_or(thread.trust_mode);
|
||||
let auto_approve = req.auto_approve.unwrap_or(thread.auto_approve);
|
||||
|
||||
engine
|
||||
.send(Op::send(
|
||||
prompt,
|
||||
.send(Op::SendMessage {
|
||||
content: prompt,
|
||||
mode,
|
||||
model.clone(),
|
||||
None,
|
||||
model: model.clone(),
|
||||
goal_objective: None,
|
||||
reasoning_effort,
|
||||
reasoning_effort_auto: auto_model,
|
||||
auto_model,
|
||||
allow_shell,
|
||||
trust_mode,
|
||||
auto_approve,
|
||||
))
|
||||
})
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to start turn: {e}"))?;
|
||||
|
||||
|
||||
@@ -284,7 +284,7 @@ impl Settings {
|
||||
s.locale = normalize_configured_locale(&s.locale)
|
||||
.unwrap_or("en")
|
||||
.to_string();
|
||||
s.default_model = s.default_model.as_deref().and_then(normalize_model_name);
|
||||
s.default_model = s.default_model.as_deref().and_then(normalize_default_model);
|
||||
s
|
||||
};
|
||||
settings.apply_env_overrides();
|
||||
@@ -444,9 +444,9 @@ impl Settings {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let Some(model) = normalize_model_name(trimmed) else {
|
||||
let Some(model) = normalize_default_model(trimmed) else {
|
||||
anyhow::bail!(
|
||||
"Failed to update setting: invalid model '{value}'. Expected: a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default."
|
||||
"Failed to update setting: invalid model '{value}'. Expected: auto, a DeepSeek model ID (for example deepseek-v4-pro, deepseek-v4-flash), or none/default."
|
||||
);
|
||||
};
|
||||
self.default_model = Some(model);
|
||||
@@ -549,12 +549,21 @@ impl Settings {
|
||||
("max_history", "Max input history entries"),
|
||||
(
|
||||
"default_model",
|
||||
"Default model: any DeepSeek model ID (e.g. deepseek-v4-pro)",
|
||||
"Default model: auto or any DeepSeek model ID (e.g. deepseek-v4-pro)",
|
||||
),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_default_model(value: &str) -> Option<String> {
|
||||
let trimmed = value.trim();
|
||||
if trimmed.eq_ignore_ascii_case("auto") {
|
||||
Some("auto".to_string())
|
||||
} else {
|
||||
normalize_model_name(trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a boolean value from various formats
|
||||
fn parse_bool(value: &str) -> Result<bool> {
|
||||
match value.to_lowercase().as_str() {
|
||||
|
||||
@@ -557,6 +557,9 @@ pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3;
|
||||
pub struct SubAgentRuntime {
|
||||
pub client: DeepSeekClient,
|
||||
pub model: String,
|
||||
pub auto_model: bool,
|
||||
pub reasoning_effort: Option<String>,
|
||||
pub reasoning_effort_auto: bool,
|
||||
pub role_models: HashMap<String, String>,
|
||||
pub context: ToolContext,
|
||||
pub allow_shell: bool,
|
||||
@@ -597,6 +600,9 @@ impl SubAgentRuntime {
|
||||
Self {
|
||||
client,
|
||||
model,
|
||||
auto_model: false,
|
||||
reasoning_effort: None,
|
||||
reasoning_effort_auto: false,
|
||||
role_models: HashMap::new(),
|
||||
context,
|
||||
allow_shell,
|
||||
@@ -646,6 +652,27 @@ impl SubAgentRuntime {
|
||||
self
|
||||
}
|
||||
|
||||
/// Preserve whether the parent session is using per-turn model routing.
|
||||
#[must_use]
|
||||
pub fn with_auto_model(mut self, auto_model: bool) -> Self {
|
||||
self.auto_model = auto_model;
|
||||
self
|
||||
}
|
||||
|
||||
/// Preserve the parent's thinking configuration. `reasoning_effort_auto`
|
||||
/// stays true even when the parent turn itself was sent with a concrete
|
||||
/// flash-router recommendation, so children can resolve their own tier.
|
||||
#[must_use]
|
||||
pub fn with_reasoning_effort(
|
||||
mut self,
|
||||
reasoning_effort: Option<String>,
|
||||
reasoning_effort_auto: bool,
|
||||
) -> Self {
|
||||
self.reasoning_effort = reasoning_effort;
|
||||
self.reasoning_effort_auto = reasoning_effort_auto;
|
||||
self
|
||||
}
|
||||
|
||||
/// Return a child runtime that is deliberately detached from the parent
|
||||
/// turn cancellation token. Background sub-agents should keep running when
|
||||
/// the parent turn is cancelled; explicit agent cancellation still
|
||||
@@ -675,6 +702,9 @@ impl SubAgentRuntime {
|
||||
Self {
|
||||
client: self.client.clone(),
|
||||
model: self.model.clone(),
|
||||
auto_model: self.auto_model,
|
||||
reasoning_effort: self.reasoning_effort.clone(),
|
||||
reasoning_effort_auto: self.reasoning_effort_auto,
|
||||
role_models: self.role_models.clone(),
|
||||
context: child_context,
|
||||
allow_shell: self.allow_shell,
|
||||
@@ -1621,16 +1651,14 @@ impl ToolSpec for AgentSpawnTool {
|
||||
if let Some(cwd) = validated_cwd {
|
||||
child_runtime.context.workspace = cwd;
|
||||
}
|
||||
let effective_model = match spawn_request.model.clone() {
|
||||
Some(model) => model,
|
||||
let configured_model = match spawn_request.model.clone() {
|
||||
Some(model) => Some(model),
|
||||
None => configured_model_for_role_or_type(
|
||||
&self.runtime,
|
||||
spawn_request.assignment.role.as_deref(),
|
||||
&spawn_request.agent_type,
|
||||
)?
|
||||
.unwrap_or_else(|| self.runtime.model.clone()),
|
||||
)?,
|
||||
};
|
||||
child_runtime.model = effective_model.clone();
|
||||
|
||||
// Cache-aware resident mode (#529): prepend file contents to the prompt
|
||||
// so the child's prefix is byte-stable for DeepSeek prefix caching.
|
||||
@@ -1666,6 +1694,14 @@ impl ToolSpec for AgentSpawnTool {
|
||||
(spawn_request.prompt, None)
|
||||
};
|
||||
|
||||
let route =
|
||||
resolve_subagent_assignment_route(&self.runtime, configured_model, &effective_prompt)
|
||||
.await;
|
||||
child_runtime.model = route.model.clone();
|
||||
child_runtime.reasoning_effort = route.reasoning_effort.clone();
|
||||
child_runtime.reasoning_effort_auto = false;
|
||||
let effective_model = route.model;
|
||||
|
||||
let mut manager = self.manager.write().await;
|
||||
|
||||
let result = manager
|
||||
@@ -2743,7 +2779,7 @@ async fn run_subagent(
|
||||
tool_choice: Some(json!({ "type": "auto" })),
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: None,
|
||||
reasoning_effort: runtime.reasoning_effort.clone(),
|
||||
stream: Some(false),
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
@@ -3272,6 +3308,167 @@ pub(crate) fn configured_model_for_role_or_type(
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// Concrete routing decision for a single sub-agent assignment.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) struct SubAgentResolvedRoute {
    /// Model id the child runtime will be started with.
    pub(crate) model: String,
    /// Thinking tier in its setting-string form, or `None` when no tier is set.
    pub(crate) reasoning_effort: Option<String>,
}
|
||||
|
||||
pub(crate) async fn resolve_subagent_assignment_route(
|
||||
runtime: &SubAgentRuntime,
|
||||
configured_model: Option<String>,
|
||||
prompt: &str,
|
||||
) -> SubAgentResolvedRoute {
|
||||
let explicit_model = configured_model.is_some();
|
||||
let mut route = fallback_subagent_assignment_route(runtime, configured_model, prompt);
|
||||
|
||||
if (runtime.auto_model || runtime.reasoning_effort_auto)
|
||||
&& let Ok(Some(recommendation)) = subagent_flash_router(runtime, prompt).await
|
||||
{
|
||||
if runtime.auto_model && !explicit_model {
|
||||
route.model = recommendation.model;
|
||||
}
|
||||
if runtime.reasoning_effort_auto {
|
||||
route.reasoning_effort = recommendation
|
||||
.reasoning_effort
|
||||
.map(|effort| effort.as_setting().to_string())
|
||||
.or(route.reasoning_effort);
|
||||
}
|
||||
}
|
||||
|
||||
route
|
||||
}
|
||||
|
||||
fn fallback_subagent_assignment_route(
|
||||
runtime: &SubAgentRuntime,
|
||||
configured_model: Option<String>,
|
||||
prompt: &str,
|
||||
) -> SubAgentResolvedRoute {
|
||||
let model = if let Some(model) = configured_model {
|
||||
model
|
||||
} else if runtime.auto_model {
|
||||
crate::commands::auto_model_heuristic(prompt, &runtime.model)
|
||||
} else {
|
||||
runtime.model.clone()
|
||||
};
|
||||
|
||||
let reasoning_effort = if runtime.reasoning_effort_auto {
|
||||
let effort = match crate::auto_reasoning::select(false, prompt) {
|
||||
crate::tui::app::ReasoningEffort::Low | crate::tui::app::ReasoningEffort::Medium => {
|
||||
crate::tui::app::ReasoningEffort::High
|
||||
}
|
||||
other => other,
|
||||
};
|
||||
Some(effort.as_setting().to_string())
|
||||
} else {
|
||||
runtime.reasoning_effort.clone()
|
||||
};
|
||||
|
||||
SubAgentResolvedRoute {
|
||||
model,
|
||||
reasoning_effort,
|
||||
}
|
||||
}
|
||||
|
||||
async fn subagent_flash_router(
|
||||
runtime: &SubAgentRuntime,
|
||||
prompt: &str,
|
||||
) -> Result<Option<crate::commands::AutoRouteRecommendation>> {
|
||||
if cfg!(test) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let request = MessageRequest {
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
messages: vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: subagent_router_prompt(runtime, prompt),
|
||||
cache_control: None,
|
||||
}],
|
||||
}],
|
||||
max_tokens: 96,
|
||||
system: Some(SystemPrompt::Text(
|
||||
SUBAGENT_ROUTER_SYSTEM_PROMPT.to_string(),
|
||||
)),
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: Some("off".to_string()),
|
||||
stream: Some(false),
|
||||
temperature: Some(0.0),
|
||||
top_p: None,
|
||||
};
|
||||
|
||||
let response = tokio::time::timeout(
|
||||
Duration::from_secs(4),
|
||||
runtime.client.create_message(request),
|
||||
)
|
||||
.await??;
|
||||
Ok(crate::commands::parse_auto_route_recommendation(
|
||||
&message_response_text(&response.content),
|
||||
))
|
||||
}
|
||||
|
||||
const SUBAGENT_ROUTER_SYSTEM_PROMPT: &str = "\
|
||||
You are the DeepSeek TUI sub-agent routing manager. Return only compact JSON: \
|
||||
{\"model\":\"deepseek-v4-flash|deepseek-v4-pro\",\"thinking\":\"off|high|max\"}. \
|
||||
Treat each child assignment like a customer request entering a team queue: decide the least \
|
||||
sufficient worker and thinking budget for that assignment. Do not treat being a sub-agent as \
|
||||
important by itself. Use Flash for trivial, read-only, status, lookup, or single-step work. \
|
||||
Use Pro for coding, debugging, release work, multi-file changes, security, architecture, \
|
||||
high-risk decisions, ambiguous requests, or work likely to need tool-call judgment. Use thinking \
|
||||
off for trivial no-tool work, high for ordinary reasoning, and max only for hard, risky, \
|
||||
multi-step, uncertain, or tool-heavy work.";
|
||||
|
||||
fn subagent_router_prompt(runtime: &SubAgentRuntime, prompt: &str) -> String {
|
||||
format!(
|
||||
"Parent selected model mode: {}\nParent selected thinking mode: {}\n\nSub-agent assignment:\n{}\n\nReturn JSON only.",
|
||||
if runtime.auto_model { "auto" } else { "fixed" },
|
||||
if runtime.reasoning_effort_auto {
|
||||
"auto"
|
||||
} else {
|
||||
runtime
|
||||
.reasoning_effort
|
||||
.as_deref()
|
||||
.unwrap_or("provider-default")
|
||||
},
|
||||
truncate_subagent_router_prompt(prompt, 4_000)
|
||||
)
|
||||
}
|
||||
|
||||
/// Limit `text` to at most `max_chars` characters (Unicode scalar values).
///
/// Text that already fits is returned unchanged. Longer text is cut after
/// `max_chars` characters and gets a `"\n[truncated]"` marker appended.
fn truncate_subagent_router_prompt(text: &str, max_chars: usize) -> String {
    const MARKER: &str = "\n[truncated]";

    // The byte offset of character number `max_chars` exists only when the
    // text is longer than the limit; it is also a valid slice boundary.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut clipped = String::with_capacity(cut + MARKER.len());
            clipped.push_str(&text[..cut]);
            clipped.push_str(MARKER);
            clipped
        }
    }
}
|
||||
|
||||
fn message_response_text(blocks: &[ContentBlock]) -> String {
|
||||
let mut out = String::new();
|
||||
for block in blocks {
|
||||
match block {
|
||||
ContentBlock::Text { text, .. } => {
|
||||
if !out.is_empty() {
|
||||
out.push('\n');
|
||||
}
|
||||
out.push_str(text);
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
if !out.is_empty() {
|
||||
out.push('\n');
|
||||
}
|
||||
out.push_str(thinking);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn parse_optional_subagent_model(input: &Value, key: &str) -> Result<Option<String>, ToolError> {
|
||||
match input.get(key) {
|
||||
None | Some(Value::Null) => Ok(None),
|
||||
|
||||
@@ -398,6 +398,62 @@ fn test_build_assignment_prompt_includes_metadata() {
|
||||
assert!(prompt.contains("role: explorer"));
|
||||
}
|
||||
|
||||
/// With auto routing on and no configured model, the fallback route chooses
/// the model from the assignment text.
#[test]
fn subagent_auto_model_routes_unconfigured_assignments() {
    let runtime = stub_runtime().with_auto_model(true);

    let heavy = fallback_subagent_assignment_route(&runtime, None, "implement the release fix");
    assert_eq!(heavy.model, "deepseek-v4-pro");

    let trivial = fallback_subagent_assignment_route(&runtime, None, "say hello");
    assert_eq!(trivial.model, "deepseek-v4-flash");
}
|
||||
|
||||
/// A configured model is kept even when auto routing is on and the assignment
/// text would otherwise select a different model.
#[test]
fn subagent_auto_route_respects_explicit_or_role_model() {
    let runtime = stub_runtime().with_auto_model(true);
    let pinned = Some("deepseek-v4-flash".to_string());

    let route = fallback_subagent_assignment_route(&runtime, pinned, "implement the release fix");

    assert_eq!(route.model, "deepseek-v4-flash");
}
|
||||
|
||||
/// With auto thinking on, the fallback route yields a concrete tier chosen
/// from the assignment text.
#[test]
fn subagent_auto_reasoning_resolves_to_distinct_v4_tiers() {
    let runtime = stub_runtime().with_reasoning_effort(Some("high".to_string()), true);

    let lookup = fallback_subagent_assignment_route(&runtime, None, "quick lookup");
    assert_eq!(lookup.reasoning_effort, Some("high".to_string()));

    let debugging =
        fallback_subagent_assignment_route(&runtime, None, "debug this release failure");
    assert_eq!(debugging.reasoning_effort, Some("max".to_string()));
}
|
||||
|
||||
/// The router prompt reports both parent modes as `auto` and embeds the
/// assignment text.
#[test]
fn subagent_router_prompt_frames_assignment_as_auto_routing() {
    let runtime = stub_runtime()
        .with_auto_model(true)
        .with_reasoning_effort(Some("high".to_string()), true);

    let prompt = subagent_router_prompt(&runtime, "inspect one file");

    assert!(prompt.contains("Parent selected model mode: auto"));
    assert!(prompt.contains("Parent selected thinking mode: auto"));
    assert!(prompt.contains("inspect one file"));
}
|
||||
|
||||
#[test]
|
||||
fn test_subagent_tool_registry_reports_unavailable_tools() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
@@ -1102,6 +1158,9 @@ fn stub_runtime() -> SubAgentRuntime {
|
||||
SubAgentRuntime {
|
||||
client: stub_client(),
|
||||
model: "deepseek-v4-flash".to_string(),
|
||||
auto_model: false,
|
||||
reasoning_effort: None,
|
||||
reasoning_effort_auto: false,
|
||||
role_models: std::collections::HashMap::new(),
|
||||
context,
|
||||
allow_shell: true,
|
||||
|
||||
+64
-12
@@ -9,7 +9,9 @@ use serde_json::Value;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::compaction::CompactionConfig;
|
||||
use crate::config::{ApiProvider, Config, SavedCredential, has_api_key, save_api_key};
|
||||
use crate::config::{
|
||||
ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, save_api_key,
|
||||
};
|
||||
use crate::config_ui::ConfigUiMode;
|
||||
use crate::core::coherence::CoherenceState;
|
||||
use crate::cycle_manager::{CycleBriefing, CycleConfig};
|
||||
@@ -628,6 +630,8 @@ pub struct App {
|
||||
/// `dispatch_user_message` calls `auto_model_heuristic` to resolve the
|
||||
/// effective model for each outbound message.
|
||||
pub auto_model: bool,
|
||||
/// Last concrete model chosen while `auto_model` is active.
|
||||
pub last_effective_model: Option<String>,
|
||||
/// Current API provider (mirrors `Config::api_provider`).
|
||||
/// Updated by `/provider` switches so the UI/commands can read the
|
||||
/// active backend without re-deriving it from the live config.
|
||||
@@ -635,6 +639,8 @@ pub struct App {
|
||||
/// Current reasoning-effort tier for DeepSeek thinking mode.
|
||||
/// Cycled via Shift+Tab; initialized from config at startup.
|
||||
pub reasoning_effort: ReasoningEffort,
|
||||
/// Last concrete thinking tier chosen while `reasoning_effort` is auto.
|
||||
pub last_effective_reasoning_effort: Option<ReasoningEffort>,
|
||||
pub workspace: PathBuf,
|
||||
pub config_path: Option<PathBuf>,
|
||||
pub config_profile: Option<String>,
|
||||
@@ -1080,8 +1086,23 @@ impl App {
|
||||
let use_paste_burst_detection = settings.paste_burst_detection;
|
||||
let ui_theme = palette::UI_THEME;
|
||||
let model = settings.default_model.clone().unwrap_or(model);
|
||||
let auto_model = model.trim().eq_ignore_ascii_case("auto");
|
||||
let threshold_model = if auto_model {
|
||||
DEFAULT_TEXT_MODEL
|
||||
} else {
|
||||
model.as_str()
|
||||
};
|
||||
let compact_threshold =
|
||||
compaction_threshold_for_model_and_effort(&model, config.reasoning_effort());
|
||||
compaction_threshold_for_model_and_effort(threshold_model, config.reasoning_effort());
|
||||
let reasoning_effort = if auto_model {
|
||||
ReasoningEffort::Auto
|
||||
} else {
|
||||
config
|
||||
.reasoning_effort()
|
||||
.map_or_else(ReasoningEffort::default, |s| {
|
||||
ReasoningEffort::from_setting(s)
|
||||
})
|
||||
};
|
||||
|
||||
// Start in YOLO mode if --yolo flag was passed
|
||||
let preferred_mode = AppMode::from_setting(&settings.default_mode);
|
||||
@@ -1170,13 +1191,11 @@ impl App {
|
||||
sticky_status: None,
|
||||
last_status_message_seen: None,
|
||||
model,
|
||||
auto_model: false,
|
||||
auto_model,
|
||||
last_effective_model: None,
|
||||
api_provider: provider,
|
||||
reasoning_effort: config
|
||||
.reasoning_effort()
|
||||
.map_or_else(ReasoningEffort::default, |s| {
|
||||
ReasoningEffort::from_setting(s)
|
||||
}),
|
||||
reasoning_effort,
|
||||
last_effective_reasoning_effort: None,
|
||||
workspace,
|
||||
config_path,
|
||||
config_profile,
|
||||
@@ -1435,6 +1454,7 @@ impl App {
|
||||
/// `Off` → `High` → `Max` → `Off`.
|
||||
pub fn cycle_effort(&mut self) {
|
||||
self.reasoning_effort = self.reasoning_effort.cycle_next();
|
||||
self.last_effective_reasoning_effort = None;
|
||||
self.needs_redraw = true;
|
||||
self.push_status_toast(
|
||||
format!("Thinking: {}", self.reasoning_effort.short_label()),
|
||||
@@ -3464,10 +3484,42 @@ impl App {
|
||||
}
|
||||
|
||||
pub fn update_model_compaction_budget(&mut self) {
|
||||
self.compact_threshold = compaction_threshold_for_model_and_effort(
|
||||
&self.model,
|
||||
self.reasoning_effort.api_value(),
|
||||
);
|
||||
let model = self.effective_model_for_budget().to_string();
|
||||
self.compact_threshold =
|
||||
compaction_threshold_for_model_and_effort(&model, self.reasoning_effort.api_value());
|
||||
}
|
||||
|
||||
pub fn effective_model_for_budget(&self) -> &str {
|
||||
if self.auto_model {
|
||||
return self
|
||||
.last_effective_model
|
||||
.as_deref()
|
||||
.filter(|model| *model != "auto")
|
||||
.unwrap_or(DEFAULT_TEXT_MODEL);
|
||||
}
|
||||
&self.model
|
||||
}
|
||||
|
||||
pub fn model_display_label(&self) -> String {
|
||||
if self.auto_model {
|
||||
if let Some(effective) = self.last_effective_model.as_deref()
|
||||
&& effective != "auto"
|
||||
{
|
||||
return format!("auto: {effective}");
|
||||
}
|
||||
return "auto".to_string();
|
||||
}
|
||||
self.model.clone()
|
||||
}
|
||||
|
||||
pub fn reasoning_effort_display_label(&self) -> String {
|
||||
if self.auto_model || self.reasoning_effort == ReasoningEffort::Auto {
|
||||
if let Some(effective) = self.last_effective_reasoning_effort {
|
||||
return format!("auto: {}", effective.short_label());
|
||||
}
|
||||
return "auto".to_string();
|
||||
}
|
||||
self.reasoning_effort.short_label().to_string()
|
||||
}
|
||||
|
||||
pub fn compaction_config(&self) -> CompactionConfig {
|
||||
|
||||
@@ -34,6 +34,7 @@ use crate::tui::views::{ModalKind, ModalView, ViewAction, ViewEvent};
|
||||
/// Models the picker exposes by default. Kept short on purpose — power
|
||||
/// users can still type `/model <id>` for anything else.
|
||||
const PICKER_MODELS: &[(&str, &str)] = &[
|
||||
("auto", "select per turn"),
|
||||
("deepseek-v4-pro", "flagship"),
|
||||
("deepseek-v4-flash", "fast / cheap"),
|
||||
];
|
||||
@@ -41,6 +42,7 @@ const PICKER_MODELS: &[(&str, &str)] = &[
|
||||
/// Thinking-effort rows shown in the picker, in the order DeepSeek
|
||||
/// behaviorally distinguishes them.
|
||||
const PICKER_EFFORTS: &[ReasoningEffort] = &[
|
||||
ReasoningEffort::Auto,
|
||||
ReasoningEffort::Off,
|
||||
ReasoningEffort::High,
|
||||
ReasoningEffort::Max,
|
||||
@@ -68,7 +70,11 @@ pub struct ModelPickerView {
|
||||
impl ModelPickerView {
|
||||
#[must_use]
|
||||
pub fn new(app: &App) -> Self {
|
||||
let initial_model = app.model.clone();
|
||||
let initial_model = if app.auto_model {
|
||||
"auto".to_string()
|
||||
} else {
|
||||
app.model.clone()
|
||||
};
|
||||
let mut selected_model_idx = PICKER_MODELS
|
||||
.iter()
|
||||
.position(|(id, _)| *id == initial_model);
|
||||
@@ -88,7 +94,7 @@ impl ModelPickerView {
|
||||
let selected_effort_idx = PICKER_EFFORTS
|
||||
.iter()
|
||||
.position(|e| *e == normalized)
|
||||
.unwrap_or(1); // default to High if somehow unknown
|
||||
.unwrap_or(2); // default to High if somehow unknown
|
||||
|
||||
Self {
|
||||
initial_model,
|
||||
@@ -116,6 +122,9 @@ impl ModelPickerView {
|
||||
}
|
||||
|
||||
fn resolved_effort(&self) -> ReasoningEffort {
|
||||
if self.resolved_model().trim().eq_ignore_ascii_case("auto") {
|
||||
return ReasoningEffort::Auto;
|
||||
}
|
||||
PICKER_EFFORTS[self.selected_effort_idx]
|
||||
}
|
||||
|
||||
@@ -318,6 +327,7 @@ impl ModalView for ModelPickerView {
|
||||
.map(|effort| {
|
||||
let label = effort.short_label().to_string();
|
||||
let hint = match effort {
|
||||
ReasoningEffort::Auto => "auto-select per turn".to_string(),
|
||||
ReasoningEffort::Off => "thinking disabled".to_string(),
|
||||
ReasoningEffort::High => "thinking enabled (default)".to_string(),
|
||||
ReasoningEffort::Max => "thinking enabled, max effort".to_string(),
|
||||
@@ -387,6 +397,37 @@ mod tests {
|
||||
assert_eq!(view.resolved_effort(), ReasoningEffort::Max);
|
||||
}
|
||||
|
||||
/// Opening the picker on an app in auto mode resolves to the `auto` model and
/// `Auto` effort.
#[test]
fn picker_initial_selection_matches_auto_state() {
    let mut app = create_test_app();
    app.auto_model = true;
    app.model = "auto".to_string();
    app.reasoning_effort = ReasoningEffort::Auto;

    let picker = ModelPickerView::new(&app);

    assert_eq!(picker.resolved_model(), "auto");
    assert_eq!(picker.resolved_effort(), ReasoningEffort::Auto);
}
|
||||
|
||||
/// With the `auto` model row selected, the resolved effort is `Auto` even when
/// the `Max` effort row is highlighted.
#[test]
fn picker_auto_model_forces_auto_effort_on_apply() {
    let mut app = create_test_app();
    app.auto_model = true;
    app.model = "auto".to_string();
    app.reasoning_effort = ReasoningEffort::Off;

    let max_row = PICKER_EFFORTS
        .iter()
        .position(|effort| *effort == ReasoningEffort::Max)
        .expect("max effort row");

    let mut picker = ModelPickerView::new(&app);
    picker.selected_model_idx = 0;
    picker.selected_effort_idx = max_row;

    assert_eq!(picker.resolved_model(), "auto");
    assert_eq!(picker.resolved_effort(), ReasoningEffort::Auto);
}
|
||||
|
||||
#[test]
|
||||
fn picker_normalizes_low_medium_to_high() {
|
||||
let mut app = create_test_app();
|
||||
@@ -399,6 +440,21 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// Pins the exact model rows and effort rows the picker offers, in order.
#[test]
fn picker_exposes_auto_and_distinct_thinking_tiers() {
    let mut model_labels = Vec::new();
    for (id, _) in PICKER_MODELS {
        model_labels.push(*id);
    }
    assert_eq!(
        model_labels,
        vec!["auto", "deepseek-v4-pro", "deepseek-v4-flash"]
    );

    let mut effort_labels = Vec::new();
    for effort in PICKER_EFFORTS {
        effort_labels.push(effort.as_setting());
    }
    assert_eq!(effort_labels, vec!["auto", "off", "high", "max"]);
}
|
||||
|
||||
#[test]
|
||||
fn picker_preserves_unknown_model_via_custom_row() {
|
||||
let mut app = create_test_app();
|
||||
@@ -429,7 +485,7 @@ mod tests {
|
||||
#[test]
|
||||
fn tab_switches_focus_and_arrow_now_moves_effort() {
|
||||
let mut app = create_test_app();
|
||||
// Default is Max (index 2 = last); pin to Off so the Down arrow has
|
||||
// Default is Max; pin to Off so the Down arrow has
|
||||
// somewhere to go.
|
||||
app.reasoning_effort = ReasoningEffort::Off;
|
||||
let mut view = ModelPickerView::new(&app);
|
||||
@@ -490,11 +546,11 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn picker_only_exposes_off_high_max() {
|
||||
fn picker_only_exposes_auto_off_high_max() {
|
||||
let labels: Vec<&str> = PICKER_EFFORTS
|
||||
.iter()
|
||||
.map(|effort| effort.short_label())
|
||||
.collect();
|
||||
assert_eq!(labels, vec!["off", "high", "max"]);
|
||||
assert_eq!(labels, vec!["auto", "off", "high", "max"]);
|
||||
}
|
||||
}
|
||||
|
||||
+195
-33
@@ -85,8 +85,8 @@ use crate::tui::user_input::UserInputView;
|
||||
|
||||
use super::active_cell::ActiveCell;
|
||||
use super::app::{
|
||||
App, AppAction, AppMode, OnboardingState, QueuedMessage, SidebarFocus, StatusToastLevel,
|
||||
SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions,
|
||||
App, AppAction, AppMode, OnboardingState, QueuedMessage, ReasoningEffort, SidebarFocus,
|
||||
StatusToastLevel, SubmitDisposition, TaskPanelEntry, ToolDetailRecord, TuiOptions,
|
||||
};
|
||||
use super::approval::{
|
||||
ApprovalMode, ApprovalRequest, ApprovalView, ElevationRequest, ElevationView, ReviewDecision,
|
||||
@@ -924,8 +924,13 @@ async fn run_event_loop(
|
||||
}
|
||||
|
||||
// Update session cost
|
||||
let pricing_model = if app.auto_model {
|
||||
app.last_effective_model.as_deref().unwrap_or(&app.model)
|
||||
} else {
|
||||
&app.model
|
||||
};
|
||||
let turn_cost =
|
||||
crate::pricing::calculate_turn_cost_from_usage(&app.model, &usage);
|
||||
crate::pricing::calculate_turn_cost_from_usage(pricing_model, &usage);
|
||||
if let Some(cost) = turn_cost {
|
||||
app.accrue_session_cost(cost);
|
||||
}
|
||||
@@ -1031,7 +1036,12 @@ async fn run_event_loop(
|
||||
} => {
|
||||
app.api_messages = messages;
|
||||
app.system_prompt = system_prompt;
|
||||
app.model = model;
|
||||
if app.auto_model {
|
||||
app.last_effective_model = Some(model);
|
||||
} else {
|
||||
app.model = model;
|
||||
app.last_effective_model = None;
|
||||
}
|
||||
app.update_model_compaction_budget();
|
||||
app.workspace = workspace;
|
||||
if (app.is_loading || app.is_compacting)
|
||||
@@ -1317,7 +1327,8 @@ async fn run_event_loop(
|
||||
}
|
||||
|
||||
if let Some(next) = queued_to_send {
|
||||
if let Err(err) = dispatch_user_message(app, &engine_handle, next.clone()).await {
|
||||
if let Err(err) = dispatch_user_message(app, config, &engine_handle, next.clone()).await
|
||||
{
|
||||
app.queue_message(next);
|
||||
app.status_message = Some(format!(
|
||||
"Dispatch failed ({err}); kept {} queued message(s)",
|
||||
@@ -2422,7 +2433,7 @@ async fn run_event_loop(
|
||||
app.close_slash_menu();
|
||||
}
|
||||
if let Some(input) = app.submit_input() {
|
||||
if handle_plan_choice(app, &engine_handle, &input).await? {
|
||||
if handle_plan_choice(app, config, &engine_handle, &input).await? {
|
||||
continue;
|
||||
}
|
||||
// `# foo` quick-add (#492) — when memory is enabled,
|
||||
@@ -2473,7 +2484,7 @@ async fn run_event_loop(
|
||||
})
|
||||
.await;
|
||||
}
|
||||
submit_or_steer_message(app, &engine_handle, queued).await?;
|
||||
submit_or_steer_message(app, config, &engine_handle, queued).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3229,6 +3240,7 @@ fn queued_message_content_for_app(
|
||||
|
||||
async fn dispatch_user_message(
|
||||
app: &mut App,
|
||||
config: &Config,
|
||||
engine_handle: &EngineHandle,
|
||||
message: QueuedMessage,
|
||||
) -> Result<()> {
|
||||
@@ -3300,21 +3312,61 @@ async fn dispatch_user_message(
|
||||
persistence_actor::persist(PersistRequest::Checkpoint(session));
|
||||
}
|
||||
|
||||
// Resolve the effective model: when auto_model is active, use the
|
||||
// heuristic to pick between Pro and Flash based on the user's input.
|
||||
let auto_selection = if app.auto_model || app.reasoning_effort == ReasoningEffort::Auto {
|
||||
Some(resolve_auto_model_selection(app, config, &message, &content).await)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let effective_model = if app.auto_model {
|
||||
commands::auto_model_heuristic(&message.display, &app.model)
|
||||
auto_selection
|
||||
.as_ref()
|
||||
.map(|selection| selection.model.clone())
|
||||
.unwrap_or_else(|| commands::auto_model_heuristic(&message.display, &app.model))
|
||||
} else {
|
||||
app.model.clone()
|
||||
};
|
||||
|
||||
let auto_controls_reasoning = app.auto_model || app.reasoning_effort == ReasoningEffort::Auto;
|
||||
let effective_reasoning_effort = if auto_controls_reasoning {
|
||||
let effort = auto_selection
|
||||
.as_ref()
|
||||
.and_then(|selection| selection.reasoning_effort)
|
||||
.unwrap_or_else(|| {
|
||||
normalize_auto_routed_effort(crate::auto_reasoning::select(false, &message.display))
|
||||
});
|
||||
app.last_effective_reasoning_effort = Some(effort);
|
||||
Some(effort.as_setting().to_string())
|
||||
} else {
|
||||
app.last_effective_reasoning_effort = None;
|
||||
app.reasoning_effort.api_value().map(str::to_string)
|
||||
};
|
||||
|
||||
if let Some(selection) = auto_selection.as_ref() {
|
||||
if app.auto_model {
|
||||
app.last_effective_model = Some(effective_model.clone());
|
||||
let mut status = format!(
|
||||
"Auto model selected: {effective_model} via {}",
|
||||
selection.source.label()
|
||||
);
|
||||
if let Some(effort) = app.last_effective_reasoning_effort {
|
||||
status.push_str(&format!("; thinking auto: {}", effort.as_setting()));
|
||||
}
|
||||
app.status_message = Some(status);
|
||||
}
|
||||
} else {
|
||||
app.last_effective_model = None;
|
||||
}
|
||||
|
||||
if let Err(err) = engine_handle
|
||||
.send(Op::SendMessage {
|
||||
content,
|
||||
mode: app.mode,
|
||||
model: effective_model,
|
||||
goal_objective: app.goal.goal_objective.clone(),
|
||||
reasoning_effort: app.reasoning_effort.api_value().map(str::to_string),
|
||||
reasoning_effort: effective_reasoning_effort,
|
||||
reasoning_effort_auto: auto_controls_reasoning,
|
||||
auto_model: app.auto_model,
|
||||
allow_shell: app.allow_shell,
|
||||
trust_mode: app.trust_mode,
|
||||
auto_approve: app.mode == AppMode::Yolo,
|
||||
@@ -3329,6 +3381,95 @@ async fn dispatch_user_message(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn resolve_auto_model_selection(
|
||||
app: &App,
|
||||
config: &Config,
|
||||
message: &QueuedMessage,
|
||||
latest_content: &str,
|
||||
) -> commands::AutoRouteSelection {
|
||||
let latest_request = if latest_content.trim().is_empty() {
|
||||
message.display.as_str()
|
||||
} else {
|
||||
latest_content
|
||||
};
|
||||
commands::resolve_auto_route_with_flash(
|
||||
config,
|
||||
latest_request,
|
||||
&recent_auto_router_context(&app.api_messages),
|
||||
if app.auto_model { "auto" } else { "fixed" },
|
||||
app.reasoning_effort.as_setting(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
fn normalize_auto_routed_effort(effort: ReasoningEffort) -> ReasoningEffort {
|
||||
commands::normalize_auto_route_effort(effort)
|
||||
}
|
||||
|
||||
fn recent_auto_router_context(messages: &[Message]) -> String {
|
||||
let mut rows = Vec::new();
|
||||
for message in messages.iter().rev().skip(1) {
|
||||
if rows.len() >= 6 {
|
||||
break;
|
||||
}
|
||||
let text = content_blocks_text(&message.content);
|
||||
let text = text.trim();
|
||||
if text.is_empty() {
|
||||
continue;
|
||||
}
|
||||
rows.push(format!(
|
||||
"{}: {}",
|
||||
message.role,
|
||||
truncate_for_auto_router(text, 900)
|
||||
));
|
||||
}
|
||||
rows.reverse();
|
||||
if rows.is_empty() {
|
||||
"No prior context.".to_string()
|
||||
} else {
|
||||
rows.join("\n")
|
||||
}
|
||||
}
|
||||
|
||||
fn content_blocks_text(blocks: &[ContentBlock]) -> String {
|
||||
let mut out = String::new();
|
||||
for block in blocks {
|
||||
match block {
|
||||
ContentBlock::Text { text, .. } => {
|
||||
append_router_text(&mut out, text);
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
append_router_text(&mut out, thinking);
|
||||
}
|
||||
ContentBlock::ToolUse { name, .. } => {
|
||||
append_router_text(&mut out, &format!("[tool call: {name}]"));
|
||||
}
|
||||
ContentBlock::ToolResult { content, .. } => {
|
||||
append_router_text(&mut out, &format!("[tool result] {content}"));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Append `text` to `out`, placing a newline between it and any existing text.
///
/// No separator is written when `out` is still empty, so the result never
/// starts with a newline. An empty `text` still gets its separator.
fn append_router_text(out: &mut String, text: &str) {
    let separator = if out.is_empty() { "" } else { "\n" };
    out.push_str(separator);
    out.push_str(text);
}
|
||||
|
||||
/// Limit `text` to at most `max_chars` characters for the router prompt.
///
/// Counting is done in `char`s, not bytes, so multi-byte characters are never
/// split. When anything is cut off, `...` is appended to the kept prefix;
/// otherwise the text is returned unchanged.
fn truncate_for_auto_router(text: &str, max_chars: usize) -> String {
    // The byte offset of the character at index `max_chars` (if it exists) is
    // exactly where the first `max_chars` characters end.
    match text.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &text[..cut]),
        None => text.to_string(),
    }
}
|
||||
|
||||
async fn apply_model_and_compaction_update(
|
||||
engine_handle: &EngineHandle,
|
||||
compaction: crate::compaction::CompactionConfig,
|
||||
@@ -3423,11 +3564,15 @@ async fn apply_model_picker_choice(
|
||||
app: &mut App,
|
||||
engine_handle: &EngineHandle,
|
||||
model: String,
|
||||
effort: crate::tui::app::ReasoningEffort,
|
||||
mut effort: crate::tui::app::ReasoningEffort,
|
||||
previous_model: String,
|
||||
previous_effort: crate::tui::app::ReasoningEffort,
|
||||
) {
|
||||
let model_changed = model != previous_model;
|
||||
let model_is_auto = model.trim().eq_ignore_ascii_case("auto");
|
||||
if model_is_auto {
|
||||
effort = ReasoningEffort::Auto;
|
||||
}
|
||||
let model_changed = model != previous_model || app.auto_model != model_is_auto;
|
||||
let effort_changed = effort != previous_effort;
|
||||
if !model_changed && !effort_changed {
|
||||
app.status_message = Some(format!(
|
||||
@@ -3438,6 +3583,8 @@ async fn apply_model_picker_choice(
|
||||
}
|
||||
|
||||
if model_changed {
|
||||
app.auto_model = model_is_auto;
|
||||
app.last_effective_model = None;
|
||||
app.model = model.clone();
|
||||
app.update_model_compaction_budget();
|
||||
app.session.last_prompt_tokens = None;
|
||||
@@ -3448,6 +3595,7 @@ async fn apply_model_picker_choice(
|
||||
}
|
||||
if effort_changed {
|
||||
app.reasoning_effort = effort;
|
||||
app.last_effective_reasoning_effort = None;
|
||||
}
|
||||
|
||||
// Best-effort persist; surface a status warning if the settings file
|
||||
@@ -3474,20 +3622,27 @@ async fn apply_model_picker_choice(
|
||||
apply_model_and_compaction_update(engine_handle, app.compaction_config()).await;
|
||||
}
|
||||
|
||||
let model_summary = if model_is_auto {
|
||||
"auto (per-turn model)".to_string()
|
||||
} else {
|
||||
model.clone()
|
||||
};
|
||||
let previous_effort_summary = previous_effort.short_label();
|
||||
let effort_summary = if effort == ReasoningEffort::Auto {
|
||||
"auto (per-turn thinking)".to_string()
|
||||
} else {
|
||||
effort.short_label().to_string()
|
||||
};
|
||||
|
||||
let mut summary = match (model_changed, effort_changed) {
|
||||
(true, true) => format!(
|
||||
"Model: {previous_model} → {model} · thinking: {} → {}",
|
||||
previous_effort.short_label(),
|
||||
effort.short_label()
|
||||
),
|
||||
(true, false) => format!(
|
||||
"Model: {previous_model} → {model} · thinking {}",
|
||||
effort.short_label()
|
||||
"Model: {previous_model} → {model_summary} · thinking: {previous_effort_summary} → {effort_summary}"
|
||||
),
|
||||
(true, false) => {
|
||||
format!("Model: {previous_model} → {model_summary} · thinking {effort_summary}")
|
||||
}
|
||||
(false, true) => format!(
|
||||
"Thinking: {} → {} · model {model}",
|
||||
previous_effort.short_label(),
|
||||
effort.short_label()
|
||||
"Thinking: {previous_effort_summary} → {effort_summary} · model {model_summary}"
|
||||
),
|
||||
(false, false) => unreachable!(),
|
||||
};
|
||||
@@ -3896,7 +4051,7 @@ async fn apply_command_result(
|
||||
}
|
||||
AppAction::SendMessage(content) => {
|
||||
let queued = build_queued_message(app, content);
|
||||
submit_or_steer_message(app, engine_handle, queued).await?;
|
||||
submit_or_steer_message(app, config, engine_handle, queued).await?;
|
||||
}
|
||||
AppAction::Rlm {
|
||||
prompt,
|
||||
@@ -4417,11 +4572,14 @@ async fn queue_follow_up(app: &mut App, message: QueuedMessage) -> Result<()> {
|
||||
|
||||
async fn submit_or_steer_message(
|
||||
app: &mut App,
|
||||
config: &Config,
|
||||
engine_handle: &EngineHandle,
|
||||
message: QueuedMessage,
|
||||
) -> Result<()> {
|
||||
match app.decide_submit_disposition() {
|
||||
SubmitDisposition::Immediate => dispatch_user_message(app, engine_handle, message).await,
|
||||
SubmitDisposition::Immediate => {
|
||||
dispatch_user_message(app, config, engine_handle, message).await
|
||||
}
|
||||
SubmitDisposition::Queue => {
|
||||
let count = app.queued_message_count().saturating_add(1);
|
||||
app.queue_message(message);
|
||||
@@ -4523,6 +4681,7 @@ fn parse_plan_choice(input: &str) -> Option<PlanChoice> {
|
||||
|
||||
async fn apply_plan_choice(
|
||||
app: &mut App,
|
||||
config: &Config,
|
||||
engine_handle: &EngineHandle,
|
||||
choice: PlanChoice,
|
||||
) -> Result<()> {
|
||||
@@ -4539,7 +4698,7 @@ async fn apply_plan_choice(
|
||||
app.status_message =
|
||||
Some("Queued accepted plan execution (agent mode).".to_string());
|
||||
} else {
|
||||
dispatch_user_message(app, engine_handle, followup).await?;
|
||||
dispatch_user_message(app, config, engine_handle, followup).await?;
|
||||
}
|
||||
}
|
||||
PlanChoice::AcceptYolo => {
|
||||
@@ -4554,7 +4713,7 @@ async fn apply_plan_choice(
|
||||
app.status_message =
|
||||
Some("Queued accepted plan execution (YOLO mode).".to_string());
|
||||
} else {
|
||||
dispatch_user_message(app, engine_handle, followup).await?;
|
||||
dispatch_user_message(app, config, engine_handle, followup).await?;
|
||||
}
|
||||
}
|
||||
PlanChoice::RevisePlan => {
|
||||
@@ -4576,6 +4735,7 @@ async fn apply_plan_choice(
|
||||
|
||||
async fn handle_plan_choice(
|
||||
app: &mut App,
|
||||
config: &Config,
|
||||
engine_handle: &EngineHandle,
|
||||
input: &str,
|
||||
) -> Result<bool> {
|
||||
@@ -4590,7 +4750,7 @@ async fn handle_plan_choice(
|
||||
return Ok(false);
|
||||
};
|
||||
|
||||
apply_plan_choice(app, engine_handle, choice).await?;
|
||||
apply_plan_choice(app, config, engine_handle, choice).await?;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
@@ -4714,7 +4874,8 @@ fn render(f: &mut Frame, app: &mut App) {
|
||||
.and_then(|value| value.to_str())
|
||||
.filter(|value| !value.is_empty())
|
||||
.unwrap_or("workspace");
|
||||
let effort_label = app.reasoning_effort.short_label();
|
||||
let model_label = app.model_display_label();
|
||||
let effort_label = app.reasoning_effort_display_label();
|
||||
let provider_label = match app.api_provider {
|
||||
crate::config::ApiProvider::Deepseek => None,
|
||||
crate::config::ApiProvider::DeepseekCN => None,
|
||||
@@ -4726,7 +4887,7 @@ fn render(f: &mut Frame, app: &mut App) {
|
||||
};
|
||||
let header_data = HeaderData::new(
|
||||
app.mode,
|
||||
&app.model,
|
||||
&model_label,
|
||||
workspace_name,
|
||||
app.is_loading,
|
||||
app.ui_theme.header_bg,
|
||||
@@ -4737,7 +4898,7 @@ fn render(f: &mut Frame, app: &mut App) {
|
||||
app.session.session_cost,
|
||||
sanitized_prompt_tokens,
|
||||
)
|
||||
.with_reasoning_effort(Some(effort_label))
|
||||
.with_reasoning_effort(Some(&effort_label))
|
||||
.with_provider(provider_label);
|
||||
let header_widget = HeaderWidget::new(header_data);
|
||||
let buf = f.buffer_mut();
|
||||
@@ -5016,7 +5177,8 @@ async fn handle_view_events(
|
||||
if app.plan_prompt_pending {
|
||||
app.plan_prompt_pending = false;
|
||||
if let Some(choice) = plan_choice_from_option(option)
|
||||
&& let Err(err) = apply_plan_choice(app, engine_handle, choice).await
|
||||
&& let Err(err) =
|
||||
apply_plan_choice(app, config, engine_handle, choice).await
|
||||
{
|
||||
app.status_message = Some(format!("Failed to apply plan selection: {err}"));
|
||||
}
|
||||
@@ -6496,7 +6658,7 @@ fn estimated_context_tokens(app: &App) -> Option<i64> {
|
||||
}
|
||||
|
||||
fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
|
||||
let max = context_window_for_model(&app.model)?;
|
||||
let max = context_window_for_model(app.effective_model_for_budget())?;
|
||||
let max_i64 = i64::from(max);
|
||||
let reported = app
|
||||
.session
|
||||
|
||||
@@ -699,10 +699,12 @@ async fn model_change_update_syncs_engine_model_before_compaction() {
|
||||
async fn dispatch_user_message_failed_send_clears_loading_state() {
|
||||
let mut app = create_test_app();
|
||||
let engine = mock_engine_handle();
|
||||
let config = Config::default();
|
||||
drop(engine.rx_op);
|
||||
|
||||
let result = dispatch_user_message(
|
||||
&mut app,
|
||||
&config,
|
||||
&engine.handle,
|
||||
QueuedMessage::new("hello".to_string(), None),
|
||||
)
|
||||
@@ -1469,8 +1471,9 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() {
|
||||
app.offline_mode = true;
|
||||
|
||||
let engine = crate::core::engine::mock_engine_handle();
|
||||
let config = Config::default();
|
||||
|
||||
let handled = handle_plan_choice(&mut app, &engine.handle, "yolo")
|
||||
let handled = handle_plan_choice(&mut app, &config, &engine.handle, "yolo")
|
||||
.await
|
||||
.expect("plan choice");
|
||||
|
||||
@@ -1479,7 +1482,7 @@ async fn dismissed_plan_prompt_leaves_non_numeric_input_for_normal_send_path() {
|
||||
assert_eq!(app.mode, AppMode::Plan);
|
||||
|
||||
let queued = build_queued_message(&mut app, "yolo".to_string());
|
||||
submit_or_steer_message(&mut app, &engine.handle, queued)
|
||||
submit_or_steer_message(&mut app, &config, &engine.handle, queued)
|
||||
.await
|
||||
.expect("submit normal message");
|
||||
|
||||
@@ -1504,8 +1507,9 @@ async fn numeric_plan_choice_still_queues_follow_up_when_busy() {
|
||||
app.is_loading = true;
|
||||
|
||||
let engine = crate::core::engine::mock_engine_handle();
|
||||
let config = Config::default();
|
||||
|
||||
let handled = handle_plan_choice(&mut app, &engine.handle, "2")
|
||||
let handled = handle_plan_choice(&mut app, &config, &engine.handle, "2")
|
||||
.await
|
||||
.expect("plan choice");
|
||||
|
||||
|
||||
@@ -275,7 +275,7 @@ impl FooterProps {
|
||||
// to cross the 60s threshold inside `footer_worked_chip`.
|
||||
let worked = footer_worked_chip(app.cumulative_turn_duration);
|
||||
Self {
|
||||
model: app.model.clone(),
|
||||
model: app.model_display_label(),
|
||||
mode_label,
|
||||
mode_color,
|
||||
text_dim_color: app.ui_theme.text_dim,
|
||||
|
||||
Reference in New Issue
Block a user