Merge remote-tracking branch 'origin/pr/1385' into work/v0.8.34

# Conflicts:
#	crates/tui/src/commands/config.rs
This commit is contained in:
Hunter Bown
2026-05-12 23:22:56 -05:00
2 changed files with 162 additions and 9 deletions
+132 -9
View File
@@ -732,17 +732,44 @@ fn expand_tilde(raw: &str) -> String {
/// Messages with complex keywords → Pro.
/// Default → Flash (cost savings).
pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
auto_model_heuristic_with_bias(input, _current_model, false)
}
/// `auto_model_heuristic` parameterised by the `[auto] cost_saving` opt-in
/// (#1207). When `cost_saving` is `true` the keyword set drops the borderline
/// triggers (`implement`, `analyze`) and the long-message length threshold
/// goes from 500 to 1000 — both shifts let "looks involved but might be a
/// one-liner" requests stay on Flash unless they actually look agentic.
pub fn auto_model_heuristic_with_bias(
input: &str,
_current_model: &str,
cost_saving: bool,
) -> String {
let len = input.chars().count();
let lower = input.to_lowercase();
if COMPLEX_KEYWORDS.iter().any(|kw| lower.contains(kw)) {
let borderline_pro_keywords: &[&str] = &[
"implement",
"analyze",
"\u{5b9e}\u{73b0}", // 实现
"\u{5206}\u{6790}", // 分析
"\u{5be6}\u{73fe}", // 實現
];
let strong_match = COMPLEX_KEYWORDS.iter().any(|kw| {
!borderline_pro_keywords.contains(kw) && lower.contains(kw)
});
let borderline_match = borderline_pro_keywords.iter().any(|kw| lower.contains(kw));
let pro_match = strong_match || (!cost_saving && borderline_match);
if pro_match {
return "deepseek-v4-pro".to_string();
}
// Short messages → Flash
if len < 100 {
return "deepseek-v4-flash".to_string();
}
// Long complex requests → Pro
if len > 500 {
// Long complex requests → Pro. Cost-saving raises the threshold so that
// long-but-routine requests (pasted logs, CSV-style data) don't escalate.
let long_threshold = if cost_saving { 1_000 } else { 500 };
if len > long_threshold {
return "deepseek-v4-pro".to_string();
}
// Default to Flash for cost savings
@@ -836,6 +863,16 @@ tool-heavy work, ambiguous requests, or anything that benefits from deeper reaso
Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \
agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.";
/// Bias appended to the auto-router's system prompt when the user opts in to
/// `[auto] cost_saving = true` (#1207). Reverses the default tie-breaker for
/// genuinely ambiguous requests so Pro is reserved for tasks that clearly
/// require it; ordinary tweaks, config edits, and short reads stay on Flash.
pub const AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM: &str = "\
\n\nCost-saving mode is ON. Prefer deepseek-v4-flash for any request that is \
not unmistakably agentic, multi-step, architecture/design, security review, \
debugging, or otherwise clearly out of Flash's capability. Resolve ambiguous \
cases in favour of deepseek-v4-flash, not deepseek-v4-pro.";
/// Parse the Flash router's JSON-only response.
///
/// The runtime treats classifier output as untrusted: only known V4 model IDs
@@ -898,6 +935,7 @@ pub async fn resolve_auto_route_with_flash(
selected_model_mode: &str,
selected_thinking_mode: &str,
) -> AutoRouteSelection {
let cost_saving = config.auto_cost_saving();
match auto_route_flash_recommendation(
config,
latest_request,
@@ -912,13 +950,17 @@ pub async fn resolve_auto_route_with_flash(
reasoning_effort: recommendation.reasoning_effort,
source: AutoRouteSource::FlashRouter,
},
Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode),
Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode, cost_saving),
}
}
fn fallback_auto_route(latest_request: &str, selected_model_mode: &str) -> AutoRouteSelection {
fn fallback_auto_route(
latest_request: &str,
selected_model_mode: &str,
cost_saving: bool,
) -> AutoRouteSelection {
AutoRouteSelection {
model: auto_model_heuristic(latest_request, selected_model_mode),
model: auto_model_heuristic_with_bias(latest_request, selected_model_mode, cost_saving),
reasoning_effort: Some(normalize_auto_route_effort(crate::auto_reasoning::select(
false,
latest_request,
@@ -939,6 +981,10 @@ async fn auto_route_flash_recommendation(
}
let client = DeepSeekClient::new(config)?;
let mut router_system = AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string();
if config.auto_cost_saving() {
router_system.push_str(AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM);
}
let request = MessageRequest {
model: "deepseek-v4-flash".to_string(),
messages: vec![Message {
@@ -954,9 +1000,7 @@ async fn auto_route_flash_recommendation(
}],
}],
max_tokens: 96,
system: Some(SystemPrompt::Text(
AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string(),
)),
system: Some(SystemPrompt::Text(router_system)),
tools: None,
tool_choice: None,
metadata: None,
@@ -1409,6 +1453,85 @@ mod tests {
);
}
#[test]
fn auto_heuristic_default_routes_implement_to_pro() {
// Default (no cost-saving): "implement" is one of the borderline
// keywords that escalates to Pro.
assert_eq!(
auto_model_heuristic_with_bias("Please implement a binary search", "auto", false),
"deepseek-v4-pro"
);
}
#[test]
fn auto_heuristic_cost_saving_keeps_borderline_keywords_on_flash() {
// Cost-saving: "implement" / "analyze" are no longer enough to escalate.
assert_eq!(
auto_model_heuristic_with_bias("Please implement a binary search", "auto", true),
"deepseek-v4-flash"
);
assert_eq!(
auto_model_heuristic_with_bias("analyze this snippet", "auto", true),
"deepseek-v4-flash"
);
}
#[test]
fn auto_heuristic_strong_keywords_still_route_to_pro_under_cost_saving() {
// Cost-saving must NOT swallow obviously Pro-grade work.
for kw in [
"refactor",
"architecture",
"design",
"debug",
"security",
"review",
"audit",
"migrate",
"optimize",
"rewrite",
] {
let req = format!("Please {kw} this module");
assert_eq!(
auto_model_heuristic_with_bias(&req, "auto", true),
"deepseek-v4-pro",
"expected Pro for strong keyword `{kw}` even in cost-saving mode"
);
}
}
#[test]
fn auto_heuristic_cost_saving_raises_long_message_threshold() {
// 600-char request is "long" by default (>500) → Pro,
// but stays Flash under cost-saving (threshold 1000).
let body = "filler sentence. ".repeat(40); // ~680 chars
assert_eq!(
auto_model_heuristic_with_bias(&body, "auto", false),
"deepseek-v4-pro"
);
assert_eq!(
auto_model_heuristic_with_bias(&body, "auto", true),
"deepseek-v4-flash"
);
}
#[test]
fn config_auto_cost_saving_defaults_to_false() {
let cfg = crate::config::Config::default();
assert!(!cfg.auto_cost_saving());
}
#[test]
fn config_auto_cost_saving_reads_table() {
let cfg = crate::config::Config {
auto: Some(crate::config::AutoConfig {
cost_saving: Some(true),
}),
..Default::default()
};
assert!(cfg.auto_cost_saving());
}
#[test]
fn test_set_default_mode_normal_save_reports_normalized_value() {
let nanos = SystemTime::now()
+30
View File
@@ -804,6 +804,18 @@ pub struct SubagentsConfig {
pub max_concurrent: Option<usize>,
}
/// `[auto]` table — knobs for the `--model auto` / `/model auto` router.
///
/// `cost_saving` (#1207): when `true`, the auto-mode router prefers
/// `deepseek-v4-flash` for ambiguous requests, only escalating to
/// `deepseek-v4-pro` when the task clearly benefits from deeper reasoning.
/// Default is `false` (balanced — match the existing routing voice).
#[derive(Debug, Clone, Deserialize, Default)]
pub struct AutoConfig {
#[serde(default)]
pub cost_saving: Option<bool>,
}
/// Resolved CLI configuration, including defaults and environment overrides.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct Config {
@@ -897,6 +909,11 @@ pub struct Config {
#[serde(default)]
pub memory: Option<MemoryConfig>,
/// Tunables for `--model auto` (#1207). When absent, the auto router
/// keeps its existing balanced behaviour.
#[serde(default)]
pub auto: Option<AutoConfig>,
/// Post-edit LSP diagnostics injection (#136). When absent, the engine
/// applies the defaults documented in [`LspConfigToml`].
#[serde(default)]
@@ -1142,6 +1159,18 @@ struct RequirementsFile {
// === Config Loading ===
impl Config {
/// Return `true` if the `[auto] cost_saving = true` opt-in is set
/// (#1207). When true, the auto-mode router biases toward
/// `deepseek-v4-flash` for ambiguous requests instead of escalating to
/// `deepseek-v4-pro`. Default: `false` (balanced behaviour).
#[must_use]
pub fn auto_cost_saving(&self) -> bool {
self.auto
.as_ref()
.and_then(|a| a.cost_saving)
.unwrap_or(false)
}
/// Load configuration from disk and merge with environment overrides.
///
/// # Examples
@@ -2705,6 +2734,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
snapshots: override_cfg.snapshots.or(base.snapshots),
search: override_cfg.search.or(base.search),
memory: override_cfg.memory.or(base.memory),
auto: override_cfg.auto.or(base.auto),
lsp: override_cfg.lsp.or(base.lsp),
context: ContextConfig {
enabled: override_cfg.context.enabled.or(base.context.enabled),