Merge remote-tracking branch 'origin/pr/1385' into work/v0.8.34

# Conflicts: # crates/tui/src/commands/config.rs
2026-05-12 23:22:56 -05:00
parent dfd02e343c 5eb961760e
commit e254780e48
2 changed files with 162 additions and 9 deletions
@@ -732,17 +732,44 @@ fn expand_tilde(raw: &str) -> String {
 /// Messages with complex keywords → Pro.
 /// Default → Flash (cost savings).
 pub fn auto_model_heuristic(input: &str, _current_model: &str) -> String {
+    auto_model_heuristic_with_bias(input, _current_model, false) // cost_saving = false: default routing
+}
+
+/// `auto_model_heuristic` parameterised by the `[auto] cost_saving` opt-in
+/// (#1207). When `cost_saving` is `true` the borderline keywords (`implement`,
+/// `analyze`, and their Chinese equivalents) no longer escalate to Pro, and the
+/// long-message length threshold goes from 500 to 1000 — both shifts let "looks
+/// involved but might be a one-liner" requests stay on Flash unless they actually look agentic.
+pub fn auto_model_heuristic_with_bias(
+    input: &str,
+    _current_model: &str,
+    cost_saving: bool,
+) -> String {
    let len = input.chars().count();
    let lower = input.to_lowercase();
-    if COMPLEX_KEYWORDS.iter().any(|kw| lower.contains(kw)) {
+    let borderline_pro_keywords: &[&str] = &[
+        "implement",
+        "analyze",
+        "\u{5b9e}\u{73b0}", // 实现
+        "\u{5206}\u{6790}", // 分析
+        "\u{5be6}\u{73fe}", // 實現
+    ];
+    let strong_match = COMPLEX_KEYWORDS.iter().any(|kw| {
+        !borderline_pro_keywords.contains(kw) && lower.contains(kw)
+    });
+    let borderline_match = borderline_pro_keywords.iter().any(|kw| lower.contains(kw));
+    let pro_match = strong_match || (!cost_saving && borderline_match);
+    if pro_match {
        return "deepseek-v4-pro".to_string();
    }
    // Short messages → Flash
    if len < 100 {
        return "deepseek-v4-flash".to_string();
    }
-    // Long complex requests → Pro
-    if len > 500 {
+    // Long complex requests → Pro. Cost-saving raises the threshold so that
+    // long-but-routine requests (pasted logs, CSV-style data) don't escalate.
+    let long_threshold = if cost_saving { 1_000 } else { 500 };
+    if len > long_threshold {
        return "deepseek-v4-pro".to_string();
    }
    // Default to Flash for cost savings
@@ -836,6 +863,16 @@ tool-heavy work, ambiguous requests, or anything that benefits from deeper reaso
 Use thinking off only for trivial no-tool answers, high for ordinary reasoning, and max for \
 agentic, coding, multi-file, release, architecture, debugging, security, tool-heavy, or uncertain work.";

+/// Addendum pushed onto the auto-router's system prompt when the user opts in to
+/// `[auto] cost_saving = true` (#1207). It opens with a blank line to separate it from the base
+/// prompt, and reverses the default tie-breaker for genuinely ambiguous requests: Pro is reserved
+/// for tasks that clearly require it; ordinary tweaks, config edits, and short reads stay on Flash.
+pub const AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM: &str = "\
+\n\nCost-saving mode is ON. Prefer deepseek-v4-flash for any request that is \
+not unmistakably agentic, multi-step, architecture/design, security review, \
+debugging, or otherwise clearly out of Flash's capability. Resolve ambiguous \
+cases in favour of deepseek-v4-flash, not deepseek-v4-pro.";
+
 /// Parse the Flash router's JSON-only response.
 ///
 /// The runtime treats classifier output as untrusted: only known V4 model IDs
@@ -898,6 +935,7 @@ pub async fn resolve_auto_route_with_flash(
    selected_model_mode: &str,
    selected_thinking_mode: &str,
 ) -> AutoRouteSelection {
+    let cost_saving = config.auto_cost_saving();
    match auto_route_flash_recommendation(
        config,
        latest_request,
@@ -912,13 +950,17 @@ pub async fn resolve_auto_route_with_flash(
            reasoning_effort: recommendation.reasoning_effort,
            source: AutoRouteSource::FlashRouter,
        },
-        Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode),
+        Ok(None) | Err(_) => fallback_auto_route(latest_request, selected_model_mode, cost_saving),
    }
 }

-fn fallback_auto_route(latest_request: &str, selected_model_mode: &str) -> AutoRouteSelection {
+fn fallback_auto_route(
+    latest_request: &str,
+    selected_model_mode: &str,
+    cost_saving: bool,
+) -> AutoRouteSelection {
    AutoRouteSelection {
-        model: auto_model_heuristic(latest_request, selected_model_mode),
+        model: auto_model_heuristic_with_bias(latest_request, selected_model_mode, cost_saving),
        reasoning_effort: Some(normalize_auto_route_effort(crate::auto_reasoning::select(
            false,
            latest_request,
@@ -939,6 +981,10 @@ async fn auto_route_flash_recommendation(
    }

    let client = DeepSeekClient::new(config)?;
+    let mut router_system = AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string();
+    if config.auto_cost_saving() {
+        router_system.push_str(AUTO_MODEL_ROUTER_COST_SAVING_ADDENDUM);
+    }
    let request = MessageRequest {
        model: "deepseek-v4-flash".to_string(),
        messages: vec![Message {
@@ -954,9 +1000,7 @@ async fn auto_route_flash_recommendation(
            }],
        }],
        max_tokens: 96,
-        system: Some(SystemPrompt::Text(
-            AUTO_MODEL_ROUTER_SYSTEM_PROMPT.to_string(),
-        )),
+        system: Some(SystemPrompt::Text(router_system)),
        tools: None,
        tool_choice: None,
        metadata: None,
@@ -1409,6 +1453,85 @@ mod tests {
        );
    }

+    #[test]
+    fn auto_heuristic_default_routes_implement_to_pro() {
+        // Default (cost_saving = false): "implement" is one of the borderline
+        // keywords, and on its own it still escalates to Pro.
+        assert_eq!(
+            auto_model_heuristic_with_bias("Please implement a binary search", "auto", false),
+            "deepseek-v4-pro"
+        );
+    }
+
+    #[test]
+    fn auto_heuristic_cost_saving_keeps_borderline_keywords_on_flash() {
+        // Cost-saving: "implement" / "analyze" alone no longer escalate; these short inputs stay on Flash.
+        assert_eq!(
+            auto_model_heuristic_with_bias("Please implement a binary search", "auto", true),
+            "deepseek-v4-flash"
+        );
+        assert_eq!(
+            auto_model_heuristic_with_bias("analyze this snippet", "auto", true),
+            "deepseek-v4-flash"
+        );
+    }
+
+    #[test]
+    fn auto_heuristic_strong_keywords_still_route_to_pro_under_cost_saving() {
+        // Cost-saving must NOT swallow obviously Pro-grade work (keywords presumably in COMPLEX_KEYWORDS).
+        for kw in [
+            "refactor",
+            "architecture",
+            "design",
+            "debug",
+            "security",
+            "review",
+            "audit",
+            "migrate",
+            "optimize",
+            "rewrite",
+        ] {
+            let req = format!("Please {kw} this module");
+            assert_eq!(
+                auto_model_heuristic_with_bias(&req, "auto", true),
+                "deepseek-v4-pro",
+                "expected Pro for strong keyword `{kw}` even in cost-saving mode"
+            );
+        }
+    }
+
+    #[test]
+    fn auto_heuristic_cost_saving_raises_long_message_threshold() {
+        // A 680-char request is "long" by default (>500) → Pro,
+        // but stays Flash under cost-saving (threshold 1000).
+        let body = "filler sentence. ".repeat(40); // 17 chars × 40 = 680 chars
+        assert_eq!(
+            auto_model_heuristic_with_bias(&body, "auto", false),
+            "deepseek-v4-pro"
+        );
+        assert_eq!(
+            auto_model_heuristic_with_bias(&body, "auto", true),
+            "deepseek-v4-flash"
+        );
+    }
+
+    #[test]
+    fn config_auto_cost_saving_defaults_to_false() {
+        let cfg = crate::config::Config::default(); // default config: no `[auto]` table set
+        assert!(!cfg.auto_cost_saving());
+    }
+
+    #[test]
+    fn config_auto_cost_saving_reads_table() {
+        let cfg = crate::config::Config {
+            auto: Some(crate::config::AutoConfig {
+                cost_saving: Some(true), // equivalent of `[auto] cost_saving = true`
+            }),
+            ..Default::default()
+        };
+        assert!(cfg.auto_cost_saving());
+    }
+
    #[test]
    fn test_set_default_mode_normal_save_reports_normalized_value() {
        let nanos = SystemTime::now()
@@ -804,6 +804,18 @@ pub struct SubagentsConfig {
    pub max_concurrent: Option<usize>,
 }

+/// `[auto]` table — knobs for the `--model auto` / `/model auto` router.
+///
+/// `cost_saving` (#1207): when `true`, the auto-mode router prefers
+/// `deepseek-v4-flash` for ambiguous requests, only escalating to
+/// `deepseek-v4-pro` when the task clearly benefits from deeper reasoning.
+/// Default is `false` (balanced — matches the existing routing behaviour).
+#[derive(Debug, Clone, Deserialize, Default)]
+pub struct AutoConfig {
+    #[serde(default)]
+    pub cost_saving: Option<bool>, // `None` (key omitted) is read as `false` by `Config::auto_cost_saving`
+}
+
 /// Resolved CLI configuration, including defaults and environment overrides.
 #[derive(Debug, Clone, Default, Deserialize)]
 pub struct Config {
@@ -897,6 +909,11 @@ pub struct Config {
    #[serde(default)]
    pub memory: Option<MemoryConfig>,

+    /// Tunables for `--model auto` (#1207). When absent, the auto router
+    /// keeps its existing balanced behaviour.
+    #[serde(default)]
+    pub auto: Option<AutoConfig>,
+
    /// Post-edit LSP diagnostics injection (#136). When absent, the engine
    /// applies the defaults documented in [`LspConfigToml`].
    #[serde(default)]
@@ -1142,6 +1159,18 @@ struct RequirementsFile {
 // === Config Loading ===

 impl Config {
+    /// Returns `true` if the `[auto] cost_saving = true` opt-in is set
+    /// (#1207). When true, the auto-mode router biases toward
+    /// `deepseek-v4-flash` for ambiguous requests instead of escalating to
+    /// `deepseek-v4-pro`. Default: `false` (balanced behaviour).
+    #[must_use]
+    pub fn auto_cost_saving(&self) -> bool {
+        self.auto
+            .as_ref()
+            .and_then(|a| a.cost_saving)
+            .unwrap_or(false) // a missing `[auto]` table or a missing key both mean "off"
+    }
+
    /// Load configuration from disk and merge with environment overrides.
    ///
    /// # Examples
@@ -2705,6 +2734,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
        snapshots: override_cfg.snapshots.or(base.snapshots),
        search: override_cfg.search.or(base.search),
        memory: override_cfg.memory.or(base.memory),
+        auto: override_cfg.auto.or(base.auto),
        lsp: override_cfg.lsp.or(base.lsp),
        context: ContextConfig {
            enabled: override_cfg.context.enabled.or(base.context.enabled),