diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index c2cdc842..df096e0f 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -268,6 +268,48 @@ pub fn compaction_threshold_for_model(model: &str) -> usize { usize::try_from(threshold).unwrap_or(DEFAULT_COMPACTION_TOKEN_THRESHOLD) } +/// Mode-aware soft context caps for V4 models. +/// +/// DeepSeek V4 paper Figure 9 shows retrieval quality (MRCR MMR) collapses as +/// context grows: 0.90 at 8K, 0.94 at 32K, 0.92 at 128K, 0.66 at 512K, 0.49 +/// at 1M. The paper's own eval harness uses budget tiers per §5.3.1: +/// +/// | Mode / Reasoning tier | Soft cap | Paper eval window | +/// |-----------------------|----------|-------------------| +/// | Plan / Non-Think (off) | 64,000 | 8K-128K | +/// | Agent / High | 192,000 | 128K | +/// | YOLO / Max | 384,000 | 384K-512K | +/// +/// These caps keep the agent inside the regime DeepSeek tuned for, triggering +/// compaction *before* recall quality degrades. The 1M hard ceiling remains — +/// users can override via config or by declining the /compact suggestion. +pub const V4_PLAN_SOFT_CAP: usize = 64_000; +pub const V4_AGENT_SOFT_CAP: usize = 192_000; +pub const V4_YOLO_SOFT_CAP: usize = 384_000; + +/// Compaction threshold keyed by model and caller-supplied effort tier. +/// +/// For V4-family models the threshold is a mode-aware soft cap (see constants +/// above). For all other models the legacy 80%-of-window rule applies. +#[must_use] +pub fn compaction_threshold_for_model_and_effort( + model: &str, + reasoning_effort: Option<&str>, +) -> usize { + let lower = model.to_lowercase(); + if !lower.contains("deepseek") || !(lower.contains("v4") || is_current_deepseek_v4_alias(&lower)) + { + return compaction_threshold_for_model(model); + } + + match reasoning_effort.map(str::trim).filter(|s| !s.is_empty()) { + Some("off" | "disabled" | "none" | "false") => V4_PLAN_SOFT_CAP, + Some("low" | "medium" | "high") => V4_AGENT_SOFT_CAP, + Some("max" | "maximum" | "xhigh") => V4_YOLO_SOFT_CAP, + _ => V4_AGENT_SOFT_CAP, + } +} + /// Derive a compaction message-count threshold from model context window. #[must_use] pub fn compaction_message_threshold_for_model(model: &str) -> usize { @@ -446,12 +488,50 @@ mod tests { #[test] fn compaction_scales_for_deepseek_v4_1m_context() { - // 80% of 1M = 800k tokens before token-based compaction. assert_eq!(compaction_threshold_for_model("deepseek-v4-pro"), 800_000); - // 1M / 500 = 2k messages before message-count compaction. assert_eq!( compaction_message_threshold_for_model("deepseek-v4-pro"), 2_000 ); } + + #[test] + fn v4_mode_aware_soft_caps() { + assert_eq!( + compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("off")), + V4_PLAN_SOFT_CAP + ); + assert_eq!( + compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("high")), + V4_AGENT_SOFT_CAP + ); + assert_eq!( + compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("max")), + V4_YOLO_SOFT_CAP + ); + } + + #[test] + fn v4_soft_caps_only_apply_to_v4_models() { + assert_eq!( + compaction_threshold_for_model_and_effort("deepseek-v3.2-128k", Some("max")), + 102_400 + ); + assert_eq!( + compaction_threshold_for_model_and_effort("unknown-model", Some("max")), + 50_000 + ); + } + + #[test] + fn v4_soft_cap_defaults_to_agent_when_effort_unknown() { + assert_eq!( + compaction_threshold_for_model_and_effort("deepseek-v4-pro", None), + V4_AGENT_SOFT_CAP + ); + assert_eq!( + compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("unknown")), + V4_AGENT_SOFT_CAP + ); + } } diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 3488a3de..c8a0b38d 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -14,6 +14,7 @@ use crate::core::coherence::CoherenceState; use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult}; use crate::models::{ Message, SystemPrompt, compaction_message_threshold_for_model, compaction_threshold_for_model, + compaction_threshold_for_model_and_effort, }; use crate::palette::{self, UiTheme}; use crate::settings::Settings; @@ -622,7 +623,10 @@ impl App { let max_input_history = settings.max_input_history; let ui_theme = palette::ui_theme(&settings.theme); let model = settings.default_model.clone().unwrap_or(model); - let compact_threshold = compaction_threshold_for_model(&model); + let compact_threshold = compaction_threshold_for_model_and_effort( + &model, + config.reasoning_effort(), + ); // Start in YOLO mode if --yolo flag was passed let preferred_mode = AppMode::from_setting(&settings.default_mode); @@ -1390,7 +1394,10 @@ impl App { } pub fn update_model_compaction_budget(&mut self) { - self.compact_threshold = compaction_threshold_for_model(&self.model); + self.compact_threshold = compaction_threshold_for_model_and_effort( + &self.model, + self.reasoning_effort.api_value(), + ); } pub fn compaction_config(&self) -> CompactionConfig {