feat(#27): per-mode soft context budget for V4 compaction trigger
Add compaction_threshold_for_model_and_effort() with mode-aware soft caps based on DeepSeek V4 paper Figure 9 recall-quality data:
- Plan / off -> 64K (paper eval: 8K-128K)
- Agent / high -> 192K (paper eval: 128K)
- YOLO / max -> 384K (paper eval: 384K-512K)
Previously, the 80%-of-window rule gave 800K for V4's 1M window, which is well past the point where MRCR MMR collapses (0.49 at 1M). Non-V4 models keep the legacy 80% rule. None/unknown effort defaults to agent-tier (192K).
This commit is contained in:
@@ -268,6 +268,48 @@ pub fn compaction_threshold_for_model(model: &str) -> usize {
|
|||||||
usize::try_from(threshold).unwrap_or(DEFAULT_COMPACTION_TOKEN_THRESHOLD)
|
usize::try_from(threshold).unwrap_or(DEFAULT_COMPACTION_TOKEN_THRESHOLD)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Soft context cap for V4 models in the Plan / Non-Think (`off`) tier.
///
/// This is the smallest of the three mode-aware soft caps for V4 models; the
/// other two are [`V4_AGENT_SOFT_CAP`] and [`V4_YOLO_SOFT_CAP`].
///
/// DeepSeek V4 paper Figure 9 shows retrieval quality (MRCR MMR) collapses as
/// context grows: 0.90 at 8K, 0.94 at 32K, 0.92 at 128K, 0.66 at 512K, 0.49
/// at 1M. The paper's own eval harness uses budget tiers per §5.3.1:
///
/// | Mode / Reasoning tier  | Soft cap | Paper eval window |
/// |------------------------|----------|-------------------|
/// | Plan / Non-Think (off) | 64,000   | 8K-128K           |
/// | Agent / High           | 192,000  | 128K              |
/// | YOLO / Max             | 384,000  | 384K-512K         |
///
/// These caps keep the agent inside the regime DeepSeek tuned for, triggering
/// compaction *before* recall quality degrades. The 1M hard ceiling remains —
/// users can override via config or by declining the /compact suggestion.
pub const V4_PLAN_SOFT_CAP: usize = 64_000;

/// Soft context cap for V4 models in the Agent / High tier (192,000 tokens).
///
/// See [`V4_PLAN_SOFT_CAP`] for the rationale and the full tier table.
pub const V4_AGENT_SOFT_CAP: usize = 192_000;

/// Soft context cap for V4 models in the YOLO / Max tier (384,000 tokens).
///
/// See [`V4_PLAN_SOFT_CAP`] for the rationale and the full tier table.
pub const V4_YOLO_SOFT_CAP: usize = 384_000;
|
||||||
|
|
||||||
|
/// Compaction threshold keyed by model and caller-supplied effort tier.
|
||||||
|
///
|
||||||
|
/// For V4-family models the threshold is a mode-aware soft cap (see constants
|
||||||
|
/// above). For all other models the legacy 80%-of-window rule applies.
|
||||||
|
#[must_use]
|
||||||
|
pub fn compaction_threshold_for_model_and_effort(
|
||||||
|
model: &str,
|
||||||
|
reasoning_effort: Option<&str>,
|
||||||
|
) -> usize {
|
||||||
|
let lower = model.to_lowercase();
|
||||||
|
if !lower.contains("deepseek") || !(lower.contains("v4") || is_current_deepseek_v4_alias(&lower))
|
||||||
|
{
|
||||||
|
return compaction_threshold_for_model(model);
|
||||||
|
}
|
||||||
|
|
||||||
|
match reasoning_effort.map(str::trim).filter(|s| !s.is_empty()) {
|
||||||
|
Some("off" | "disabled" | "none" | "false") => V4_PLAN_SOFT_CAP,
|
||||||
|
Some("low" | "medium" | "high") => V4_AGENT_SOFT_CAP,
|
||||||
|
Some("max" | "maximum" | "xhigh") => V4_YOLO_SOFT_CAP,
|
||||||
|
_ => V4_AGENT_SOFT_CAP,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Derive a compaction message-count threshold from model context window.
|
/// Derive a compaction message-count threshold from model context window.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn compaction_message_threshold_for_model(model: &str) -> usize {
|
pub fn compaction_message_threshold_for_model(model: &str) -> usize {
|
||||||
@@ -446,12 +488,50 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn compaction_scales_for_deepseek_v4_1m_context() {
|
fn compaction_scales_for_deepseek_v4_1m_context() {
|
||||||
// 80% of 1M = 800k tokens before token-based compaction.
|
|
||||||
assert_eq!(compaction_threshold_for_model("deepseek-v4-pro"), 800_000);
|
assert_eq!(compaction_threshold_for_model("deepseek-v4-pro"), 800_000);
|
||||||
// 1M / 500 = 2k messages before message-count compaction.
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
compaction_message_threshold_for_model("deepseek-v4-pro"),
|
compaction_message_threshold_for_model("deepseek-v4-pro"),
|
||||||
2_000
|
2_000
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn v4_mode_aware_soft_caps() {
    // Each reasoning tier on a V4 model must resolve to its own soft cap.
    let expectations: [(&str, usize); 3] = [
        ("off", V4_PLAN_SOFT_CAP),
        ("high", V4_AGENT_SOFT_CAP),
        ("max", V4_YOLO_SOFT_CAP),
    ];

    for &(effort, expected_cap) in &expectations {
        assert_eq!(
            compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some(effort)),
            expected_cap
        );
    }
}
|
||||||
|
|
||||||
|
#[test]
fn v4_soft_caps_only_apply_to_v4_models() {
    // Non-V4 models must keep their existing thresholds even when the
    // highest effort tier is requested.
    let non_v4_models: [(&str, usize); 2] = [
        ("deepseek-v3.2-128k", 102_400),
        ("unknown-model", 50_000),
    ];

    for &(model, expected_threshold) in &non_v4_models {
        assert_eq!(
            compaction_threshold_for_model_and_effort(model, Some("max")),
            expected_threshold
        );
    }
}
|
||||||
|
|
||||||
|
#[test]
fn v4_soft_cap_defaults_to_agent_when_effort_unknown() {
    // A missing effort and an unrecognised effort both land on the agent tier.
    let efforts: [Option<&str>; 2] = [None, Some("unknown")];

    for &effort in &efforts {
        assert_eq!(
            compaction_threshold_for_model_and_effort("deepseek-v4-pro", effort),
            V4_AGENT_SOFT_CAP
        );
    }
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ use crate::core::coherence::CoherenceState;
|
|||||||
use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult};
|
use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult};
|
||||||
use crate::models::{
|
use crate::models::{
|
||||||
Message, SystemPrompt, compaction_message_threshold_for_model, compaction_threshold_for_model,
|
Message, SystemPrompt, compaction_message_threshold_for_model, compaction_threshold_for_model,
|
||||||
|
compaction_threshold_for_model_and_effort,
|
||||||
};
|
};
|
||||||
use crate::palette::{self, UiTheme};
|
use crate::palette::{self, UiTheme};
|
||||||
use crate::settings::Settings;
|
use crate::settings::Settings;
|
||||||
@@ -622,7 +623,10 @@ impl App {
|
|||||||
let max_input_history = settings.max_input_history;
|
let max_input_history = settings.max_input_history;
|
||||||
let ui_theme = palette::ui_theme(&settings.theme);
|
let ui_theme = palette::ui_theme(&settings.theme);
|
||||||
let model = settings.default_model.clone().unwrap_or(model);
|
let model = settings.default_model.clone().unwrap_or(model);
|
||||||
let compact_threshold = compaction_threshold_for_model(&model);
|
let compact_threshold = compaction_threshold_for_model_and_effort(
|
||||||
|
&model,
|
||||||
|
config.reasoning_effort(),
|
||||||
|
);
|
||||||
|
|
||||||
// Start in YOLO mode if --yolo flag was passed
|
// Start in YOLO mode if --yolo flag was passed
|
||||||
let preferred_mode = AppMode::from_setting(&settings.default_mode);
|
let preferred_mode = AppMode::from_setting(&settings.default_mode);
|
||||||
@@ -1390,7 +1394,10 @@ impl App {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_model_compaction_budget(&mut self) {
|
pub fn update_model_compaction_budget(&mut self) {
|
||||||
self.compact_threshold = compaction_threshold_for_model(&self.model);
|
self.compact_threshold = compaction_threshold_for_model_and_effort(
|
||||||
|
&self.model,
|
||||||
|
self.reasoning_effort.api_value(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn compaction_config(&self) -> CompactionConfig {
|
pub fn compaction_config(&self) -> CompactionConfig {
|
||||||
|
|||||||
Reference in New Issue
Block a user