From fc4f1e656475021185f14a8584eebaceff041229 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Mon, 4 May 2026 19:28:02 -0500 Subject: [PATCH] fix(compaction): default to off + raise unknown-model floor to 80% (#664) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two coordinated changes that stop the engine from routinely rewriting the prompt prefix and burning DeepSeek V4's prefix-cache discount: 1. `Settings::default().auto_compact` flips from `true` to `false`. The `auto_compact = on` opt-in and the explicit `/compact` slash command stay available for users / agents that decide their workload benefits from compaction more than from cache stability. With V4's 1M-token window the user has plenty of headroom to run long sessions without auto-trimming, and aggressive compaction has been the dominant cost-spike vector in long sessions (every compaction event rewrites the prefix and forfeits the ~90% prefix-cache discount on it). 2. `DEFAULT_COMPACTION_TOKEN_THRESHOLD` raised from `50_000` to `102_400` (80% of `DEFAULT_CONTEXT_WINDOW_TOKENS = 128_000`). This is the last-resort threshold used when `context_window_for_model` returns `None` — i.e. an unrecognised model id. Pre-v0.8.11 the fallback compacted at ~5% of a V4 window when model detection silently fell through. Now the fallback inherits the same late-trigger discipline as the V4 path, so model-detection drift doesn't quietly burn cache. Together, the two changes mean that compaction never fires automatically by default, and that even when it is explicitly opted in (or when the runtime-thread / capacity-flow paths invoke compaction with their own `enabled = true` config), the threshold is anchored at 80% of the model's context window (or 80% of the 128K default if the model is unknown), never below. Tests ===== - `default_settings_disable_auto_compact_to_protect_v4_prefix_cache` — pins the new default and explains the rationale inline. 
- `auto_compact_remains_explicitly_configurable` — unchanged; still asserts the `set("auto_compact", "on" | "off")` round-trip works. - `compaction_threshold_scales_with_context_window` — updated to assert `compaction_threshold_for_model("unknown-model") == 102_400`. - `v4_soft_caps_only_apply_to_v4_models` — updated to assert the unknown-model + reasoning-effort path also lands on the new floor. Verification ============ - `cargo fmt --all -- --check` clean. - `cargo clippy -p deepseek-tui --bin deepseek-tui --all-features --locked -- -D warnings` clean. - `cargo test -p deepseek-tui --bin deepseek-tui --locked` → 2028 passed, 2 ignored. Refs #664 (handoff-instead-of-compact pattern, full implementation deferred). Behaviour-only change for v0.8.11; the larger agent-aware-handoff mechanism is its own design surface. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 3 +++ crates/tui/src/models.rs | 23 ++++++++++++++++++++--- crates/tui/src/settings.rs | 23 ++++++++++++++++++----- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 0e6bd06b..90595039 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,6 @@ apps/ # Local-only Claude / ralph notes .claude/*.local.md .claude/*.local.json + +# Maintainer-internal design notes (trade-secret material, never published) +.private/ diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index 9a274c40..8b1413ee 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -4,7 +4,14 @@ use serde::{Deserialize, Serialize}; pub const DEFAULT_CONTEXT_WINDOW_TOKENS: u32 = 128_000; pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000; -pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 50_000; +/// Last-resort compaction trigger when [`context_window_for_model`] returns +/// `None` (an unrecognised model id). 
v0.8.11 raised this from `50_000` to +/// `102_400` (80% of [`DEFAULT_CONTEXT_WINDOW_TOKENS`]) so unknown models +/// inherit the same late-trigger discipline as V4 instead of paying the +/// prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude models +/// resolve to their own scaled value via [`compaction_threshold_for_model`] +/// (#664). +pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400; pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50; const COMPACTION_THRESHOLD_PERCENT: u32 = 80; const COMPACTION_MESSAGE_DIVISOR: u32 = 500; @@ -450,7 +457,13 @@ mod tests { compaction_threshold_for_model("deepseek-v3.2-128k"), 102_400 ); - assert_eq!(compaction_threshold_for_model("unknown-model"), 50_000); + // v0.8.11 (#664): unknown-model fallback also resolves to 80% of + // `DEFAULT_CONTEXT_WINDOW_TOKENS` (128k) — same late-trigger + // discipline as the V4 path. Was `50_000` pre-v0.8.11; that + // hardcoded value compacted at ~5% of a 1M window when the model + // detection silently fell through, which is exactly the + // prefix-cache-burning behaviour we're getting away from. + assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400); } #[test] @@ -495,9 +508,13 @@ mod tests { compaction_threshold_for_model_and_effort("deepseek-v3.2-128k", Some("max")), 102_400 ); + // v0.8.11 (#664): unknown-model fallback also lands on the + // 80%-of-128K floor instead of the legacy hardcoded 50K, so + // model-detection-fall-through doesn't quietly burn V4 prefix + // cache at 5%-of-window. 
assert_eq!( compaction_threshold_for_model_and_effort("unknown-model", Some("max")), - 50_000 + 102_400 ); } diff --git a/crates/tui/src/settings.rs b/crates/tui/src/settings.rs index c84b0f04..45076746 100644 --- a/crates/tui/src/settings.rs +++ b/crates/tui/src/settings.rs @@ -59,7 +59,16 @@ pub struct Settings { impl Default for Settings { fn default() -> Self { Self { - auto_compact: true, + // v0.8.11: default flipped to `false` to stop the engine from + // routinely rewriting the prompt prefix, which breaks DeepSeek + // V4's prefix cache (~90% discount on cached prefix tokens) and + // ends up costing more than the compaction itself saves. With + // V4's 1M-token window the user has plenty of headroom to run + // long sessions without auto-trimming, and the explicit + // `/compact` slash command + `auto_compact = on` opt-in remain + // available for users / agents that decide compaction is + // worth the cache hit on their workload (#664). + auto_compact: false, calm_mode: false, low_motion: false, fancy_animations: false, @@ -451,11 +460,15 @@ mod tests { use super::*; #[test] - fn default_settings_enable_auto_compact_for_session_survivability() { + fn default_settings_disable_auto_compact_to_protect_v4_prefix_cache() { let settings = Settings::default(); - // #402 P0: auto-compaction is on by default so long-running - // sessions stay within the model's context budget. - assert!(settings.auto_compact); + // v0.8.11: default is `false` to stop the engine from routinely + // rewriting the prompt prefix, which breaks V4's prefix-cache + // discount. The explicit `/compact` command and the + // `auto_compact = on` opt-in stay available; the default is + // flipped so the cache-friendly path is the one users get + // without configuring anything (#664). + assert!(!settings.auto_compact); } #[test]