fix(compaction): default to off + raise unknown-model floor to 80% (#664)

Two coordinated changes that stop the engine from routinely rewriting the
prompt prefix and burning DeepSeek V4's prefix-cache discount:

1. `Settings::default().auto_compact` flips from `true` to `false`. The
   `auto_compact = on` opt-in and the explicit `/compact` slash command
   stay available for users / agents that decide their workload benefits
   from compaction more than from cache stability. With V4's 1M-token
   window the user has plenty of headroom to run long sessions without
   auto-trimming, and aggressive compaction has been the dominant
   cost-spike vector in long sessions (every compaction event rewrites
   the prefix and forfeits the ~90% discount on cached prefix tokens).

2. `DEFAULT_COMPACTION_TOKEN_THRESHOLD` raised from `50_000` to
   `102_400` (80% of `DEFAULT_CONTEXT_WINDOW_TOKENS = 128_000`). This is
   the last-resort threshold used when `context_window_for_model` returns
   `None` — i.e. an unrecognised model id. Pre-v0.8.11 the fallback
   compacted at ~5% of a V4 window when model detection silently fell
   through. Now the fallback inherits the same late-trigger discipline as
   the V4 path, so model-detection drift doesn't quietly burn cache.

Together, the two changes mean compaction never fires automatically by
default, and even when explicitly opted in (or when the runtime-thread /
capacity-flow paths invoke compaction with their own `enabled = true`
config), the threshold is anchored at 80% of the model's context window
(or 80% of the 128K default if the model is unknown) and never lower.

Tests
=====

- `default_settings_disable_auto_compact_to_protect_v4_prefix_cache` —
  pins the new default and explains the rationale inline.
- `auto_compact_remains_explicitly_configurable` — unchanged; still
  asserts the `set("auto_compact", "on" | "off")` round-trip works.
- `compaction_threshold_scales_with_context_window` — updated to assert
  `compaction_threshold_for_model("unknown-model") == 102_400`.
- `v4_soft_caps_only_apply_to_v4_models` — updated to assert the
  unknown-model + reasoning-effort path also lands on the new floor.

Verification
============

- `cargo fmt --all -- --check` clean.
- `cargo clippy -p deepseek-tui --bin deepseek-tui --all-features
  --locked -- -D warnings` clean.
- `cargo test -p deepseek-tui --bin deepseek-tui --locked` →
  2028 passed, 2 ignored.

Refs #664 (handoff-instead-of-compact pattern, full implementation
deferred). Behaviour-only change for v0.8.11; the larger
agent-aware-handoff mechanism is its own design surface.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter Bown
2026-05-04 19:28:02 -05:00
parent a61da8de2c
commit fc4f1e6564
3 changed files with 41 additions and 8 deletions
+3
View File
@@ -78,3 +78,6 @@ apps/
# Local-only Claude / ralph notes
.claude/*.local.md
.claude/*.local.json
# Maintainer-internal design notes (trade-secret material, never published)
.private/
+20 -3
View File
@@ -4,7 +4,14 @@ use serde::{Deserialize, Serialize};
pub const DEFAULT_CONTEXT_WINDOW_TOKENS: u32 = 128_000;
pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 50_000;
/// Last-resort compaction trigger when [`context_window_for_model`] returns
/// `None` (an unrecognised model id). v0.8.11 raised this from `50_000` to
/// `102_400` (80% of [`DEFAULT_CONTEXT_WINDOW_TOKENS`]) so unknown models
/// inherit the same late-trigger discipline as V4 instead of paying the
/// prefix-cache hit at 5% of the V4 window. Known DeepSeek / Claude models
/// resolve to their own scaled value via [`compaction_threshold_for_model`]
/// (#664).
pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50;
const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
const COMPACTION_MESSAGE_DIVISOR: u32 = 500;
@@ -450,7 +457,13 @@ mod tests {
compaction_threshold_for_model("deepseek-v3.2-128k"),
102_400
);
assert_eq!(compaction_threshold_for_model("unknown-model"), 50_000);
// v0.8.11 (#664): unknown-model fallback also resolves to 80% of
// `DEFAULT_CONTEXT_WINDOW_TOKENS` (128k) — same late-trigger
// discipline as the V4 path. Was `50_000` pre-v0.8.11; that
// hardcoded value compacted at ~5% of a 1M window when the model
// detection silently fell through, which is exactly the
// prefix-cache-burning behaviour we're getting away from.
assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400);
}
#[test]
@@ -495,9 +508,13 @@ mod tests {
compaction_threshold_for_model_and_effort("deepseek-v3.2-128k", Some("max")),
102_400
);
// v0.8.11 (#664): unknown-model fallback also lands on the
// 80%-of-128K floor instead of the legacy hardcoded 50K, so
// model-detection-fall-through doesn't quietly burn V4 prefix
// cache at 5%-of-window.
assert_eq!(
compaction_threshold_for_model_and_effort("unknown-model", Some("max")),
50_000
102_400
);
}
+18 -5
View File
@@ -59,7 +59,16 @@ pub struct Settings {
impl Default for Settings {
fn default() -> Self {
Self {
auto_compact: true,
// v0.8.11: default flipped to `false` to stop the engine from
// routinely rewriting the prompt prefix, which breaks DeepSeek
// V4's prefix cache (~90% discount on cached prefix tokens) and
// ends up costing more than the compaction itself saves. With
// V4's 1M-token window the user has plenty of headroom to run
// long sessions without auto-trimming, and the explicit
// `/compact` slash command + `auto_compact = on` opt-in remain
// available for users / agents that decide compaction is
// worth the cache hit on their workload (#664).
auto_compact: false,
calm_mode: false,
low_motion: false,
fancy_animations: false,
@@ -451,11 +460,15 @@ mod tests {
use super::*;
#[test]
fn default_settings_enable_auto_compact_for_session_survivability() {
fn default_settings_disable_auto_compact_to_protect_v4_prefix_cache() {
let settings = Settings::default();
// #402 P0: auto-compaction is on by default so long-running
// sessions stay within the model's context budget.
assert!(settings.auto_compact);
// v0.8.11: default is `false` to stop the engine from routinely
// rewriting the prompt prefix, which breaks V4's prefix-cache
// discount. The explicit `/compact` command and the
// `auto_compact = on` opt-in stay available; the default is
// flipped so the cache-friendly path is the one users get
// without configuring anything (#664).
assert!(!settings.auto_compact);
}
#[test]