fix(compaction): 500K hard floor plus V4 default

2026-05-04 22:06:07 -05:00
parent 43bb055dfe
commit a4dee56fcc
1 changed files with 119 additions and 6 deletions
@@ -25,25 +25,58 @@ pub struct CompactionConfig {
    pub message_threshold: usize,
    pub model: String,
    pub cache_summary: bool,
+    /// Hard floor — `should_compact` returns `false` when total session
+    /// tokens fall below this number, regardless of `enabled`,
+    /// `token_threshold`, or `message_threshold`. Defaults to
+    /// [`MINIMUM_AUTO_COMPACTION_TOKENS`] (500K) for v0.8.11+. Tests that
+    /// want to exercise the older threshold/message-count logic at small
+    /// fixture sizes can set this to `0` to disable the floor.
+    pub auto_floor_tokens: usize,
 }

 impl Default for CompactionConfig {
    fn default() -> Self {
        Self {
-            // ON BY DEFAULT since v0.8.6 (#402 P0 survivability).
-            // Long-running sessions need automatic compaction to stay
-            // within the model's context budget. Users who prefer the
-            // previous behaviour can opt out via `auto_compact = false`
-            // in settings or `compaction.enabled = false` in config.
+            // ON BY DEFAULT since v0.8.6 (#402 P0 survivability) — but the
+            // engine-level `auto_compact` setting was flipped OFF in v0.8.11
+            // (#665) so this default is mostly a fallback for code paths
+            // that build a `CompactionConfig` without going through
+            // `compaction_threshold_for_model_and_effort`. Real per-model
+            // values are still derived through that helper.
            enabled: true,
-            token_threshold: 50000,
+            // v0.8.11: 50K was a 128K-era leftover that biased every
+            // unconfigured caller toward "compact almost immediately on V4."
+            // Bumped to 800K (80% of V4's 1M window) so this fallback
+            // default is no longer misleading. Real call sites override it
+            // via `compaction_threshold_for_model_and_effort`.
+            token_threshold: 800_000,
            message_threshold: 50,
            model: DEFAULT_TEXT_MODEL.to_string(),
            cache_summary: true,
+            auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
        }
    }
 }

+/// Hard floor for automatic compaction in v0.8.11+.
+///
+/// Below this token count, `should_compact` returns `false` regardless of
+/// `enabled`, `token_threshold`, or `message_threshold`. The point of the
+/// floor is V4 prefix-cache economics: compaction rewrites the stable
+/// prefix, which destroys the KV cache. At low token counts the prefix
+/// cache is healthy and compaction's cost (full re-prefill at miss prices)
+/// dwarfs its benefit (a tiny budget reclaim). Above the floor compaction
+/// can still be net-positive — cache is already pressured, the prefix has
+/// drifted, and freeing budget matters.
+///
+/// The manual `/compact` slash command and the model-callable
+/// `compact_now` tool both bypass this floor by going through different
+/// code paths — they represent explicit agency rather than implicit policy.
+///
+/// Constant rather than configurable for v0.8.11. If anyone needs to dial
+/// it (smaller models, opinionated workflows), we can add a setting later.
+pub const MINIMUM_AUTO_COMPACTION_TOKENS: usize = 500_000;
+
 pub const KEEP_RECENT_MESSAGES: usize = 4;
 const RECENT_WORKING_SET_WINDOW: usize = 12;
 const MAX_WORKING_SET_PATHS: usize = 24;
@@ -585,6 +618,21 @@ pub fn should_compact(
        return false;
    }

+    // v0.8.11: hard floor enforcement. Below the floor (default 500K tokens
+    // — see `MINIMUM_AUTO_COMPACTION_TOKENS`), automatic compaction is
+    // refused because rewriting the prefix kills V4's prefix cache for
+    // little budget recovery. Manual `/compact` and the `compact_now` tool
+    // bypass this floor by going through different code paths.
+    if config.auto_floor_tokens > 0 {
+        let total_session_tokens: usize = messages
+            .iter()
+            .map(|m| estimate_tokens_for_message(m, false))
+            .sum();
+        if total_session_tokens < config.auto_floor_tokens {
+            return false;
+        }
+    }
+
    let plan = plan_compaction(
        messages,
        workspace,
@@ -1445,6 +1493,9 @@ mod tests {
            enabled: true,
            token_threshold: 1_000_000, // Very high
            message_threshold: 5,
+            // Disable the v0.8.11 500K floor so this test exercises the
+            // pure message-count threshold logic at small fixture sizes.
+            auto_floor_tokens: 0,
            ..Default::default()
        };

@@ -1585,6 +1636,7 @@ mod tests {
            enabled: true,
            token_threshold: 1_000_000,
            message_threshold: 5,
+            auto_floor_tokens: 0,
            ..Default::default()
        };

@@ -1603,6 +1655,7 @@ mod tests {
            enabled: true,
            token_threshold: 50,
            message_threshold: 50,
+            auto_floor_tokens: 0,
            ..Default::default()
        };

@@ -1874,6 +1927,7 @@ mod tests {
            enabled: true,
            token_threshold: 100,    // Low threshold for testing
            message_threshold: 1000, // High message threshold
+            auto_floor_tokens: 0,
            ..Default::default()
        };

@@ -1901,6 +1955,65 @@ mod tests {
        assert!(!should_compact(&messages, &config, None, None, None));
    }

+    /// v0.8.11: the 500K hard floor blocks auto-compaction even when
+    /// `token_threshold` would otherwise fire. This is the V4
+    /// prefix-cache protection — below 500K total tokens, rewriting the
+    /// prefix loses cache for tiny budget gains.
+    #[test]
+    fn auto_compaction_floor_blocks_below_500k_even_when_threshold_says_yes() {
+        let config = CompactionConfig {
+            enabled: true,
+            token_threshold: 100,    // would normally fire instantly
+            message_threshold: 1000, // not the trigger
+            // Use the production default explicitly so this test pins the
+            // floor's contract rather than relying on `Default`.
+            auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
+            ..Default::default()
+        };
+
+        let messages: Vec<Message> = (0..10).map(|_| msg("user", &"x".repeat(50))).collect();
+        // Total tokens way under 500K, so floor blocks compaction.
+        assert!(!should_compact(&messages, &config, None, None, None));
+    }
+
+    /// v0.8.11: when total tokens cross the 500K floor, the existing
+    /// threshold/message-count logic takes over again.
+    #[test]
+    fn auto_compaction_floor_yields_to_threshold_logic_above_500k() {
+        let config = CompactionConfig {
+            enabled: true,
+            token_threshold: 2_000_000,
+            message_threshold: 2_000,
+            auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
+            ..Default::default()
+        };
+
+        // Each message ~500 tokens; 1100 messages → ~550K total tokens.
+        // That's above the floor (500K) AND below the deliberately high
+        // token_threshold, so auto-compaction stays off — by threshold,
+        // not floor.
+        let messages: Vec<Message> = (0..1100).map(|_| msg("user", &"x".repeat(2000))).collect();
+        assert!(!should_compact(&messages, &config, None, None, None));
+
+        // Crank threshold below total → compaction fires now that we're
+        // past the floor.
+        let config_lower = CompactionConfig {
+            token_threshold: 100_000,
+            ..config
+        };
+        assert!(should_compact(&messages, &config_lower, None, None, None));
+    }
+
+    /// `CompactionConfig::default()` ships with the 500K floor on by
+    /// default — production callers via `..Default::default()` get the
+    /// safety guarantee automatically.
+    #[test]
+    fn compaction_config_default_carries_500k_floor() {
+        let config = CompactionConfig::default();
+        assert_eq!(config.auto_floor_tokens, MINIMUM_AUTO_COMPACTION_TOKENS);
+        assert_eq!(config.auto_floor_tokens, 500_000);
+    }
+
    #[test]
    fn test_plan_compaction_pins_error_messages() {
        let messages = vec![