refactor(compaction): drop message_threshold, token-only triggering

The `message_threshold` field on `CompactionConfig` was a 128K-era heuristic that fired compaction on long sessions of small messages — exactly the case where rewriting V4's prefix cache is most wasteful. Token budget is the only signal that maps to actual model context pressure; counting messages adds nothing. Changes: * Remove `CompactionConfig::message_threshold` field. * Remove the message-count branch in `should_compact` — token threshold + 500K floor is now the sole compaction trigger. * Remove `compaction_message_threshold_for_model`, `DEFAULT_COMPACTION_MESSAGE_THRESHOLD`, `COMPACTION_MESSAGE_DIVISOR`, `MAX_COMPACTION_MESSAGE_THRESHOLD` from `models.rs`. * Drop the `forced_config.message_threshold` tweak in the engine's capacity-guardrail forced-compaction path; that path now also bypasses the floor (`auto_floor_tokens = 0`) because we're at a hard ceiling and have to free budget regardless of cache cost. * Update production constructors (`main.rs`, `runtime_threads.rs`, `app.rs::compaction_config`) to drop the field. * Update tests: keep the floor + token-threshold assertions, delete the two tests that specifically validated message-count triggering, replace `should_compact_respects_message_threshold` with `message_count_no_longer_triggers_compaction` pinning the new contract. Verified locally: * `cargo fmt --all -- --check` clean. * `cargo clippy --workspace --all-targets --all-features --locked -- -D warnings` clean. * `cargo test --workspace --all-features --locked` — 2036 passed in TUI bin (2 ignored), all other crates green. * parity gates: snapshot, parity_protocol, parity_state — all pass. * `git diff --exit-code -- Cargo.lock` — clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 22:19:09 -05:00
parent e98efcf31d
commit d0e95f23b0
6 changed files with 57 additions and 146 deletions
@@ -18,19 +18,25 @@ use crate::models::{
 };

 /// Configuration for conversation compaction behavior.
+///
+/// v0.8.11 simplified this from the prior token-OR-message-count trigger
+/// to a token-only trigger gated by an absolute floor. The
+/// `message_threshold` field was removed: its only purpose was to fire
+/// compaction on long sessions of small messages, which is exactly the
+/// case where rewriting the V4 prefix cache is least valuable. Token
+/// budget is the right signal; message count was a 128K-era heuristic.
 #[derive(Debug, Clone, PartialEq)]
 pub struct CompactionConfig {
    pub enabled: bool,
    pub token_threshold: usize,
-    pub message_threshold: usize,
    pub model: String,
    pub cache_summary: bool,
    /// Hard floor — `should_compact` returns `false` when total session
-    /// tokens fall below this number, regardless of `enabled`,
-    /// `token_threshold`, or `message_threshold`. Defaults to
-    /// [`MINIMUM_AUTO_COMPACTION_TOKENS`] (500K) for v0.8.11+. Tests that
-    /// want to exercise the older threshold/message-count logic at small
-    /// fixture sizes can set this to `0` to disable the floor.
+    /// tokens fall below this number, regardless of `enabled` or
+    /// `token_threshold`. Defaults to [`MINIMUM_AUTO_COMPACTION_TOKENS`]
+    /// (500K) for v0.8.11+. Tests that want to exercise the threshold
+    /// logic at small fixture sizes can set this to `0` to disable the
+    /// floor.
    pub auto_floor_tokens: usize,
 }

@@ -50,7 +56,6 @@ impl Default for CompactionConfig {
            // default no longer lies. Real call sites override this via
            // `compaction_threshold_for_model_and_effort`.
            token_threshold: 800_000,
-            message_threshold: 50,
            model: DEFAULT_TEXT_MODEL.to_string(),
            cache_summary: true,
            auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
@@ -61,17 +66,15 @@ impl Default for CompactionConfig {
 /// Hard floor for automatic compaction in v0.8.11+.
 ///
 /// Below this token count, `should_compact` returns `false` regardless of
-/// `enabled`, `token_threshold`, or `message_threshold`. The point of the
-/// floor is V4 prefix-cache economics: compaction rewrites the stable
-/// prefix, which destroys the KV cache. At low token counts the prefix
-/// cache is healthy and compaction's cost (full re-prefill at miss prices)
-/// dwarfs its benefit (a tiny budget reclaim). Above the floor compaction
-/// can still be net-positive — cache is already pressured, the prefix has
-/// drifted, and freeing budget matters.
+/// `enabled` or `token_threshold`. The point of the floor is V4 prefix-cache
+/// economics: compaction rewrites the stable prefix, which destroys the KV
+/// cache. At low token counts the prefix cache is healthy and compaction's
+/// cost (full re-prefill at miss prices) dwarfs its benefit (a tiny budget
+/// reclaim). Above the floor compaction can still be net-positive — cache
+/// is already pressured, the prefix has drifted, and freeing budget matters.
 ///
-/// Manual `/compact` slash command and the model-callable `compact_now`
-/// tool both bypass this floor with a deliberate refusal message — they
-/// represent explicit agency rather than implicit policy.
+/// Manual `/compact` slash command bypasses this floor with explicit user
+/// agency.
 ///
 /// Constant rather than configurable for v0.8.11. If anyone needs to dial
 /// it (smaller models, opinionated workflows), we can add a setting later.
@@ -645,7 +648,6 @@ pub fn should_compact(
        .iter()
        .map(|&idx| estimate_tokens_for_message(&messages[idx], false))
        .sum();
-    let pinned_count = plan.pinned_indices.len();

    let token_estimate: usize = plan
        .summarize_indices
@@ -656,21 +658,19 @@ pub fn should_compact(

    // Pinned messages consume part of the budget, so compact earlier when needed.
    let effective_token_threshold = config.token_threshold.saturating_sub(pinned_tokens);
-    let effective_message_threshold = config.message_threshold.saturating_sub(pinned_count);

-    // Always compact if we exceed the token threshold, even with few unpinned messages.
-    if token_estimate > effective_token_threshold && effective_token_threshold > 0 {
-        return true;
+    // Token-only trigger (v0.8.11): the prior message-count branch was a
+    // 128K-era heuristic that fired compaction on long chats of small
+    // messages — exactly the case where rewriting the V4 prefix cache is
+    // most wasteful. Token budget is the only signal that maps to actual
+    // model context pressure.
+    if effective_token_threshold == 0 {
+        return message_count >= MIN_SUMMARIZE_MESSAGES;
    }
-
-    let enough_unpinned = message_count >= MIN_SUMMARIZE_MESSAGES
-        || effective_token_threshold == 0
-        || effective_message_threshold == 0;
-    if !enough_unpinned {
+    if message_count < MIN_SUMMARIZE_MESSAGES {
        return false;
    }
-
-    token_estimate > effective_token_threshold || message_count > effective_message_threshold
+    token_estimate > effective_token_threshold
 }

 fn truncate_chars(text: &str, max_chars: usize) -> &str {
@@ -1487,20 +1487,22 @@ mod tests {
        assert!(!should_compact(&messages, &config, None, None, None));
    }

+    /// v0.8.11: message-count is no longer a compaction trigger. Long
+    /// chats of small messages stay uncompacted because rewriting the V4
+    /// prefix cache for a tiny budget reclaim is net-negative. Only token
+    /// pressure (and the explicit `/compact` slash command) trigger
+    /// compaction.
    #[test]
-    fn should_compact_respects_message_threshold() {
+    fn message_count_no_longer_triggers_compaction() {
        let config = CompactionConfig {
            enabled: true,
-            token_threshold: 1_000_000, // Very high
-            message_threshold: 5,
-            // Disable the v0.8.11 500K floor so this test exercises the
-            // pure message-count threshold logic at small fixture sizes.
+            token_threshold: 1_000_000,
            auto_floor_tokens: 0,
            ..Default::default()
        };

-        // Under threshold
-        let few_messages: Vec<Message> = (0..4)
+        // 200 tiny messages, well above the prior message threshold.
+        let many_messages: Vec<Message> = (0..200)
            .map(|_| Message {
                role: "user".to_string(),
                content: vec![ContentBlock::Text {
@@ -1509,19 +1511,9 @@ mod tests {
                }],
            })
            .collect();
-        assert!(!should_compact(&few_messages, &config, None, None, None));
-
-        // Over threshold
-        let many_messages: Vec<Message> = (0..10)
-            .map(|_| Message {
-                role: "user".to_string(),
-                content: vec![ContentBlock::Text {
-                    text: "x".to_string(),
-                    cache_control: None,
-                }],
-            })
-            .collect();
-        assert!(should_compact(&many_messages, &config, None, None, None));
+        // Token total stays minuscule so the token threshold is not hit;
+        // without the prior message-count trigger, no compaction.
+        assert!(!should_compact(&many_messages, &config, None, None, None));
    }

    #[test]
@@ -1619,7 +1611,6 @@ mod tests {
        let config = CompactionConfig {
            enabled: true,
            token_threshold: 10,
-            message_threshold: 2,
            ..Default::default()
        };

@@ -1630,46 +1621,12 @@ mod tests {
        assert!(!should_compact(&messages, &config, None, None, None));
    }

-    #[test]
-    fn should_compact_counts_only_unpinned_messages() {
-        let config = CompactionConfig {
-            enabled: true,
-            token_threshold: 1_000_000,
-            message_threshold: 5,
-            auto_floor_tokens: 0,
-            ..Default::default()
-        };
-
-        let mut messages: Vec<Message> = (0..7)
-            .map(|i| msg("user", &format!("noise message {i}")))
-            .collect();
-        messages.push(msg("user", "Focus on src/core/engine.rs"));
-        messages.extend((0..4).map(|i| msg("assistant", &format!("recent {i}"))));
-
-        assert!(should_compact(&messages, &config, None, None, None));
-    }
-
-    #[test]
-    fn should_compact_when_pins_consume_budget() {
-        let config = CompactionConfig {
-            enabled: true,
-            token_threshold: 50,
-            message_threshold: 50,
-            auto_floor_tokens: 0,
-            ..Default::default()
-        };
-
-        let mut messages = vec![msg("user", "noise 0"), msg("assistant", "noise 1")];
-        messages.extend((0..4).map(|_| {
-            msg(
-                "assistant",
-                &format!("{} src/core/engine.rs", "x".repeat(400)),
-            )
-        }));
-
-        // Pinned recent messages exceed the token budget, so unpinned noise should trigger compaction.
-        assert!(should_compact(&messages, &config, None, None, None));
-    }
+    // v0.8.11: removed `should_compact_counts_only_unpinned_messages` and
+    // `should_compact_when_pins_consume_budget` — both tested the
+    // message-count compaction trigger that v0.8.11 deleted. The
+    // pinned-tokens accounting they exercised is still tested by
+    // `should_compact_ignores_fully_pinned_context` below; the rest of
+    // their setup has no contemporary contract to pin.

    #[test]
    fn enforce_tool_call_pairs_removes_orphaned_tool_call() {
@@ -1925,8 +1882,7 @@ mod tests {
    fn test_should_compact_token_threshold_triggers() {
        let config = CompactionConfig {
            enabled: true,
-            token_threshold: 100,    // Low threshold for testing
-            message_threshold: 1000, // High message threshold
+            token_threshold: 100, // Low threshold for testing
            auto_floor_tokens: 0,
            ..Default::default()
        };
@@ -1945,7 +1901,6 @@ mod tests {
        let config = CompactionConfig {
            enabled: true,
            token_threshold: 1000,
-            message_threshold: 1000,
            ..Default::default()
        };

@@ -1963,8 +1918,7 @@ mod tests {
    fn auto_compaction_floor_blocks_below_500k_even_when_threshold_says_yes() {
        let config = CompactionConfig {
            enabled: true,
-            token_threshold: 100,    // would normally fire instantly
-            message_threshold: 1000, // not the trigger
+            token_threshold: 100, // would normally fire instantly
            // Use the production default explicitly so this test pins the
            // floor's contract rather than relying on `Default`.
            auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
@@ -1983,7 +1937,6 @@ mod tests {
        let config = CompactionConfig {
            enabled: true,
            token_threshold: 2_000_000,
-            message_threshold: 2_000,
            auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
            ..Default::default()
        };
@@ -2307,7 +2260,6 @@ mod tests {
        let _config = CompactionConfig {
            enabled: true,
            token_threshold: 1000,
-            message_threshold: 5,
            ..Default::default()
        };

@@ -2323,9 +2275,7 @@ mod tests {
            msg("assistant", "recent 2"),
        ];

-        // Should compact because:
-        // - More than message_threshold (5) unpinned messages
-        // - src/main.rs mention pins message 0
+        // src/main.rs mention should pin message 0 in the plan.
        let plan = plan_compaction(
            &messages,
            Some(&workspace),
@@ -1183,7 +1183,10 @@ impl Engine {
            .token_threshold
            .min(target_budget.saturating_sub(1))
            .max(1);
-        forced_config.message_threshold = forced_config.message_threshold.max(1);
+        // v0.8.11: forced compaction (capacity guardrail) bypasses the floor
+        // because we're at a hard ceiling and have to free budget regardless
+        // of cache cost.
+        forced_config.auto_floor_tokens = 0;

        match compact_messages_safe(
            client,
@@ -3711,7 +3711,7 @@ async fn run_exec_agent(
    use crate::core::engine::{EngineConfig, spawn_engine};
    use crate::core::events::Event;
    use crate::core::ops::Op;
-    use crate::models::{compaction_message_threshold_for_model, compaction_threshold_for_model};
+    use crate::models::compaction_threshold_for_model;
    use crate::tools::plan::new_shared_plan_state;
    use crate::tools::todo::new_shared_todo_list;
    use crate::tui::app::AppMode;
@@ -3725,7 +3725,6 @@ async fn run_exec_agent(
        enabled: false,
        model: model.to_string(),
        token_threshold: compaction_threshold_for_model(model),
-        message_threshold: compaction_message_threshold_for_model(model),
        ..Default::default()
    };

@@ -14,10 +14,7 @@ pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
 /// models resolve to their own scaled value via
 /// [`compaction_threshold_for_model`] (#664).
 pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
-pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50;
 const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
-const COMPACTION_MESSAGE_DIVISOR: u32 = 500;
-const MAX_COMPACTION_MESSAGE_THRESHOLD: usize = 2_000;

 // === Core Message Types ===

@@ -298,21 +295,6 @@ pub fn compaction_threshold_for_model_and_effort(
    compaction_threshold_for_model(model)
 }

-/// Derive a compaction message-count threshold from model context window.
-#[must_use]
-pub fn compaction_message_threshold_for_model(model: &str) -> usize {
-    let Some(window) = context_window_for_model(model) else {
-        return DEFAULT_COMPACTION_MESSAGE_THRESHOLD;
-    };
-
-    let scaled = usize::try_from(window / COMPACTION_MESSAGE_DIVISOR)
-        .unwrap_or(DEFAULT_COMPACTION_MESSAGE_THRESHOLD);
-    scaled.clamp(
-        DEFAULT_COMPACTION_MESSAGE_THRESHOLD,
-        MAX_COMPACTION_MESSAGE_THRESHOLD,
-    )
-}
-
 // === Streaming Structures ===

 #[allow(dead_code)]
@@ -469,24 +451,9 @@ mod tests {
        assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400);
    }

-    #[test]
-    fn compaction_message_threshold_scales_with_context_window() {
-        assert_eq!(
-            compaction_message_threshold_for_model("deepseek-v3.2-128k"),
-            256
-        );
-        assert_eq!(compaction_message_threshold_for_model("unknown-model"), 50);
-        // 200k / 500 = 400, within the 2k cap.
-        assert_eq!(compaction_message_threshold_for_model("claude-3"), 400);
-    }
-
    #[test]
    fn compaction_scales_for_deepseek_v4_1m_context() {
        assert_eq!(compaction_threshold_for_model("deepseek-v4-pro"), 800_000);
-        assert_eq!(
-            compaction_message_threshold_for_model("deepseek-v4-pro"),
-            2_000
-        );
    }

    #[test]
@@ -23,10 +23,7 @@ use crate::core::coherence::CoherenceState;
 use crate::core::engine::{EngineConfig, EngineHandle, spawn_engine};
 use crate::core::events::{Event as EngineEvent, TurnOutcomeStatus};
 use crate::core::ops::Op;
-use crate::models::{
-    ContentBlock, Message, SystemPrompt, Usage, compaction_message_threshold_for_model,
-    compaction_threshold_for_model,
-};
+use crate::models::{ContentBlock, Message, SystemPrompt, Usage, compaction_threshold_for_model};
 use crate::tools::plan::new_shared_plan_state;
 use crate::tools::subagent::SubAgentStatus;
 use crate::tools::todo::new_shared_todo_list;
@@ -1765,7 +1762,6 @@ impl RuntimeThreadManager {
            enabled: false,
            model: thread.model.clone(),
            token_threshold: compaction_threshold_for_model(&thread.model),
-            message_threshold: compaction_message_threshold_for_model(&thread.model),
            ..Default::default()
        };
        let network_policy = self.config.network.clone().map(|toml_cfg| {
@@ -15,10 +15,7 @@ use crate::core::coherence::CoherenceState;
 use crate::cycle_manager::{CycleBriefing, CycleConfig};
 use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult};
 use crate::localization::{Locale, MessageId, resolve_locale, tr};
-use crate::models::{
-    Message, SystemPrompt, compaction_message_threshold_for_model,
-    compaction_threshold_for_model_and_effort,
-};
+use crate::models::{Message, SystemPrompt, compaction_threshold_for_model_and_effort};
 use crate::palette::{self, UiTheme};
 use crate::session_manager::SessionContextReference;
 use crate::settings::Settings;
@@ -3169,7 +3166,6 @@ impl App {
        CompactionConfig {
            enabled: self.auto_compact,
            token_threshold: self.compact_threshold,
-            message_threshold: compaction_message_threshold_for_model(&self.model),
            model: self.model.clone(),
            ..Default::default()
        }