refactor(compaction): drop message_threshold, token-only triggering
The `message_threshold` field on `CompactionConfig` was a 128K-era heuristic that fired compaction on long sessions of small messages — exactly the case where rewriting V4's prefix cache is most wasteful. Token budget is the only signal that maps to actual model context pressure; counting messages adds nothing. Changes: * Remove `CompactionConfig::message_threshold` field. * Remove the message-count branch in `should_compact` — token threshold + 500K floor is now the sole compaction trigger. * Remove `compaction_message_threshold_for_model`, `DEFAULT_COMPACTION_MESSAGE_THRESHOLD`, `COMPACTION_MESSAGE_DIVISOR`, `MAX_COMPACTION_MESSAGE_THRESHOLD` from `models.rs`. * Drop the `forced_config.message_threshold` tweak in the engine's capacity-guardrail forced-compaction path; that path now also bypasses the floor (`auto_floor_tokens = 0`) because we're at a hard ceiling and have to free budget regardless of cache cost. * Update production constructors (`main.rs`, `runtime_threads.rs`, `app.rs::compaction_config`) to drop the field. * Update tests: keep the floor + token-threshold assertions, delete the two tests that specifically validated message-count triggering, replace `should_compact_respects_message_threshold` with `message_count_no_longer_triggers_compaction` pinning the new contract. Verified locally: * `cargo fmt --all -- --check` clean. * `cargo clippy --workspace --all-targets --all-features --locked -- -D warnings` clean. * `cargo test --workspace --all-features --locked` — 2036 passed in TUI bin (2 ignored), all other crates green. * parity gates: snapshot, parity_protocol, parity_state — all pass. * `git diff --exit-code -- Cargo.lock` — clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+50
-100
@@ -18,19 +18,25 @@ use crate::models::{
|
||||
};
|
||||
|
||||
/// Configuration for conversation compaction behavior.
|
||||
///
|
||||
/// v0.8.11 simplified this from the prior token-OR-message-count trigger
|
||||
/// to a token-only trigger gated by an absolute floor. The
|
||||
/// `message_threshold` field was removed: its only purpose was to fire
|
||||
/// compaction on long sessions of small messages, which is exactly the
|
||||
/// case where rewriting the V4 prefix cache is least valuable. Token
|
||||
/// budget is the right signal; message count was a 128K-era heuristic.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct CompactionConfig {
|
||||
pub enabled: bool,
|
||||
pub token_threshold: usize,
|
||||
pub message_threshold: usize,
|
||||
pub model: String,
|
||||
pub cache_summary: bool,
|
||||
/// Hard floor — `should_compact` returns `false` when total session
|
||||
/// tokens fall below this number, regardless of `enabled`,
|
||||
/// `token_threshold`, or `message_threshold`. Defaults to
|
||||
/// [`MINIMUM_AUTO_COMPACTION_TOKENS`] (500K) for v0.8.11+. Tests that
|
||||
/// want to exercise the older threshold/message-count logic at small
|
||||
/// fixture sizes can set this to `0` to disable the floor.
|
||||
/// tokens fall below this number, regardless of `enabled` or
|
||||
/// `token_threshold`. Defaults to [`MINIMUM_AUTO_COMPACTION_TOKENS`]
|
||||
/// (500K) for v0.8.11+. Tests that want to exercise the threshold
|
||||
/// logic at small fixture sizes can set this to `0` to disable the
|
||||
/// floor.
|
||||
pub auto_floor_tokens: usize,
|
||||
}
|
||||
|
||||
@@ -50,7 +56,6 @@ impl Default for CompactionConfig {
|
||||
// default no longer lies. Real call sites override this via
|
||||
// `compaction_threshold_for_model_and_effort`.
|
||||
token_threshold: 800_000,
|
||||
message_threshold: 50,
|
||||
model: DEFAULT_TEXT_MODEL.to_string(),
|
||||
cache_summary: true,
|
||||
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
|
||||
@@ -61,17 +66,15 @@ impl Default for CompactionConfig {
|
||||
/// Hard floor for automatic compaction in v0.8.11+.
|
||||
///
|
||||
/// Below this token count, `should_compact` returns `false` regardless of
|
||||
/// `enabled`, `token_threshold`, or `message_threshold`. The point of the
|
||||
/// floor is V4 prefix-cache economics: compaction rewrites the stable
|
||||
/// prefix, which destroys the KV cache. At low token counts the prefix
|
||||
/// cache is healthy and compaction's cost (full re-prefill at miss prices)
|
||||
/// dwarfs its benefit (a tiny budget reclaim). Above the floor compaction
|
||||
/// can still be net-positive — cache is already pressured, the prefix has
|
||||
/// drifted, and freeing budget matters.
|
||||
/// `enabled` or `token_threshold`. The point of the floor is V4 prefix-cache
|
||||
/// economics: compaction rewrites the stable prefix, which destroys the KV
|
||||
/// cache. At low token counts the prefix cache is healthy and compaction's
|
||||
/// cost (full re-prefill at miss prices) dwarfs its benefit (a tiny budget
|
||||
/// reclaim). Above the floor compaction can still be net-positive — cache
|
||||
/// is already pressured, the prefix has drifted, and freeing budget matters.
|
||||
///
|
||||
/// Manual `/compact` slash command and the model-callable `compact_now`
|
||||
/// tool both bypass this floor with a deliberate refusal message — they
|
||||
/// represent explicit agency rather than implicit policy.
|
||||
/// Manual `/compact` slash command bypasses this floor with explicit user
|
||||
/// agency.
|
||||
///
|
||||
/// Constant rather than configurable for v0.8.11. If anyone needs to dial
|
||||
/// it (smaller models, opinionated workflows), we can add a setting later.
|
||||
@@ -645,7 +648,6 @@ pub fn should_compact(
|
||||
.iter()
|
||||
.map(|&idx| estimate_tokens_for_message(&messages[idx], false))
|
||||
.sum();
|
||||
let pinned_count = plan.pinned_indices.len();
|
||||
|
||||
let token_estimate: usize = plan
|
||||
.summarize_indices
|
||||
@@ -656,21 +658,19 @@ pub fn should_compact(
|
||||
|
||||
// Pinned messages consume part of the budget, so compact earlier when needed.
|
||||
let effective_token_threshold = config.token_threshold.saturating_sub(pinned_tokens);
|
||||
let effective_message_threshold = config.message_threshold.saturating_sub(pinned_count);
|
||||
|
||||
// Always compact if we exceed the token threshold, even with few unpinned messages.
|
||||
if token_estimate > effective_token_threshold && effective_token_threshold > 0 {
|
||||
return true;
|
||||
// Token-only trigger (v0.8.11): the prior message-count branch was a
|
||||
// 128K-era heuristic that fired compaction on long chats of small
|
||||
// messages — exactly the case where rewriting the V4 prefix cache is
|
||||
// most wasteful. Token budget is the only signal that maps to actual
|
||||
// model context pressure.
|
||||
if effective_token_threshold == 0 {
|
||||
return message_count >= MIN_SUMMARIZE_MESSAGES;
|
||||
}
|
||||
|
||||
let enough_unpinned = message_count >= MIN_SUMMARIZE_MESSAGES
|
||||
|| effective_token_threshold == 0
|
||||
|| effective_message_threshold == 0;
|
||||
if !enough_unpinned {
|
||||
if message_count < MIN_SUMMARIZE_MESSAGES {
|
||||
return false;
|
||||
}
|
||||
|
||||
token_estimate > effective_token_threshold || message_count > effective_message_threshold
|
||||
token_estimate > effective_token_threshold
|
||||
}
|
||||
|
||||
fn truncate_chars(text: &str, max_chars: usize) -> &str {
|
||||
@@ -1487,20 +1487,22 @@ mod tests {
|
||||
assert!(!should_compact(&messages, &config, None, None, None));
|
||||
}
|
||||
|
||||
/// v0.8.11: message-count is no longer a compaction trigger. Long
|
||||
/// chats of small messages stay uncompacted because rewriting the V4
|
||||
/// prefix cache for a tiny budget reclaim is net-negative. Only token
|
||||
/// pressure (and the explicit `/compact` slash command) trigger
|
||||
/// compaction.
|
||||
#[test]
|
||||
fn should_compact_respects_message_threshold() {
|
||||
fn message_count_no_longer_triggers_compaction() {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 1_000_000, // Very high
|
||||
message_threshold: 5,
|
||||
// Disable the v0.8.11 500K floor so this test exercises the
|
||||
// pure message-count threshold logic at small fixture sizes.
|
||||
token_threshold: 1_000_000,
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Under threshold
|
||||
let few_messages: Vec<Message> = (0..4)
|
||||
// 200 tiny messages, well above the prior message threshold.
|
||||
let many_messages: Vec<Message> = (0..200)
|
||||
.map(|_| Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
@@ -1509,19 +1511,9 @@ mod tests {
|
||||
}],
|
||||
})
|
||||
.collect();
|
||||
assert!(!should_compact(&few_messages, &config, None, None, None));
|
||||
|
||||
// Over threshold
|
||||
let many_messages: Vec<Message> = (0..10)
|
||||
.map(|_| Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "x".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
.collect();
|
||||
assert!(should_compact(&many_messages, &config, None, None, None));
|
||||
// Token total stays minuscule so the token threshold is not hit;
|
||||
// without the prior message-count trigger, no compaction.
|
||||
assert!(!should_compact(&many_messages, &config, None, None, None));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1619,7 +1611,6 @@ mod tests {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 10,
|
||||
message_threshold: 2,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -1630,46 +1621,12 @@ mod tests {
|
||||
assert!(!should_compact(&messages, &config, None, None, None));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_compact_counts_only_unpinned_messages() {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 1_000_000,
|
||||
message_threshold: 5,
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut messages: Vec<Message> = (0..7)
|
||||
.map(|i| msg("user", &format!("noise message {i}")))
|
||||
.collect();
|
||||
messages.push(msg("user", "Focus on src/core/engine.rs"));
|
||||
messages.extend((0..4).map(|i| msg("assistant", &format!("recent {i}"))));
|
||||
|
||||
assert!(should_compact(&messages, &config, None, None, None));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_compact_when_pins_consume_budget() {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 50,
|
||||
message_threshold: 50,
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut messages = vec![msg("user", "noise 0"), msg("assistant", "noise 1")];
|
||||
messages.extend((0..4).map(|_| {
|
||||
msg(
|
||||
"assistant",
|
||||
&format!("{} src/core/engine.rs", "x".repeat(400)),
|
||||
)
|
||||
}));
|
||||
|
||||
// Pinned recent messages exceed the token budget, so unpinned noise should trigger compaction.
|
||||
assert!(should_compact(&messages, &config, None, None, None));
|
||||
}
|
||||
// v0.8.11: removed `should_compact_counts_only_unpinned_messages` and
|
||||
// `should_compact_when_pins_consume_budget` — both tested the
|
||||
// message-count compaction trigger that v0.8.11 deleted. The
|
||||
// pinned-tokens accounting they exercised is still tested by
|
||||
// `should_compact_ignores_fully_pinned_context` below; the rest of
|
||||
// their setup has no contemporary contract to pin.
|
||||
|
||||
#[test]
|
||||
fn enforce_tool_call_pairs_removes_orphaned_tool_call() {
|
||||
@@ -1925,8 +1882,7 @@ mod tests {
|
||||
fn test_should_compact_token_threshold_triggers() {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 100, // Low threshold for testing
|
||||
message_threshold: 1000, // High message threshold
|
||||
token_threshold: 100, // Low threshold for testing
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
@@ -1945,7 +1901,6 @@ mod tests {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 1000,
|
||||
message_threshold: 1000,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -1963,8 +1918,7 @@ mod tests {
|
||||
fn auto_compaction_floor_blocks_below_500k_even_when_threshold_says_yes() {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 100, // would normally fire instantly
|
||||
message_threshold: 1000, // not the trigger
|
||||
token_threshold: 100, // would normally fire instantly
|
||||
// Use the production default explicitly so this test pins the
|
||||
// floor's contract rather than relying on `Default`.
|
||||
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
|
||||
@@ -1983,7 +1937,6 @@ mod tests {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 2_000_000,
|
||||
message_threshold: 2_000,
|
||||
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
|
||||
..Default::default()
|
||||
};
|
||||
@@ -2307,7 +2260,6 @@ mod tests {
|
||||
let _config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 1000,
|
||||
message_threshold: 5,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -2323,9 +2275,7 @@ mod tests {
|
||||
msg("assistant", "recent 2"),
|
||||
];
|
||||
|
||||
// Should compact because:
|
||||
// - More than message_threshold (5) unpinned messages
|
||||
// - src/main.rs mention pins message 0
|
||||
// src/main.rs mention should pin message 0 in the plan.
|
||||
let plan = plan_compaction(
|
||||
&messages,
|
||||
Some(&workspace),
|
||||
|
||||
@@ -1183,7 +1183,10 @@ impl Engine {
|
||||
.token_threshold
|
||||
.min(target_budget.saturating_sub(1))
|
||||
.max(1);
|
||||
forced_config.message_threshold = forced_config.message_threshold.max(1);
|
||||
// v0.8.11: forced compaction (capacity guardrail) bypasses the floor
|
||||
// because we're at a hard ceiling and have to free budget regardless
|
||||
// of cache cost.
|
||||
forced_config.auto_floor_tokens = 0;
|
||||
|
||||
match compact_messages_safe(
|
||||
client,
|
||||
|
||||
@@ -3711,7 +3711,7 @@ async fn run_exec_agent(
|
||||
use crate::core::engine::{EngineConfig, spawn_engine};
|
||||
use crate::core::events::Event;
|
||||
use crate::core::ops::Op;
|
||||
use crate::models::{compaction_message_threshold_for_model, compaction_threshold_for_model};
|
||||
use crate::models::compaction_threshold_for_model;
|
||||
use crate::tools::plan::new_shared_plan_state;
|
||||
use crate::tools::todo::new_shared_todo_list;
|
||||
use crate::tui::app::AppMode;
|
||||
@@ -3725,7 +3725,6 @@ async fn run_exec_agent(
|
||||
enabled: false,
|
||||
model: model.to_string(),
|
||||
token_threshold: compaction_threshold_for_model(model),
|
||||
message_threshold: compaction_message_threshold_for_model(model),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
|
||||
@@ -14,10 +14,7 @@ pub const DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS: u32 = 1_000_000;
|
||||
/// models resolve to their own scaled value via
|
||||
/// [`compaction_threshold_for_model`] (#664).
|
||||
pub const DEFAULT_COMPACTION_TOKEN_THRESHOLD: usize = 102_400;
|
||||
pub const DEFAULT_COMPACTION_MESSAGE_THRESHOLD: usize = 50;
|
||||
const COMPACTION_THRESHOLD_PERCENT: u32 = 80;
|
||||
const COMPACTION_MESSAGE_DIVISOR: u32 = 500;
|
||||
const MAX_COMPACTION_MESSAGE_THRESHOLD: usize = 2_000;
|
||||
|
||||
// === Core Message Types ===
|
||||
|
||||
@@ -298,21 +295,6 @@ pub fn compaction_threshold_for_model_and_effort(
|
||||
compaction_threshold_for_model(model)
|
||||
}
|
||||
|
||||
/// Derive a compaction message-count threshold from model context window.
|
||||
#[must_use]
|
||||
pub fn compaction_message_threshold_for_model(model: &str) -> usize {
|
||||
let Some(window) = context_window_for_model(model) else {
|
||||
return DEFAULT_COMPACTION_MESSAGE_THRESHOLD;
|
||||
};
|
||||
|
||||
let scaled = usize::try_from(window / COMPACTION_MESSAGE_DIVISOR)
|
||||
.unwrap_or(DEFAULT_COMPACTION_MESSAGE_THRESHOLD);
|
||||
scaled.clamp(
|
||||
DEFAULT_COMPACTION_MESSAGE_THRESHOLD,
|
||||
MAX_COMPACTION_MESSAGE_THRESHOLD,
|
||||
)
|
||||
}
|
||||
|
||||
// === Streaming Structures ===
|
||||
|
||||
#[allow(dead_code)]
|
||||
@@ -469,24 +451,9 @@ mod tests {
|
||||
assert_eq!(compaction_threshold_for_model("unknown-model"), 102_400);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compaction_message_threshold_scales_with_context_window() {
|
||||
assert_eq!(
|
||||
compaction_message_threshold_for_model("deepseek-v3.2-128k"),
|
||||
256
|
||||
);
|
||||
assert_eq!(compaction_message_threshold_for_model("unknown-model"), 50);
|
||||
// 200k / 500 = 400, within the 2k cap.
|
||||
assert_eq!(compaction_message_threshold_for_model("claude-3"), 400);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn compaction_scales_for_deepseek_v4_1m_context() {
|
||||
assert_eq!(compaction_threshold_for_model("deepseek-v4-pro"), 800_000);
|
||||
assert_eq!(
|
||||
compaction_message_threshold_for_model("deepseek-v4-pro"),
|
||||
2_000
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -23,10 +23,7 @@ use crate::core::coherence::CoherenceState;
|
||||
use crate::core::engine::{EngineConfig, EngineHandle, spawn_engine};
|
||||
use crate::core::events::{Event as EngineEvent, TurnOutcomeStatus};
|
||||
use crate::core::ops::Op;
|
||||
use crate::models::{
|
||||
ContentBlock, Message, SystemPrompt, Usage, compaction_message_threshold_for_model,
|
||||
compaction_threshold_for_model,
|
||||
};
|
||||
use crate::models::{ContentBlock, Message, SystemPrompt, Usage, compaction_threshold_for_model};
|
||||
use crate::tools::plan::new_shared_plan_state;
|
||||
use crate::tools::subagent::SubAgentStatus;
|
||||
use crate::tools::todo::new_shared_todo_list;
|
||||
@@ -1765,7 +1762,6 @@ impl RuntimeThreadManager {
|
||||
enabled: false,
|
||||
model: thread.model.clone(),
|
||||
token_threshold: compaction_threshold_for_model(&thread.model),
|
||||
message_threshold: compaction_message_threshold_for_model(&thread.model),
|
||||
..Default::default()
|
||||
};
|
||||
let network_policy = self.config.network.clone().map(|toml_cfg| {
|
||||
|
||||
@@ -15,10 +15,7 @@ use crate::core::coherence::CoherenceState;
|
||||
use crate::cycle_manager::{CycleBriefing, CycleConfig};
|
||||
use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult};
|
||||
use crate::localization::{Locale, MessageId, resolve_locale, tr};
|
||||
use crate::models::{
|
||||
Message, SystemPrompt, compaction_message_threshold_for_model,
|
||||
compaction_threshold_for_model_and_effort,
|
||||
};
|
||||
use crate::models::{Message, SystemPrompt, compaction_threshold_for_model_and_effort};
|
||||
use crate::palette::{self, UiTheme};
|
||||
use crate::session_manager::SessionContextReference;
|
||||
use crate::settings::Settings;
|
||||
@@ -3169,7 +3166,6 @@ impl App {
|
||||
CompactionConfig {
|
||||
enabled: self.auto_compact,
|
||||
token_threshold: self.compact_threshold,
|
||||
message_threshold: compaction_message_threshold_for_model(&self.model),
|
||||
model: self.model.clone(),
|
||||
..Default::default()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user