fix(compaction): 500K hard floor plus V4 default

This commit is contained in:
Hunter Bown
2026-05-04 22:06:07 -05:00
parent 43bb055dfe
commit a4dee56fcc
+119 -6
View File
@@ -25,25 +25,58 @@ pub struct CompactionConfig {
pub message_threshold: usize,
pub model: String,
pub cache_summary: bool,
/// Hard floor — `should_compact` returns `false` when total session
/// tokens fall below this number, regardless of `enabled`,
/// `token_threshold`, or `message_threshold`. Defaults to
/// [`MINIMUM_AUTO_COMPACTION_TOKENS`] (500K) for v0.8.11+. Tests that
/// want to exercise the older threshold/message-count logic at small
/// fixture sizes can set this to `0` to disable the floor.
pub auto_floor_tokens: usize,
}
impl Default for CompactionConfig {
fn default() -> Self {
Self {
// ON BY DEFAULT since v0.8.6 (#402 P0 survivability).
// Long-running sessions need automatic compaction to stay
// within the model's context budget. Users who prefer the
// previous behaviour can opt out via `auto_compact = false`
// in settings or `compaction.enabled = false` in config.
// ON BY DEFAULT since v0.8.6 (#402 P0 survivability) — but the
// engine-level `auto_compact` setting was flipped OFF in v0.8.11
// (#665) so this default is mostly a fallback for code paths
// that build a `CompactionConfig` without going through
// `compaction_threshold_for_model_and_effort`. Real per-model
// values are still derived through that helper.
enabled: true,
token_threshold: 50000,
// v0.8.11: 50K was a 128K-era leftover that biased every
// unconfigured caller toward "compact almost immediately on V4."
// Bumped to 800K (80% of V4's 1M window) so the dead-code
// default no longer lies. Real call sites override this via
// `compaction_threshold_for_model_and_effort`.
token_threshold: 800_000,
message_threshold: 50,
model: DEFAULT_TEXT_MODEL.to_string(),
cache_summary: true,
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
}
}
}
/// Hard floor for automatic compaction in v0.8.11+.
///
/// Below this token count, `should_compact` returns `false` regardless of
/// `enabled`, `token_threshold`, or `message_threshold`. The point of the
/// floor is V4 prefix-cache economics: compaction rewrites the stable
/// prefix, which destroys the KV cache. At low token counts the prefix
/// cache is healthy and compaction's cost (full re-prefill at miss prices)
/// dwarfs its benefit (a tiny budget reclaim). Above the floor compaction
/// can still be net-positive — cache is already pressured, the prefix has
/// drifted, and freeing budget matters.
///
/// Manual `/compact` slash command and the model-callable `compact_now`
/// tool both bypass this floor with a deliberate refusal message — they
/// represent explicit agency rather than implicit policy.
///
/// Constant rather than configurable for v0.8.11. If anyone needs to dial
/// it (smaller models, opinionated workflows), we can add a setting later.
pub const MINIMUM_AUTO_COMPACTION_TOKENS: usize = 500_000;
pub const KEEP_RECENT_MESSAGES: usize = 4;
const RECENT_WORKING_SET_WINDOW: usize = 12;
const MAX_WORKING_SET_PATHS: usize = 24;
@@ -585,6 +618,21 @@ pub fn should_compact(
return false;
}
// v0.8.11: hard floor enforcement. Below the floor (default 500K tokens
// — see `MINIMUM_AUTO_COMPACTION_TOKENS`), automatic compaction is
// refused because rewriting the prefix kills V4's prefix cache for
// little budget recovery. Manual `/compact` and the `compact_now` tool
// bypass this floor by going through different code paths.
if config.auto_floor_tokens > 0 {
let total_session_tokens: usize = messages
.iter()
.map(|m| estimate_tokens_for_message(m, false))
.sum();
if total_session_tokens < config.auto_floor_tokens {
return false;
}
}
let plan = plan_compaction(
messages,
workspace,
@@ -1445,6 +1493,9 @@ mod tests {
enabled: true,
token_threshold: 1_000_000, // Very high
message_threshold: 5,
// Disable the v0.8.11 500K floor so this test exercises the
// pure message-count threshold logic at small fixture sizes.
auto_floor_tokens: 0,
..Default::default()
};
@@ -1585,6 +1636,7 @@ mod tests {
enabled: true,
token_threshold: 1_000_000,
message_threshold: 5,
auto_floor_tokens: 0,
..Default::default()
};
@@ -1603,6 +1655,7 @@ mod tests {
enabled: true,
token_threshold: 50,
message_threshold: 50,
auto_floor_tokens: 0,
..Default::default()
};
@@ -1874,6 +1927,7 @@ mod tests {
enabled: true,
token_threshold: 100, // Low threshold for testing
message_threshold: 1000, // High message threshold
auto_floor_tokens: 0,
..Default::default()
};
@@ -1901,6 +1955,65 @@ mod tests {
assert!(!should_compact(&messages, &config, None, None, None));
}
/// v0.8.11: the 500K hard floor blocks auto-compaction even when the
/// token-percentage threshold would otherwise fire. This is the V4
/// prefix-cache protection — below 500K total tokens, rewriting the
/// prefix loses cache for tiny budget gains.
#[test]
fn auto_compaction_floor_blocks_below_500k_even_when_threshold_says_yes() {
let config = CompactionConfig {
enabled: true,
token_threshold: 100, // would normally fire instantly
message_threshold: 1000, // not the trigger
// Use the production default explicitly so this test pins the
// floor's contract rather than relying on `Default`.
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
..Default::default()
};
let messages: Vec<Message> = (0..10).map(|_| msg("user", &"x".repeat(50))).collect();
// Total tokens way under 500K, so floor blocks compaction.
assert!(!should_compact(&messages, &config, None, None, None));
}
/// v0.8.11: when total tokens cross the 500K floor, the existing
/// threshold/message-count logic takes over again.
#[test]
fn auto_compaction_floor_yields_to_threshold_logic_above_500k() {
let config = CompactionConfig {
enabled: true,
token_threshold: 2_000_000,
message_threshold: 2_000,
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
..Default::default()
};
// Each message ~500 tokens; 1100 messages → ~550K total tokens.
// That's above the floor (500K) AND below the deliberately high
// token_threshold, so auto-compaction stays off — by threshold,
// not floor.
let messages: Vec<Message> = (0..1100).map(|_| msg("user", &"x".repeat(2000))).collect();
assert!(!should_compact(&messages, &config, None, None, None));
// Crank threshold below total → compaction fires now that we're
// past the floor.
let config_lower = CompactionConfig {
token_threshold: 100_000,
..config
};
assert!(should_compact(&messages, &config_lower, None, None, None));
}
/// `CompactionConfig::default()` ships with the 500K floor on by
/// default — production callers via `..Default::default()` get the
/// safety guarantee automatically.
#[test]
fn compaction_config_default_carries_500k_floor() {
let config = CompactionConfig::default();
assert_eq!(config.auto_floor_tokens, MINIMUM_AUTO_COMPACTION_TOKENS);
assert_eq!(config.auto_floor_tokens, 500_000);
}
#[test]
fn test_plan_compaction_pins_error_messages() {
let messages = vec![