fix(compaction): 500K hard floor plus V4 default
This commit is contained in:
@@ -25,25 +25,58 @@ pub struct CompactionConfig {
|
||||
pub message_threshold: usize,
|
||||
pub model: String,
|
||||
pub cache_summary: bool,
|
||||
/// Hard floor — `should_compact` returns `false` when total session
|
||||
/// tokens fall below this number, regardless of `enabled`,
|
||||
/// `token_threshold`, or `message_threshold`. Defaults to
|
||||
/// [`MINIMUM_AUTO_COMPACTION_TOKENS`] (500K) for v0.8.11+. Tests that
|
||||
/// want to exercise the older threshold/message-count logic at small
|
||||
/// fixture sizes can set this to `0` to disable the floor.
|
||||
pub auto_floor_tokens: usize,
|
||||
}
|
||||
|
||||
impl Default for CompactionConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
// ON BY DEFAULT since v0.8.6 (#402 P0 survivability).
|
||||
// Long-running sessions need automatic compaction to stay
|
||||
// within the model's context budget. Users who prefer the
|
||||
// previous behaviour can opt out via `auto_compact = false`
|
||||
// in settings or `compaction.enabled = false` in config.
|
||||
// ON BY DEFAULT since v0.8.6 (#402 P0 survivability) — but the
|
||||
// engine-level `auto_compact` setting was flipped OFF in v0.8.11
|
||||
// (#665) so this default is mostly a fallback for code paths
|
||||
// that build a `CompactionConfig` without going through
|
||||
// `compaction_threshold_for_model_and_effort`. Real per-model
|
||||
// values are still derived through that helper.
|
||||
enabled: true,
|
||||
token_threshold: 50000,
|
||||
// v0.8.11: 50K was a 128K-era leftover that biased every
|
||||
// unconfigured caller toward "compact almost immediately on V4."
|
||||
// Bumped to 800K (80% of V4's 1M window) so the dead-code
|
||||
// default no longer lies. Real call sites override this via
|
||||
// `compaction_threshold_for_model_and_effort`.
|
||||
token_threshold: 800_000,
|
||||
message_threshold: 50,
|
||||
model: DEFAULT_TEXT_MODEL.to_string(),
|
||||
cache_summary: true,
|
||||
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Hard floor for automatic compaction in v0.8.11+.
|
||||
///
|
||||
/// Below this token count, `should_compact` returns `false` regardless of
|
||||
/// `enabled`, `token_threshold`, or `message_threshold`. The point of the
|
||||
/// floor is V4 prefix-cache economics: compaction rewrites the stable
|
||||
/// prefix, which destroys the KV cache. At low token counts the prefix
|
||||
/// cache is healthy and compaction's cost (full re-prefill at miss prices)
|
||||
/// dwarfs its benefit (a tiny budget reclaim). Above the floor compaction
|
||||
/// can still be net-positive — cache is already pressured, the prefix has
|
||||
/// drifted, and freeing budget matters.
|
||||
///
|
||||
/// Manual `/compact` slash command and the model-callable `compact_now`
|
||||
/// tool both bypass this floor with a deliberate refusal message — they
|
||||
/// represent explicit agency rather than implicit policy.
|
||||
///
|
||||
/// Constant rather than configurable for v0.8.11. If anyone needs to dial
|
||||
/// it (smaller models, opinionated workflows), we can add a setting later.
|
||||
pub const MINIMUM_AUTO_COMPACTION_TOKENS: usize = 500_000;
|
||||
|
||||
pub const KEEP_RECENT_MESSAGES: usize = 4;
|
||||
const RECENT_WORKING_SET_WINDOW: usize = 12;
|
||||
const MAX_WORKING_SET_PATHS: usize = 24;
|
||||
@@ -585,6 +618,21 @@ pub fn should_compact(
|
||||
return false;
|
||||
}
|
||||
|
||||
// v0.8.11: hard floor enforcement. Below the floor (default 500K tokens
|
||||
// — see `MINIMUM_AUTO_COMPACTION_TOKENS`), automatic compaction is
|
||||
// refused because rewriting the prefix kills V4's prefix cache for
|
||||
// little budget recovery. Manual `/compact` and the `compact_now` tool
|
||||
// bypass this floor by going through different code paths.
|
||||
if config.auto_floor_tokens > 0 {
|
||||
let total_session_tokens: usize = messages
|
||||
.iter()
|
||||
.map(|m| estimate_tokens_for_message(m, false))
|
||||
.sum();
|
||||
if total_session_tokens < config.auto_floor_tokens {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
let plan = plan_compaction(
|
||||
messages,
|
||||
workspace,
|
||||
@@ -1445,6 +1493,9 @@ mod tests {
|
||||
enabled: true,
|
||||
token_threshold: 1_000_000, // Very high
|
||||
message_threshold: 5,
|
||||
// Disable the v0.8.11 500K floor so this test exercises the
|
||||
// pure message-count threshold logic at small fixture sizes.
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -1585,6 +1636,7 @@ mod tests {
|
||||
enabled: true,
|
||||
token_threshold: 1_000_000,
|
||||
message_threshold: 5,
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -1603,6 +1655,7 @@ mod tests {
|
||||
enabled: true,
|
||||
token_threshold: 50,
|
||||
message_threshold: 50,
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -1874,6 +1927,7 @@ mod tests {
|
||||
enabled: true,
|
||||
token_threshold: 100, // Low threshold for testing
|
||||
message_threshold: 1000, // High message threshold
|
||||
auto_floor_tokens: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
@@ -1901,6 +1955,65 @@ mod tests {
|
||||
assert!(!should_compact(&messages, &config, None, None, None));
|
||||
}
|
||||
|
||||
/// v0.8.11: the 500K hard floor blocks auto-compaction even when the
|
||||
/// token-percentage threshold would otherwise fire. This is the V4
|
||||
/// prefix-cache protection — below 500K total tokens, rewriting the
|
||||
/// prefix loses cache for tiny budget gains.
|
||||
#[test]
|
||||
fn auto_compaction_floor_blocks_below_500k_even_when_threshold_says_yes() {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 100, // would normally fire instantly
|
||||
message_threshold: 1000, // not the trigger
|
||||
// Use the production default explicitly so this test pins the
|
||||
// floor's contract rather than relying on `Default`.
|
||||
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let messages: Vec<Message> = (0..10).map(|_| msg("user", &"x".repeat(50))).collect();
|
||||
// Total tokens way under 500K, so floor blocks compaction.
|
||||
assert!(!should_compact(&messages, &config, None, None, None));
|
||||
}
|
||||
|
||||
/// v0.8.11: when total tokens cross the 500K floor, the existing
|
||||
/// threshold/message-count logic takes over again.
|
||||
#[test]
|
||||
fn auto_compaction_floor_yields_to_threshold_logic_above_500k() {
|
||||
let config = CompactionConfig {
|
||||
enabled: true,
|
||||
token_threshold: 2_000_000,
|
||||
message_threshold: 2_000,
|
||||
auto_floor_tokens: MINIMUM_AUTO_COMPACTION_TOKENS,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Each message ~500 tokens; 1100 messages → ~550K total tokens.
|
||||
// That's above the floor (500K) AND below the deliberately high
|
||||
// token_threshold, so auto-compaction stays off — by threshold,
|
||||
// not floor.
|
||||
let messages: Vec<Message> = (0..1100).map(|_| msg("user", &"x".repeat(2000))).collect();
|
||||
assert!(!should_compact(&messages, &config, None, None, None));
|
||||
|
||||
// Crank threshold below total → compaction fires now that we're
|
||||
// past the floor.
|
||||
let config_lower = CompactionConfig {
|
||||
token_threshold: 100_000,
|
||||
..config
|
||||
};
|
||||
assert!(should_compact(&messages, &config_lower, None, None, None));
|
||||
}
|
||||
|
||||
/// `CompactionConfig::default()` ships with the 500K floor on by
|
||||
/// default — production callers via `..Default::default()` get the
|
||||
/// safety guarantee automatically.
|
||||
#[test]
|
||||
fn compaction_config_default_carries_500k_floor() {
|
||||
let config = CompactionConfig::default();
|
||||
assert_eq!(config.auto_floor_tokens, MINIMUM_AUTO_COMPACTION_TOKENS);
|
||||
assert_eq!(config.auto_floor_tokens, 500_000);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_plan_compaction_pins_error_messages() {
|
||||
let messages = vec![
|
||||
|
||||
Reference in New Issue
Block a user