Disable default compaction and make context seams opt-in

This commit is contained in:
Hunter Bown
2026-04-29 09:12:20 -05:00
parent 00c92e1c2a
commit 41e8f2b5b2
14 changed files with 265 additions and 114 deletions
+17 -4
View File
@@ -195,22 +195,35 @@ exponential_base = 2.0
# There is no config-file `[compaction]` table yet; detailed thresholds are
# chosen by the TUI from the active model/context budget.
# Append-only Flash seams are experimental and opt-in while the v0.7.5
# context/cache audit validates prefix-cache behavior.
[context]
enabled = false
verbatim_window_turns = 16
l1_threshold = 192000
l2_threshold = 384000
l3_threshold = 576000
cycle_threshold = 768000
seam_model = "deepseek-v4-flash"
# ─────────────────────────────────────────────────────────────────────────────────
# Capacity Controller (runtime pressure guardrails)
# ─────────────────────────────────────────────────────────────────────────────────
[capacity]
enabled = true
low_risk_max = 0.34
enabled = false
low_risk_max = 0.50
medium_risk_max = 0.62
severe_min_slack = -0.25
severe_violation_ratio = 0.40
refresh_cooldown_turns = 2
refresh_cooldown_turns = 6
replan_cooldown_turns = 5
max_replay_per_turn = 1
min_turns_before_guardrail = 2
min_turns_before_guardrail = 4
profile_window = 8
deepseek_v3_2_chat_prior = 3.9
deepseek_v3_2_reasoner_prior = 4.1
deepseek_v4_pro_prior = 3.5
deepseek_v4_flash_prior = 4.2
fallback_default_prior = 3.8
# ─────────────────────────────────────────────────────────────────────────────────
+38 -17
View File
@@ -436,9 +436,10 @@ impl RetryPolicy {
}
/// Context management configuration (append-only layered context with Flash seams).
#[derive(Debug, Clone, Deserialize)]
#[derive(Debug, Clone, Deserialize, Default)]
pub struct ContextConfig {
/// Master enable for layered context management. Default: true.
/// Master enable for layered context management. Default: false while
/// v0.7.5 audits V4 prefix-cache behavior.
#[serde(default)]
pub enabled: Option<bool>,
/// Verbatim window: last N turns never summarized. Default: 16.
@@ -475,21 +476,6 @@ pub struct PerModelContextConfig {
pub cycle_threshold: Option<usize>,
}
impl Default for ContextConfig {
fn default() -> Self {
Self {
enabled: Some(true),
verbatim_window_turns: Some(16),
l1_threshold: Some(192_000),
l2_threshold: Some(384_000),
l3_threshold: Some(576_000),
cycle_threshold: Some(768_000),
seam_model: Some("deepseek-v4-flash".to_string()),
per_model: None,
}
}
}
/// Resolved CLI configuration, including defaults and environment overrides.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct Config {
@@ -2480,6 +2466,41 @@ mod tests {
assert!(normalize_model_name("").is_none());
}
/// A freshly defaulted `Config` must leave the layered-context seam manager
/// switched off, while the fallback thresholds and seam model applied by this
/// test still resolve to the documented values.
#[test]
fn default_context_seams_are_opt_in() {
    let defaults = Config::default();
    let context = &defaults.context;

    // Seams are opt-in: an unset or false flag both count as disabled.
    let seams_enabled = context.enabled.unwrap_or(false);
    assert!(!seams_enabled);

    // Thresholds resolve to the documented values once the fallback is applied.
    let l1 = context.l1_threshold.unwrap_or(192_000);
    let cycle = context.cycle_threshold.unwrap_or(768_000);
    assert_eq!(l1, 192_000);
    assert_eq!(cycle, 768_000);

    // The seam model resolves to the Flash model when nothing is configured.
    let seam_model = context.seam_model.as_deref().unwrap_or("deepseek-v4-flash");
    assert_eq!(seam_model, "deepseek-v4-flash");
}
/// Selecting a profile that carries a default (empty) context section must
/// not clobber a base config that explicitly enabled context seams.
#[test]
fn profile_without_context_does_not_disable_base_context() {
    // Base config opts in to layered context explicitly.
    let base = Config {
        context: ContextConfig {
            enabled: Some(true),
            ..Default::default()
        },
        ..Default::default()
    };

    // The "work" profile is entirely default, so it says nothing about context.
    let mut profiles = HashMap::new();
    profiles.insert("work".to_string(), Config::default());

    let file = ConfigFile {
        base,
        profiles: Some(profiles),
    };

    let merged = apply_profile(file, Some("work")).expect("profile");
    assert_eq!(merged.context.enabled, Some(true));
}
#[test]
fn validate_accepts_future_deepseek_model_id() -> Result<()> {
let config = Config {
+32
View File
@@ -618,6 +618,38 @@ mod tests {
assert_eq!(decide_policy(&cfg, &snap), GuardrailAction::VerifyAndReplan);
}
/// With the default configuration the capacity controller is off: it yields
/// no snapshot even for a high-pressure observation, and its decision is
/// `NoIntervention` with the "disabled" reason.
#[test]
fn default_controller_is_disabled_and_does_not_observe() {
    let defaults = CapacityControllerConfig::default();
    assert!(!defaults.enabled);

    // A deliberately heavy turn: lots of activity and a nearly full context.
    let heavy_turn = CapacityObservationInput {
        turn_index: 1,
        model: "deepseek-v4-pro".to_string(),
        action_count_this_turn: 10,
        tool_calls_recent_window: 10,
        unique_reference_ids_recent_window: 10,
        context_used_ratio: 0.95,
    };

    let mut controller = CapacityController::new(defaults);
    let observed = controller.observe_pre_turn(heavy_turn);
    assert!(observed.is_none());

    let verdict = controller.decide(1, observed.as_ref());
    assert_eq!(verdict.action, GuardrailAction::NoIntervention);
    assert_eq!(verdict.reason, "capacity_controller_disabled");
}
/// Deriving the controller config from a default app `Config` must keep the
/// controller disabled and resolve the thresholds, cooldowns and V4 priors
/// asserted below.
#[test]
fn app_config_without_capacity_keeps_controller_disabled() {
    let app_defaults = crate::config::Config::default();
    let resolved = CapacityControllerConfig::from_app_config(&app_defaults);

    assert!(!resolved.enabled);

    // Risk band and cooldown defaults.
    assert_eq!(resolved.low_risk_max, 0.50);
    assert_eq!(resolved.refresh_cooldown_turns, 6);
    assert_eq!(resolved.min_turns_before_guardrail, 4);

    // Per-model priors for the V4 family.
    let priors = &resolved.model_priors;
    assert_eq!(priors.get("deepseek_v4_pro"), Some(&3.5));
    assert_eq!(priors.get("deepseek_v4_flash"), Some(&4.2));
}
#[test]
fn normalize_v4_pro_variants() {
assert_eq!(
+8 -7
View File
@@ -94,7 +94,8 @@ pub struct EngineConfig {
/// is **disabled by default**; the checkpoint-restart cycle architecture
/// (`cycle_manager`) replaces it. The compaction config is still wired through
/// for the per-tool-result truncation path (`compact_tool_result_for_context`)
/// and for users who explicitly opt back in via `[compaction] enabled = true`.
/// and for users who explicitly opt back in through the `auto_compact`
/// setting or a direct engine config.
pub compaction: CompactionConfig,
/// Checkpoint-restart cycle settings (issue #124).
pub cycle: CycleConfig,
@@ -270,8 +271,8 @@ pub struct Engine {
shared_cancel_token: Arc<StdMutex<CancellationToken>>,
tool_exec_lock: Arc<RwLock<()>>,
capacity_controller: CapacityController,
/// Append-only layered context manager (#159). Produces soft seams at
/// 192K/384K/576K and Flash-cycle briefings at 768K.
/// Append-only layered context manager (#159). Opt-in for v0.7.5 while
/// cache-hit behavior is audited.
seam_manager: Option<SeamManager>,
coherence_state: CoherenceState,
turn_counter: u64,
@@ -1271,12 +1272,12 @@ impl Engine {
let shell_manager = new_shared_shell_manager(config.workspace.clone());
let capacity_controller = CapacityController::new(config.capacity.clone());
// Create Flash seam manager for layered context (#159). Uses the same
// API credentials as the main client but targets the Flash model for
// cost-effective summarisation and cycle briefing work.
// Create Flash seam manager for layered context (#159). v0.7.5 keeps
// this opt-in until the prefix-cache audit proves when seam production
// is worth the extra request and transcript mutation.
let seam_manager = deepseek_client.as_ref().map(|main_client| {
let seam_config = SeamConfig {
enabled: api_config.context.enabled.unwrap_or(true),
enabled: api_config.context.enabled.unwrap_or(false),
verbatim_window_turns: api_config
.context
.verbatim_window_turns
+3 -4
View File
@@ -212,10 +212,9 @@ impl Engine {
// model sees compile errors before its next reasoning step.
self.flush_pending_lsp_diagnostics().await;
// #159: layered context seam checkpoint. Produces soft seams at
// 192K/384K/576K via Flash and appends <archived_context> blocks
// so the model can navigate deep history without losing prefix
// cache affinity.
// #159: layered context seam checkpoint. This is opt-in for
// v0.7.5 while #200 audits cache-hit behavior; when enabled it
// appends <archived_context> blocks rather than replacing history.
self.layered_context_checkpoint().await;
// Build the request
+9 -8
View File
@@ -2,8 +2,9 @@
//!
//! ## Why
//!
//! DeepSeek V4's empirical retrieval elbow is 128K tokens (paper Figure 9 —
//! 8K/0.90, 64K/0.87, 128K/0.85, 256K/0.76, 512K/0.66, 1M/0.59). Lossy
//! DeepSeek V4's empirical retrieval degradation begins around the 256K band
//! (paper Figure 9: 8K/0.90, 64K/0.87, 128K/0.85, 256K/0.76,
//! 512K/0.66, 1M/0.59). Lossy
//! summarization compaction creates a "Frankenstein" context — half verbatim,
//! half paraphrased — that the model cannot tell apart, so it treats the
//! summary as if it were verbatim and confabulates around the gaps.
@@ -28,9 +29,9 @@
//!
//! ## Trigger
//!
//! - Token threshold: **768K** by default (~75% of the 1M window). Soft seams
//! at 192K/384K/576K (layered context manager, #159) handle intermediate
//! thresholds. The hard cycle only fires near the wall.
//! - Token threshold: **768K** by default (~75% of the 1M window). This is a
//! rare overflow safety net. Optional soft seams at 192K/384K/576K are
//! controlled by the opt-in layered context manager (#159).
//! - Phase guard: callers only invoke `should_advance_cycle` at clean turn
//! boundaries (no in-flight tool, no streaming, no approval modal).
//! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators
@@ -58,9 +59,9 @@ const CYCLE_ARCHIVE_SCHEMA_VERSION: u32 = 1;
/// Default token threshold at which a cycle boundary fires.
///
/// Bumped from 110K (pre-#159) to 768K (~75% of 1M window) in v0.7.2.
/// The layered context manager (#159) handles intermediate thresholds via
/// soft seams at 192K/384K/576K, so the hard cycle only fires near the wall.
/// Bumped from 110K to 768K (~75% of 1M window). The layered context manager
/// (#159) can add opt-in soft seams at 192K/384K/576K; the hard cycle remains
/// a near-wall safety net.
pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 768_000;
/// Default cap on the model-curated briefing block.
+2 -2
View File
@@ -2940,8 +2940,8 @@ async fn run_exec_agent(
// Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle
// architecture (issue #124) handles long-context resets via fresh contexts
// rather than progressive summarization. The compaction config is still
// wired through so users who explicitly opt back in via [compaction]
// enabled = true keep their old behavior.
// wired through so users who explicitly opt back in through TUI settings
// or direct engine config keep their old behavior.
let compaction = CompactionConfig {
enabled: false,
model: model.to_string(),
+13 -42
View File
@@ -274,47 +274,18 @@ pub fn compaction_threshold_for_model(model: &str) -> usize {
usize::try_from(threshold).unwrap_or(DEFAULT_COMPACTION_TOKEN_THRESHOLD)
}
/// Mode-aware soft context caps for V4 models.
///
/// DeepSeek V4 paper Figure 9 shows retrieval quality (MRCR MMR) collapses as
/// context grows: 0.90 at 8K, 0.94 at 32K, 0.92 at 128K, 0.66 at 512K, 0.49
/// at 1M. The paper's own eval harness uses budget tiers per §5.3.1:
///
/// | Mode / Reasoning tier | Soft cap | Paper eval window |
/// |-----------------------|----------|-------------------|
/// | Plan / Non-Think (off) | 64,000 | 8K-128K |
/// | Agent / High | 192,000 | 128K |
/// | YOLO / Max | 384,000 | 384K-512K |
///
/// These caps keep the agent inside the regime DeepSeek tuned for, triggering
/// compaction *before* recall quality degrades. The 1M hard ceiling remains —
/// users can override via config or by declining the /compact suggestion.
pub const V4_PLAN_SOFT_CAP: usize = 64_000;
pub const V4_AGENT_SOFT_CAP: usize = 192_000;
pub const V4_YOLO_SOFT_CAP: usize = 384_000;
/// Compaction threshold keyed by model and caller-supplied effort tier.
///
/// For V4-family models the threshold is a mode-aware soft cap (see constants
/// above). For all other models the legacy 80%-of-window rule applies.
/// Replacement-style compaction rewrites the stable prefix, which works against
/// DeepSeek V4 prefix-cache economics. Reasoning effort must not lower V4's
/// automatic replacement threshold; V4-family models use the same late
/// 80%-of-window guard as `compaction_threshold_for_model`.
#[must_use]
pub fn compaction_threshold_for_model_and_effort(
model: &str,
reasoning_effort: Option<&str>,
_reasoning_effort: Option<&str>,
) -> usize {
let lower = model.to_lowercase();
if !lower.contains("deepseek")
|| !(lower.contains("v4") || is_current_deepseek_v4_alias(&lower))
{
return compaction_threshold_for_model(model);
}
match reasoning_effort.map(str::trim).filter(|s| !s.is_empty()) {
Some("off" | "disabled" | "none" | "false") => V4_PLAN_SOFT_CAP,
Some("low" | "medium" | "high") => V4_AGENT_SOFT_CAP,
Some("max" | "maximum" | "xhigh") => V4_YOLO_SOFT_CAP,
_ => V4_AGENT_SOFT_CAP,
}
compaction_threshold_for_model(model)
}
/// Derive a compaction message-count threshold from model context window.
@@ -503,18 +474,18 @@ mod tests {
}
#[test]
fn v4_mode_aware_soft_caps() {
fn v4_replacement_compaction_ignores_reasoning_effort() {
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("off")),
V4_PLAN_SOFT_CAP
800_000
);
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("high")),
V4_AGENT_SOFT_CAP
800_000
);
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("max")),
V4_YOLO_SOFT_CAP
800_000
);
}
@@ -531,14 +502,14 @@ mod tests {
}
#[test]
fn v4_soft_cap_defaults_to_agent_when_effort_unknown() {
fn v4_replacement_compaction_defaults_to_late_guard_when_effort_unknown() {
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", None),
V4_AGENT_SOFT_CAP
800_000
);
assert_eq!(
compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("unknown")),
V4_AGENT_SOFT_CAP
800_000
);
}
}
+1 -1
View File
@@ -47,7 +47,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`
You run on V4 architecture. Understanding the internals helps you self-manage:
**Degradation curve.** Retrieval quality holds well to ~256K tokens, then degrades rapidly. Keep your active working set below ~256K. Older verbatim messages persist but are harder to retrieve accurately — treat `<archived_context>` seams as navigational markers, not a working-memory substitute.
**Degradation curve.** Retrieval quality holds well through large V4 contexts and remains usable deep into the 1M window. Do not summarize or delete earlier turns just because the transcript has crossed an older 128K-era threshold. Prefer appending stable evidence and suggest `/compact` only near real pressure or when the user asks.
**Prefix cache economics.** V4 caches shared prefixes at 128-token granularity with ~90% cost discount. Prefer appending to existing messages over mutating old ones — deletion or replacement breaks the cache and increases cost. Structure output to maximize prefix reuse across turns.
+2 -1
View File
@@ -12,7 +12,8 @@
//! `<archived_context>` summary blocks produced by V4 Flash. These blocks
//! are *navigational aids* — the model reads them first, then drills into
//! verbatim messages when precision is needed. The prefix cache stays hot
//! for the entire stable prefix.
//! for the entire stable prefix. In v0.7.5 this manager is opt-in while the
//! cache/timing policy is audited.
//!
//! ## Soft seam levels
//!
+26 -3
View File
@@ -13,7 +13,7 @@ use crate::config::{expand_path, normalize_model_name};
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(default)]
pub struct Settings {
/// Auto-compact conversations when they get long
/// Auto-compact conversations when they approach the model limit.
pub auto_compact: bool,
/// Reduce status noise and collapse details more aggressively
pub calm_mode: bool,
@@ -50,7 +50,7 @@ pub struct Settings {
impl Default for Settings {
fn default() -> Self {
Self {
auto_compact: true,
auto_compact: false,
calm_mode: false,
low_motion: false,
fancy_animations: false,
@@ -288,7 +288,10 @@ impl Settings {
#[allow(dead_code)]
pub fn available_settings() -> Vec<(&'static str, &'static str)> {
vec![
("auto_compact", "Auto-compact conversations: on/off"),
(
"auto_compact",
"Auto-compact near context limit: on/off (default off)",
),
("calm_mode", "Calmer UI defaults: on/off"),
("low_motion", "Reduce animation and redraw churn: on/off"),
(
@@ -377,3 +380,23 @@ fn normalize_sidebar_focus(value: &str) -> &str {
_ => "auto",
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Default settings must leave `auto_compact` off.
    #[test]
    fn default_settings_preserve_v4_prefix_cache_by_default() {
        let defaults = Settings::default();
        assert!(!defaults.auto_compact);
    }

    /// `auto_compact` can still be toggled through the string-based `set`
    /// API, in both directions.
    #[test]
    fn auto_compact_remains_explicitly_configurable() {
        let mut prefs = Settings::default();

        // Opt in.
        prefs.set("auto_compact", "on").expect("enable");
        assert!(prefs.auto_compact);

        // Opt back out.
        prefs.set("auto_compact", "off").expect("disable");
        assert!(!prefs.auto_compact);
    }
}
+25 -9
View File
@@ -117,6 +117,8 @@ These override config values:
- `DEEPSEEK_CAPACITY_PROFILE_WINDOW`
- `DEEPSEEK_CAPACITY_PRIOR_CHAT`
- `DEEPSEEK_CAPACITY_PRIOR_REASONER`
- `DEEPSEEK_CAPACITY_PRIOR_V4_PRO`
- `DEEPSEEK_CAPACITY_PRIOR_V4_FLASH`
- `DEEPSEEK_CAPACITY_PRIOR_FALLBACK`
## Settings File (Persistent UI Preferences)
@@ -125,14 +127,17 @@ DeepSeek TUI also stores user preferences in:
- `~/.config/deepseek/settings.toml`
Notable settings include `auto_compact` (default `true`), which automatically summarizes
earlier turns once the conversation grows large. You can inspect or update these from the
TUI with `/settings` and `/config` (interactive editor).
Notable settings include `auto_compact` (default `false`), which opts into
replacement-style summarization only near the active model limit. The default
V4 path preserves the stable message prefix for cache reuse; use manual
`/compact` or enable `auto_compact` only when you explicitly want automatic
replacement compaction. You can inspect or update these from the TUI with
`/settings` and `/config` (interactive editor).
Common settings keys:
- `theme` (default, dark, light, whale)
- `auto_compact` (on/off)
- `auto_compact` (on/off, default off)
- `show_thinking` (on/off)
- `show_tool_details` (on/off)
- `default_mode` (agent, plan, yolo; legacy `normal` is accepted and normalized to `agent`)
@@ -188,25 +193,36 @@ If you are upgrading from older releases:
- `[snapshots].enabled` (bool, default `true`)
- `[snapshots].max_age_days` (int, default `7`)
- snapshots live under `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/.git` and never use the workspace's own `.git` directory
- `context.*` (optional): append-only Flash seam manager, currently opt-in:
- `[context].enabled` (bool, default `false`)
- `[context].verbatim_window_turns` (int, default `16`)
- `[context].l1_threshold` (int, default `192000`)
- `[context].l2_threshold` (int, default `384000`)
- `[context].l3_threshold` (int, default `576000`)
- `[context].cycle_threshold` (int, default `768000`)
- `[context].seam_model` (string, default `deepseek-v4-flash`)
- `retry.*` (optional): retry/backoff settings for API requests:
- `[retry].enabled` (bool, default `true`)
- `[retry].max_retries` (int, default `3`)
- `[retry].initial_delay` (float seconds, default `1.0`)
- `[retry].max_delay` (float seconds, default `60.0`)
- `[retry].exponential_base` (float, default `2.0`)
- `capacity.*` (optional): runtime context-capacity controller:
- `[capacity].enabled` (bool, default `true`)
- `[capacity].low_risk_max` (float, default `0.34`)
- `capacity.*` (optional): runtime context-capacity controller. This is opt-in
because its active interventions can rewrite the live transcript.
- `[capacity].enabled` (bool, default `false`)
- `[capacity].low_risk_max` (float, default `0.50`)
- `[capacity].medium_risk_max` (float, default `0.62`)
- `[capacity].severe_min_slack` (float, default `-0.25`)
- `[capacity].severe_violation_ratio` (float, default `0.40`)
- `[capacity].refresh_cooldown_turns` (int, default `2`)
- `[capacity].refresh_cooldown_turns` (int, default `6`)
- `[capacity].replan_cooldown_turns` (int, default `5`)
- `[capacity].max_replay_per_turn` (int, default `1`)
- `[capacity].min_turns_before_guardrail` (int, default `2`)
- `[capacity].min_turns_before_guardrail` (int, default `4`)
- `[capacity].profile_window` (int, default `8`)
- `[capacity].deepseek_v3_2_chat_prior` (float, default `3.9`)
- `[capacity].deepseek_v3_2_reasoner_prior` (float, default `4.1`)
- `[capacity].deepseek_v4_pro_prior` (float, default `3.5`)
- `[capacity].deepseek_v4_flash_prior` (float, default `4.2`)
- `[capacity].fallback_default_prior` (float, default `3.8`)
- `tui.alternate_screen` (string, optional): `auto`, `always`, or `never`. `auto` disables the alternate screen in Zellij; `--no-alt-screen` forces inline mode. Set `never` or run with `--no-alt-screen` when you want real terminal scrollback.
- `tui.mouse_capture` (bool, optional, default `true` when the alternate screen is active): enable internal mouse scrolling/transcript selection. Set this to `false` or run with `--no-mouse-capture` for terminal-native drag selection and highlight-to-copy.
+61
View File
@@ -0,0 +1,61 @@
# v0.7.5 Implementation Plan
Scope: background shell job UX, in-TUI MCP management/discovery, and V4
context/cache policy. Do not include provider expansion or Whalescale
rename/migration work in this release lane.
## Context/cache decision
Default path:
- Keep the transcript append-only and preserve the stable prefix for DeepSeek V4 cache reuse.
- Disable replacement-style `auto_compact` by default.
- Keep replacement compaction manual or late: if a user enables `auto_compact`, V4 compacts only near the 80% model-window guard (`800000` tokens for 1M-context models), not at reasoning-effort soft caps.
- Keep the Flash seam manager (`[context].enabled`) opt-in until issue #200 has repeatable cache-hit/miss evidence.
- Keep the capacity controller disabled by default. While disabled it neither observes nor intervenes; it is an experimental guardrail that runs only when `capacity.enabled = true` is set.
- Use emergency overflow recovery only when the request would otherwise exceed the model input budget.
Rationale: V4's 1M-token window and prefix-cache economics make early
replacement compaction suspect. The first shippable slice should prevent old
128K-era heuristics from rewriting context before there is evidence that the
rewrite is cheaper and more reliable than preserving a hot prefix.
## Shippable slices
### Slice 1: Context policy and docs
- Change default `auto_compact` to off.
- Keep V4 replacement-compaction thresholds late and independent of reasoning effort.
- Make `[context].enabled` default to false.
- Make `docs/CONFIGURATION.md`, `docs/capacity_controller.md`, and `config.example.toml` match code defaults.
- Add focused tests for defaults and V4 threshold behavior.
### Slice 2: Background shell job center (#195)
- Add a job-center view fed by `ShellManager::list()`.
- Show command, cwd, linked task id when available, status, elapsed time, exit code, and latest output.
- Add controls to inspect full output, poll latest output, send stdin for PTY/stdin-capable jobs, kill a background job, and attach completed output as task evidence.
- Mark restart-stale jobs explicitly rather than presenting them as live.
- Add lifecycle tests for start, poll, cancel, complete, stale/restart, plus TUI snapshots for running and completed job details.
### Slice 3: MCP manager (#196)
- Add `/mcp` or a command-palette action that opens an MCP manager view.
- Show resolved config path, server enabled/disabled state, transport, command/url, timeout settings, startup errors, and discovered tool/resource/prompt counts.
- Wire `mcp_config_path` into the interactive config surface.
- Support init, add stdio server, add HTTP/SSE server, enable, disable, remove, validate, reconnect, and inspect tools/resources/prompts.
- Preserve both `servers` and `mcpServers` config shapes.
### Slice 4: MCP discoverability (#197)
- Add an MCP command-palette section backed by the same discovery state as the manager.
- Group tools/resources/prompts by server.
- Show disabled/failed servers without blocking palette rendering.
- Keep model-visible names consistent with `mcp_<server>_<tool>`.
## Stop rules
- Do not close #159 or #162 unless a verified PR actually resolves them.
- Do not add provider expansion.
- Do not rename or migrate anything to Whalescale.
- Do not broaden the TUI into a large redesign; each slice should remain independently testable and shippable.
+28 -16
View File
@@ -1,6 +1,9 @@
# Capacity Controller
`deepseek-tui` includes a capacity-aware context controller that keeps active prompt context near coherent operating range while preserving full history on disk.
`deepseek-tui` includes an opt-in capacity-aware context controller. In the
default V4 path it is disabled, because its active interventions can rewrite
the live prompt and break prefix-cache affinity. While disabled it neither
observes nor intervenes; it is an experimental guardrail that runs only when
`capacity.enabled = true` is set explicitly.
## Policy Overview
@@ -28,6 +31,8 @@ Per-model priors:
- `deepseek_v3_2_chat = 3.9`
- `deepseek_v3_2_reasoner = 4.1`
- `deepseek_v4_pro = 3.5`
- `deepseek_v4_flash = 4.2`
- fallback `3.8` (used for other DeepSeek IDs, including future releases)
### Failure Probability
@@ -52,7 +57,7 @@ Risk bands:
- medium: `p_fail <= medium_risk_max`
- high: otherwise
Action mapping:
Action mapping when the controller is explicitly enabled:
- low -> `NoIntervention`
- medium -> `TargetedContextRefresh`
@@ -61,7 +66,7 @@ Action mapping:
## Checkpoints
The engine evaluates controller policy at:
When enabled, the engine evaluates controller policy at:
1. Pre-request checkpoint (before `MessageRequest` assembly).
2. Post-tool checkpoint (after tool result append).
@@ -69,6 +74,11 @@ The engine evaluates controller policy at:
## Interventions
Interventions are not part of the default v0.7.5 V4 path. The default path is:
append messages, preserve prefix-cache reuse, suggest manual `/compact` near
real model pressure, and use overflow recovery only if the request would exceed
the model input budget.
### `TargetedContextRefresh`
- Runs compaction (`compact_messages_safe`) when possible.
@@ -119,18 +129,20 @@ Loader utility supports fetching last `K` snapshots for rehydration.
`[capacity]` keys:
- `enabled`
- `low_risk_max`
- `medium_risk_max`
- `severe_min_slack`
- `severe_violation_ratio`
- `refresh_cooldown_turns`
- `replan_cooldown_turns`
- `max_replay_per_turn`
- `min_turns_before_guardrail`
- `profile_window`
- `deepseek_v3_2_chat_prior`
- `deepseek_v3_2_reasoner_prior`
- `fallback_default_prior`
- `enabled` (default `false`)
- `low_risk_max` (default `0.50`)
- `medium_risk_max` (default `0.62`)
- `severe_min_slack` (default `-0.25`)
- `severe_violation_ratio` (default `0.40`)
- `refresh_cooldown_turns` (default `6`)
- `replan_cooldown_turns` (default `5`)
- `max_replay_per_turn` (default `1`)
- `min_turns_before_guardrail` (default `4`)
- `profile_window` (default `8`)
- `deepseek_v3_2_chat_prior` (default `3.9`)
- `deepseek_v3_2_reasoner_prior` (default `4.1`)
- `deepseek_v4_pro_prior` (default `3.5`)
- `deepseek_v4_flash_prior` (default `4.2`)
- `fallback_default_prior` (default `3.8`)
Equivalent environment overrides are available with `DEEPSEEK_CAPACITY_*`.