diff --git a/config.example.toml b/config.example.toml index 74e92e19..8c8d054b 100644 --- a/config.example.toml +++ b/config.example.toml @@ -195,22 +195,35 @@ exponential_base = 2.0 # There is no config-file `[compaction]` table yet; detailed thresholds are # chosen by the TUI from the active model/context budget. +# Append-only Flash seams are experimental and opt-in while the v0.7.5 +# context/cache audit validates prefix-cache behavior. +[context] +enabled = false +verbatim_window_turns = 16 +l1_threshold = 192000 +l2_threshold = 384000 +l3_threshold = 576000 +cycle_threshold = 768000 +seam_model = "deepseek-v4-flash" + # ───────────────────────────────────────────────────────────────────────────────── # Capacity Controller (runtime pressure guardrails) # ───────────────────────────────────────────────────────────────────────────────── [capacity] -enabled = true -low_risk_max = 0.34 +enabled = false +low_risk_max = 0.50 medium_risk_max = 0.62 severe_min_slack = -0.25 severe_violation_ratio = 0.40 -refresh_cooldown_turns = 2 +refresh_cooldown_turns = 6 replan_cooldown_turns = 5 max_replay_per_turn = 1 -min_turns_before_guardrail = 2 +min_turns_before_guardrail = 4 profile_window = 8 deepseek_v3_2_chat_prior = 3.9 deepseek_v3_2_reasoner_prior = 4.1 +deepseek_v4_pro_prior = 3.5 +deepseek_v4_flash_prior = 4.2 fallback_default_prior = 3.8 # ───────────────────────────────────────────────────────────────────────────────── diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 7cb19100..1af64f15 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -436,9 +436,10 @@ impl RetryPolicy { } /// Context management configuration (append-only layered context with Flash seams). -#[derive(Debug, Clone, Deserialize)] +#[derive(Debug, Clone, Deserialize, Default)] pub struct ContextConfig { - /// Master enable for layered context management. Default: true. + /// Master enable for layered context management. 
Default: false while + /// v0.7.5 audits V4 prefix-cache behavior. #[serde(default)] pub enabled: Option, /// Verbatim window: last N turns never summarized. Default: 16. @@ -475,21 +476,6 @@ pub struct PerModelContextConfig { pub cycle_threshold: Option, } -impl Default for ContextConfig { - fn default() -> Self { - Self { - enabled: Some(true), - verbatim_window_turns: Some(16), - l1_threshold: Some(192_000), - l2_threshold: Some(384_000), - l3_threshold: Some(576_000), - cycle_threshold: Some(768_000), - seam_model: Some("deepseek-v4-flash".to_string()), - per_model: None, - } - } -} - /// Resolved CLI configuration, including defaults and environment overrides. #[derive(Debug, Clone, Default, Deserialize)] pub struct Config { @@ -2480,6 +2466,41 @@ mod tests { assert!(normalize_model_name("").is_none()); } + #[test] + fn default_context_seams_are_opt_in() { + let config = Config::default(); + assert!(!config.context.enabled.unwrap_or(false)); + assert_eq!(config.context.l1_threshold.unwrap_or(192_000), 192_000); + assert_eq!(config.context.cycle_threshold.unwrap_or(768_000), 768_000); + assert_eq!( + config + .context + .seam_model + .as_deref() + .unwrap_or("deepseek-v4-flash"), + "deepseek-v4-flash" + ); + } + + #[test] + fn profile_without_context_does_not_disable_base_context() { + let mut profiles = HashMap::new(); + profiles.insert("work".to_string(), Config::default()); + let config = ConfigFile { + base: Config { + context: ContextConfig { + enabled: Some(true), + ..Default::default() + }, + ..Default::default() + }, + profiles: Some(profiles), + }; + + let merged = apply_profile(config, Some("work")).expect("profile"); + assert_eq!(merged.context.enabled, Some(true)); + } + #[test] fn validate_accepts_future_deepseek_model_id() -> Result<()> { let config = Config { diff --git a/crates/tui/src/core/capacity.rs b/crates/tui/src/core/capacity.rs index 55547c9b..2964ec46 100644 --- a/crates/tui/src/core/capacity.rs +++ b/crates/tui/src/core/capacity.rs @@ 
-618,6 +618,38 @@ mod tests { assert_eq!(decide_policy(&cfg, &snap), GuardrailAction::VerifyAndReplan); } + #[test] + fn default_controller_is_disabled_and_does_not_observe() { + let cfg = CapacityControllerConfig::default(); + assert!(!cfg.enabled); + + let mut controller = CapacityController::new(cfg); + let snapshot = controller.observe_pre_turn(CapacityObservationInput { + turn_index: 1, + model: "deepseek-v4-pro".to_string(), + action_count_this_turn: 10, + tool_calls_recent_window: 10, + unique_reference_ids_recent_window: 10, + context_used_ratio: 0.95, + }); + + assert!(snapshot.is_none()); + let decision = controller.decide(1, snapshot.as_ref()); + assert_eq!(decision.action, GuardrailAction::NoIntervention); + assert_eq!(decision.reason, "capacity_controller_disabled"); + } + + #[test] + fn app_config_without_capacity_keeps_controller_disabled() { + let cfg = CapacityControllerConfig::from_app_config(&crate::config::Config::default()); + assert!(!cfg.enabled); + assert_eq!(cfg.low_risk_max, 0.50); + assert_eq!(cfg.refresh_cooldown_turns, 6); + assert_eq!(cfg.min_turns_before_guardrail, 4); + assert_eq!(cfg.model_priors.get("deepseek_v4_pro"), Some(&3.5)); + assert_eq!(cfg.model_priors.get("deepseek_v4_flash"), Some(&4.2)); + } + #[test] fn normalize_v4_pro_variants() { assert_eq!( diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 19ac5d19..7815b8f2 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -94,7 +94,8 @@ pub struct EngineConfig { /// is **disabled by default**; the checkpoint-restart cycle architecture /// (`cycle_manager`) replaces it. The compaction config is still wired through /// for the per-tool-result truncation path (`compact_tool_result_for_context`) - /// and for users who explicitly opt back in via `[compaction] enabled = true`. + /// and for users who explicitly opt back in through the `auto_compact` + /// setting or a direct engine config. 
pub compaction: CompactionConfig, /// Checkpoint-restart cycle settings (issue #124). pub cycle: CycleConfig, @@ -270,8 +271,8 @@ pub struct Engine { shared_cancel_token: Arc>, tool_exec_lock: Arc>, capacity_controller: CapacityController, - /// Append-only layered context manager (#159). Produces soft seams at - /// 192K/384K/576K and Flash-cycle briefings at 768K. + /// Append-only layered context manager (#159). Opt-in for v0.7.5 while + /// cache-hit behavior is audited. seam_manager: Option, coherence_state: CoherenceState, turn_counter: u64, @@ -1271,12 +1272,12 @@ impl Engine { let shell_manager = new_shared_shell_manager(config.workspace.clone()); let capacity_controller = CapacityController::new(config.capacity.clone()); - // Create Flash seam manager for layered context (#159). Uses the same - // API credentials as the main client but targets the Flash model for - // cost-effective summarisation and cycle briefing work. + // Create Flash seam manager for layered context (#159). v0.7.5 keeps + // this opt-in until the prefix-cache audit proves when seam production + // is worth the extra request and transcript mutation. let seam_manager = deepseek_client.as_ref().map(|main_client| { let seam_config = SeamConfig { - enabled: api_config.context.enabled.unwrap_or(true), + enabled: api_config.context.enabled.unwrap_or(false), verbatim_window_turns: api_config .context .verbatim_window_turns diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index a67af16c..f8a2ed6d 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -212,10 +212,9 @@ impl Engine { // model sees compile errors before its next reasoning step. self.flush_pending_lsp_diagnostics().await; - // #159: layered context seam checkpoint. Produces soft seams at - // 192K/384K/576K via Flash and appends blocks - // so the model can navigate deep history without losing prefix - // cache affinity. 
+ // #159: layered context seam checkpoint. This is opt-in for + // v0.7.5 while #200 audits cache-hit behavior; when enabled it + // appends blocks rather than replacing history. self.layered_context_checkpoint().await; // Build the request diff --git a/crates/tui/src/cycle_manager.rs b/crates/tui/src/cycle_manager.rs index 27681ffb..293e1bc8 100644 --- a/crates/tui/src/cycle_manager.rs +++ b/crates/tui/src/cycle_manager.rs @@ -2,8 +2,9 @@ //! //! ## Why //! -//! DeepSeek V4's empirical retrieval elbow is 128K tokens (paper Figure 9 — -//! 8K/0.90, 64K/0.87, 128K/0.85, 256K/0.76, 512K/0.66, 1M/0.59). Lossy +//! DeepSeek V4's empirical retrieval degradation begins around the 256K band +//! (paper Figure 9: 8K/0.90, 64K/0.87, 128K/0.85, 256K/0.76, +//! 512K/0.66, 1M/0.59). Lossy //! summarization compaction creates a "Frankenstein" context — half verbatim, //! half paraphrased — that the model cannot tell apart, so it treats the //! summary as if it were verbatim and confabulates around the gaps. @@ -28,9 +29,9 @@ //! //! ## Trigger //! -//! - Token threshold: **768K** by default (~75% of the 1M window). Soft seams -//! at 192K/384K/576K (layered context manager, #159) handle intermediate -//! thresholds. The hard cycle only fires near the wall. +//! - Token threshold: **768K** by default (~75% of the 1M window). This is a +//! rare overflow safety net. Optional soft seams at 192K/384K/576K are +//! controlled by the opt-in layered context manager (#159). //! - Phase guard: callers only invoke `should_advance_cycle` at clean turn //! boundaries (no in-flight tool, no streaming, no approval modal). //! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators @@ -58,9 +59,9 @@ const CYCLE_ARCHIVE_SCHEMA_VERSION: u32 = 1; /// Default token threshold at which a cycle boundary fires. /// -/// Bumped from 110K (pre-#159) to 768K (~75% of 1M window) in v0.7.2. 
-/// The layered context manager (#159) handles intermediate thresholds via -/// soft seams at 192K/384K/576K, so the hard cycle only fires near the wall. +/// Bumped from 110K to 768K (~75% of 1M window). The layered context manager +/// (#159) can add opt-in soft seams at 192K/384K/576K; the hard cycle remains +/// a near-wall safety net. pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 768_000; /// Default cap on the model-curated briefing block. diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 5f55e99b..0507b92b 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -2940,8 +2940,8 @@ async fn run_exec_agent( // Compaction defaults to disabled in v0.6.6: the checkpoint-restart cycle // architecture (issue #124) handles long-context resets via fresh contexts // rather than progressive summarization. The compaction config is still - // wired through so users who explicitly opt back in via [compaction] - // enabled = true keep their old behavior. + // wired through so users who explicitly opt back in through TUI settings + // or direct engine config keep their old behavior. let compaction = CompactionConfig { enabled: false, model: model.to_string(), diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index 32f357f3..9a274c40 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -274,47 +274,18 @@ pub fn compaction_threshold_for_model(model: &str) -> usize { usize::try_from(threshold).unwrap_or(DEFAULT_COMPACTION_TOKEN_THRESHOLD) } -/// Mode-aware soft context caps for V4 models. -/// -/// DeepSeek V4 paper Figure 9 shows retrieval quality (MRCR MMR) collapses as -/// context grows: 0.90 at 8K, 0.94 at 32K, 0.92 at 128K, 0.66 at 512K, 0.49 -/// at 1M. 
The paper's own eval harness uses budget tiers per §5.3.1: -/// -/// | Mode / Reasoning tier | Soft cap | Paper eval window | -/// |-----------------------|----------|-------------------| -/// | Plan / Non-Think (off) | 64,000 | 8K-128K | -/// | Agent / High | 192,000 | 128K | -/// | YOLO / Max | 384,000 | 384K-512K | -/// -/// These caps keep the agent inside the regime DeepSeek tuned for, triggering -/// compaction *before* recall quality degrades. The 1M hard ceiling remains — -/// users can override via config or by declining the /compact suggestion. -pub const V4_PLAN_SOFT_CAP: usize = 64_000; -pub const V4_AGENT_SOFT_CAP: usize = 192_000; -pub const V4_YOLO_SOFT_CAP: usize = 384_000; - /// Compaction threshold keyed by model and caller-supplied effort tier. /// -/// For V4-family models the threshold is a mode-aware soft cap (see constants -/// above). For all other models the legacy 80%-of-window rule applies. +/// Replacement-style compaction rewrites the stable prefix, which works against +/// DeepSeek V4 prefix-cache economics. Reasoning effort must not lower V4's +/// automatic replacement threshold; V4-family models use the same late +/// 80%-of-window guard as `compaction_threshold_for_model`. #[must_use] pub fn compaction_threshold_for_model_and_effort( model: &str, - reasoning_effort: Option<&str>, + _reasoning_effort: Option<&str>, ) -> usize { - let lower = model.to_lowercase(); - if !lower.contains("deepseek") - || !(lower.contains("v4") || is_current_deepseek_v4_alias(&lower)) - { - return compaction_threshold_for_model(model); - } - - match reasoning_effort.map(str::trim).filter(|s| !s.is_empty()) { - Some("off" | "disabled" | "none" | "false") => V4_PLAN_SOFT_CAP, - Some("low" | "medium" | "high") => V4_AGENT_SOFT_CAP, - Some("max" | "maximum" | "xhigh") => V4_YOLO_SOFT_CAP, - _ => V4_AGENT_SOFT_CAP, - } + compaction_threshold_for_model(model) } /// Derive a compaction message-count threshold from model context window. 
@@ -503,18 +474,18 @@ mod tests { } #[test] - fn v4_mode_aware_soft_caps() { + fn v4_replacement_compaction_ignores_reasoning_effort() { assert_eq!( compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("off")), - V4_PLAN_SOFT_CAP + 800_000 ); assert_eq!( compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("high")), - V4_AGENT_SOFT_CAP + 800_000 ); assert_eq!( compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("max")), - V4_YOLO_SOFT_CAP + 800_000 ); } @@ -531,14 +502,14 @@ mod tests { } #[test] - fn v4_soft_cap_defaults_to_agent_when_effort_unknown() { + fn v4_replacement_compaction_defaults_to_late_guard_when_effort_unknown() { assert_eq!( compaction_threshold_for_model_and_effort("deepseek-v4-pro", None), - V4_AGENT_SOFT_CAP + 800_000 ); assert_eq!( compaction_threshold_for_model_and_effort("deepseek-v4-pro", Some("unknown")), - V4_AGENT_SOFT_CAP + 800_000 ); } } diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index da11fa1b..6ce7fce2 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -47,7 +47,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking` You run on V4 architecture. Understanding the internals helps you self-manage: -**Degradation curve.** Retrieval quality holds well to ~256K tokens, then degrades rapidly. Keep your active working set below ~256K. Older verbatim messages persist but are harder to retrieve accurately — treat `` seams as navigational markers, not a working-memory substitute. +**Degradation curve.** Retrieval quality holds well through large V4 contexts and remains usable deep into the 1M window. Do not summarize or delete earlier turns just because the transcript has crossed an older 128K-era threshold. Prefer appending stable evidence and suggest `/compact` only near real pressure or when the user asks. 
**Prefix cache economics.** V4 caches shared prefixes at 128-token granularity with ~90% cost discount. Prefer appending to existing messages over mutating old ones — deletion or replacement breaks the cache and increases cost. Structure output to maximize prefix reuse across turns. diff --git a/crates/tui/src/seam_manager.rs b/crates/tui/src/seam_manager.rs index 08273140..1ecc0c51 100644 --- a/crates/tui/src/seam_manager.rs +++ b/crates/tui/src/seam_manager.rs @@ -12,7 +12,8 @@ //! `` summary blocks produced by V4 Flash. These blocks //! are *navigational aids* — the model reads them first, then drills into //! verbatim messages when precision is needed. The prefix cache stays hot -//! for the entire stable prefix. +//! for the entire stable prefix. In v0.7.5 this manager is opt-in while the +//! cache/timing policy is audited. //! //! ## Soft seam levels //! diff --git a/crates/tui/src/settings.rs b/crates/tui/src/settings.rs index bb8d980b..26956b5d 100644 --- a/crates/tui/src/settings.rs +++ b/crates/tui/src/settings.rs @@ -13,7 +13,7 @@ use crate::config::{expand_path, normalize_model_name}; #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default)] pub struct Settings { - /// Auto-compact conversations when they get long + /// Auto-compact conversations when they approach the model limit. 
pub auto_compact: bool, /// Reduce status noise and collapse details more aggressively pub calm_mode: bool, @@ -50,7 +50,7 @@ pub struct Settings { impl Default for Settings { fn default() -> Self { Self { - auto_compact: true, + auto_compact: false, calm_mode: false, low_motion: false, fancy_animations: false, @@ -288,7 +288,10 @@ impl Settings { #[allow(dead_code)] pub fn available_settings() -> Vec<(&'static str, &'static str)> { vec![ - ("auto_compact", "Auto-compact conversations: on/off"), + ( + "auto_compact", + "Auto-compact near context limit: on/off (default off)", + ), ("calm_mode", "Calmer UI defaults: on/off"), ("low_motion", "Reduce animation and redraw churn: on/off"), ( @@ -377,3 +380,23 @@ fn normalize_sidebar_focus(value: &str) -> &str { _ => "auto", } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_settings_preserve_v4_prefix_cache_by_default() { + let settings = Settings::default(); + assert!(!settings.auto_compact); + } + + #[test] + fn auto_compact_remains_explicitly_configurable() { + let mut settings = Settings::default(); + settings.set("auto_compact", "on").expect("enable"); + assert!(settings.auto_compact); + settings.set("auto_compact", "off").expect("disable"); + assert!(!settings.auto_compact); + } +} diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index b9806700..5032f701 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -117,6 +117,8 @@ These override config values: - `DEEPSEEK_CAPACITY_PROFILE_WINDOW` - `DEEPSEEK_CAPACITY_PRIOR_CHAT` - `DEEPSEEK_CAPACITY_PRIOR_REASONER` +- `DEEPSEEK_CAPACITY_PRIOR_V4_PRO` +- `DEEPSEEK_CAPACITY_PRIOR_V4_FLASH` - `DEEPSEEK_CAPACITY_PRIOR_FALLBACK` ## Settings File (Persistent UI Preferences) @@ -125,14 +127,17 @@ DeepSeek TUI also stores user preferences in: - `~/.config/deepseek/settings.toml` -Notable settings include `auto_compact` (default `true`), which automatically summarizes -earlier turns once the conversation grows large. 
You can inspect or update these from the -TUI with `/settings` and `/config` (interactive editor). +Notable settings include `auto_compact` (default `false`), which opts into +replacement-style summarization only near the active model limit. The default +V4 path preserves the stable message prefix for cache reuse; use manual +`/compact` or enable `auto_compact` only when you explicitly want automatic +replacement compaction. You can inspect or update these from the TUI with +`/settings` and `/config` (interactive editor). Common settings keys: - `theme` (default, dark, light, whale) -- `auto_compact` (on/off) +- `auto_compact` (on/off, default off) - `show_thinking` (on/off) - `show_tool_details` (on/off) - `default_mode` (agent, plan, yolo; legacy `normal` is accepted and normalized to `agent`) @@ -188,25 +193,36 @@ If you are upgrading from older releases: - `[snapshots].enabled` (bool, default `true`) - `[snapshots].max_age_days` (int, default `7`) - snapshots live under `~/.deepseek/snapshots///.git` and never use the workspace's own `.git` directory +- `context.*` (optional): append-only Flash seam manager, currently opt-in: + - `[context].enabled` (bool, default `false`) + - `[context].verbatim_window_turns` (int, default `16`) + - `[context].l1_threshold` (int, default `192000`) + - `[context].l2_threshold` (int, default `384000`) + - `[context].l3_threshold` (int, default `576000`) + - `[context].cycle_threshold` (int, default `768000`) + - `[context].seam_model` (string, default `deepseek-v4-flash`) - `retry.*` (optional): retry/backoff settings for API requests: - `[retry].enabled` (bool, default `true`) - `[retry].max_retries` (int, default `3`) - `[retry].initial_delay` (float seconds, default `1.0`) - `[retry].max_delay` (float seconds, default `60.0`) - `[retry].exponential_base` (float, default `2.0`) -- `capacity.*` (optional): runtime context-capacity controller: - - `[capacity].enabled` (bool, default `true`) - - `[capacity].low_risk_max` (float, 
default `0.34`) +- `capacity.*` (optional): runtime context-capacity controller. This is opt-in + because its active interventions can rewrite the live transcript. + - `[capacity].enabled` (bool, default `false`) + - `[capacity].low_risk_max` (float, default `0.50`) - `[capacity].medium_risk_max` (float, default `0.62`) - `[capacity].severe_min_slack` (float, default `-0.25`) - `[capacity].severe_violation_ratio` (float, default `0.40`) - - `[capacity].refresh_cooldown_turns` (int, default `2`) + - `[capacity].refresh_cooldown_turns` (int, default `6`) - `[capacity].replan_cooldown_turns` (int, default `5`) - `[capacity].max_replay_per_turn` (int, default `1`) - - `[capacity].min_turns_before_guardrail` (int, default `2`) + - `[capacity].min_turns_before_guardrail` (int, default `4`) - `[capacity].profile_window` (int, default `8`) - `[capacity].deepseek_v3_2_chat_prior` (float, default `3.9`) - `[capacity].deepseek_v3_2_reasoner_prior` (float, default `4.1`) + - `[capacity].deepseek_v4_pro_prior` (float, default `3.5`) + - `[capacity].deepseek_v4_flash_prior` (float, default `4.2`) - `[capacity].fallback_default_prior` (float, default `3.8`) - `tui.alternate_screen` (string, optional): `auto`, `always`, or `never`. `auto` disables the alternate screen in Zellij; `--no-alt-screen` forces inline mode. Set `never` or run with `--no-alt-screen` when you want real terminal scrollback. - `tui.mouse_capture` (bool, optional, default `true` when the alternate screen is active): enable internal mouse scrolling/transcript selection. Set this to `false` or run with `--no-mouse-capture` for terminal-native drag selection and highlight-to-copy. diff --git a/docs/V0_7_5_IMPLEMENTATION_PLAN.md b/docs/V0_7_5_IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000..775a9436 --- /dev/null +++ b/docs/V0_7_5_IMPLEMENTATION_PLAN.md @@ -0,0 +1,61 @@ +# v0.7.5 Implementation Plan + +Scope: background shell job UX, in-TUI MCP management/discovery, and V4 +context/cache policy. 
Do not include provider expansion or Whalescale +rename/migration work in this release lane. + +## Context/cache decision + +Default path: + +- Keep the transcript append-only and preserve the stable prefix for DeepSeek V4 cache reuse. +- Disable replacement-style `auto_compact` by default. +- Keep replacement compaction manual or late: if a user enables `auto_compact`, V4 compacts only near the 80% model-window guard (`800000` tokens for 1M-context models), not at reasoning-effort soft caps. +- Keep the Flash seam manager (`[context].enabled`) opt-in until issue #200 has repeatable cache-hit/miss evidence. +- Keep the capacity controller disabled by default. Treat it as telemetry or an experimental guardrail unless `capacity.enabled = true` is set. +- Use emergency overflow recovery only when the request would otherwise exceed the model input budget. + +Rationale: V4's 1M-token window and prefix-cache economics make early +replacement compaction suspect. The first shippable slice should prevent old +128K-era heuristics from rewriting context before there is evidence that the +rewrite is cheaper and more reliable than preserving a hot prefix. + +## Shippable slices + +### Slice 1: Context policy and docs + +- Change default `auto_compact` to off. +- Keep V4 replacement-compaction thresholds late and independent of reasoning effort. +- Make `[context].enabled` default to false. +- Make `docs/CONFIGURATION.md`, `docs/capacity_controller.md`, and `config.example.toml` match code defaults. +- Add focused tests for defaults and V4 threshold behavior. + +### Slice 2: Background shell job center (#195) + +- Add a job-center view fed by `ShellManager::list()`. +- Show command, cwd, linked task id when available, status, elapsed time, exit code, and latest output. +- Add controls to inspect full output, poll latest output, send stdin for PTY/stdin-capable jobs, kill a background job, and attach completed output as task evidence. 
+- Mark restart-stale jobs explicitly rather than presenting them as live. +- Add lifecycle tests for start, poll, cancel, complete, stale/restart, plus TUI snapshots for running and completed job details. + +### Slice 3: MCP manager (#196) + +- Add `/mcp` or a command-palette action that opens an MCP manager view. +- Show resolved config path, server enabled/disabled state, transport, command/url, timeout settings, startup errors, and discovered tool/resource/prompt counts. +- Wire `mcp_config_path` into the interactive config surface. +- Support init, add stdio server, add HTTP/SSE server, enable, disable, remove, validate, reconnect, and inspect tools/resources/prompts. +- Preserve both `servers` and `mcpServers` config shapes. + +### Slice 4: MCP discoverability (#197) + +- Add an MCP command-palette section backed by the same discovery state as the manager. +- Group tools/resources/prompts by server. +- Show disabled/failed servers without blocking palette rendering. +- Keep model-visible names consistent with `mcp_<server>_<tool>`. + +## Stop rules + +- Do not close #159 or #162 unless a verified PR actually resolves them. +- Do not add provider expansion. +- Do not rename or migrate anything to Whalescale. +- Do not broaden the TUI into a large redesign; each slice should remain independently testable and shippable. diff --git a/docs/capacity_controller.md b/docs/capacity_controller.md index ee47bd8b..3160adb7 100644 --- a/docs/capacity_controller.md +++ b/docs/capacity_controller.md @@ -1,6 +1,9 @@ # Capacity Controller -`deepseek-tui` includes a capacity-aware context controller that keeps active prompt context near coherent operating range while preserving full history on disk. +`deepseek-tui` includes an opt-in capacity-aware context controller. In the +default V4 path it is disabled, because its active interventions can rewrite +the live prompt and break prefix-cache affinity. 
Treat it as telemetry or an +experimental guardrail unless `capacity.enabled = true` is set explicitly. ## Policy Overview @@ -28,6 +31,8 @@ Per-model priors: - `deepseek_v3_2_chat = 3.9` - `deepseek_v3_2_reasoner = 4.1` +- `deepseek_v4_pro = 3.5` +- `deepseek_v4_flash = 4.2` - fallback `3.8` (used for other DeepSeek IDs, including future releases) ### Failure Probability @@ -52,7 +57,7 @@ Risk bands: - medium: `p_fail <= medium_risk_max` - high: otherwise -Action mapping: +Action mapping when the controller is explicitly enabled: - low -> `NoIntervention` - medium -> `TargetedContextRefresh` @@ -61,7 +66,7 @@ Action mapping: ## Checkpoints -The engine evaluates controller policy at: +When enabled, the engine evaluates controller policy at: 1. Pre-request checkpoint (before `MessageRequest` assembly). 2. Post-tool checkpoint (after tool result append). @@ -69,6 +74,11 @@ The engine evaluates controller policy at: ## Interventions +Interventions are not part of the default v0.7.5 V4 path. The default path is: +append messages, preserve prefix-cache reuse, suggest manual `/compact` near +real model pressure, and use overflow recovery only if the request would exceed +the model input budget. + ### `TargetedContextRefresh` - Runs compaction (`compact_messages_safe`) when possible. @@ -119,18 +129,20 @@ Loader utility supports fetching last `K` snapshots for rehydration. 
`[capacity]` keys: -- `enabled` -- `low_risk_max` -- `medium_risk_max` -- `severe_min_slack` -- `severe_violation_ratio` -- `refresh_cooldown_turns` -- `replan_cooldown_turns` -- `max_replay_per_turn` -- `min_turns_before_guardrail` -- `profile_window` -- `deepseek_v3_2_chat_prior` -- `deepseek_v3_2_reasoner_prior` -- `fallback_default_prior` +- `enabled` (default `false`) +- `low_risk_max` (default `0.50`) +- `medium_risk_max` (default `0.62`) +- `severe_min_slack` (default `-0.25`) +- `severe_violation_ratio` (default `0.40`) +- `refresh_cooldown_turns` (default `6`) +- `replan_cooldown_turns` (default `5`) +- `max_replay_per_turn` (default `1`) +- `min_turns_before_guardrail` (default `4`) +- `profile_window` (default `8`) +- `deepseek_v3_2_chat_prior` (default `3.9`) +- `deepseek_v3_2_reasoner_prior` (default `4.1`) +- `deepseek_v4_pro_prior` (default `3.5`) +- `deepseek_v4_flash_prior` (default `4.2`) +- `fallback_default_prior` (default `3.8`) Equivalent environment overrides are available with `DEEPSEEK_CAPACITY_*`.