From c2b2c284f62b406c32df23e3895c773e293618e0 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Wed, 29 Apr 2026 10:13:27 -0500 Subject: [PATCH] =?UTF-8?q?release:=20v0.7.5=20=E2=80=94=20token-basis=20f?= =?UTF-8?q?ixes,=20shell=20timeout=20recovery,=20context/cache=20policy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issues #202, #203, #204, #205: - Cycle/seam triggers use active request input size + response headroom reserve, not lifetime cumulative API usage. - V4 hard-cycle headroom calibrated around fixed TURN_MAX_OUTPUT_TOKENS plus CONTEXT_HEADROOM_TOKENS safety buffer. - /tokens, /cost, footer/header labels, and docs now separate active context, turn telemetry, cumulative usage, cache hit/miss, context percent, and cost. - Foreground exec_shell timeout output tells the model the process was killed and suggests task_shell_start or background exec_shell plus poll/wait. - Added regression tests for active-token basis, V4 headroom, seam trigger basis, footer label behavior, and shell timeout recovery metadata. - Preserved #200/#201 policy: V4 default is append-only, prefix-cache preserving; replacement compaction, Flash seams, and capacity intervention remain opt-in. --- Cargo.lock | 28 ++++----- Cargo.toml | 2 +- config.example.toml | 4 ++ crates/agent/Cargo.toml | 2 +- crates/app-server/Cargo.toml | 18 +++--- crates/cli/Cargo.toml | 14 ++--- crates/config/Cargo.toml | 2 +- crates/core/Cargo.toml | 16 ++--- crates/execpolicy/Cargo.toml | 2 +- crates/hooks/Cargo.toml | 2 +- crates/mcp/Cargo.toml | 2 +- crates/tools/Cargo.toml | 2 +- crates/tui/Cargo.toml | 4 +- crates/tui/src/commands/debug.rs | 80 +++++++++++++++++++---- crates/tui/src/config.rs | 2 +- crates/tui/src/core/engine.rs | 28 ++++----- crates/tui/src/cycle_manager.rs | 93 ++++++++++++++++++++++----- crates/tui/src/prompts/base.md | 3 +- crates/tui/src/seam_manager.rs | 87 +++++++++++++++---------- crates/tui/src/tools/shell.rs | 94 +++++++++++++++++----------- crates/tui/src/tools/shell/tests.rs | 41 ++++++++++++ crates/tui/src/tui/app.rs | 10 +-- crates/tui/src/tui/ui.rs | 4 +- crates/tui/src/tui/ui/tests.rs | 4 +- crates/tui/src/tui/widgets/header.rs | 7 ++- docs/CONFIGURATION.md | 24 ++++++- docs/TOOL_SURFACE.md | 14 +++-- npm/deepseek-tui/package.json | 4 +- 28 files changed, 412 insertions(+), 181 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 57c9f0bc..fd631b83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1011,7 +1011,7 @@ dependencies = [ [[package]] name = "deepseek-agent" -version = "0.7.4" +version = "0.7.5" dependencies = [ "deepseek-config", "serde", @@ -1019,7 +1019,7 @@ dependencies = [ [[package]] name = "deepseek-app-server" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "axum", @@ -1042,7 +1042,7 @@ dependencies = [ [[package]] name = "deepseek-config" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "deepseek-secrets", @@ -1055,7 +1055,7 @@ dependencies = [ [[package]] name = "deepseek-core" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "chrono", @@ -1074,7 +1074,7 @@ dependencies = [ [[package]] name = "deepseek-execpolicy" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "deepseek-protocol", @@ -1083,7 +1083,7 @@ dependencies = [ [[package]] name = "deepseek-hooks" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "async-trait", @@ -1097,7 +1097,7 @@ dependencies = [ [[package]] name = "deepseek-mcp" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "deepseek-protocol", @@ -1107,7 +1107,7 @@ dependencies = [ [[package]] name = "deepseek-protocol" -version = "0.7.4" +version = "0.7.5" dependencies = [ "serde", "serde_json", @@ -1115,7 +1115,7 @@ dependencies = [ [[package]] name = "deepseek-secrets" -version = "0.7.4" +version = "0.7.5" dependencies = [ "dirs", "keyring", @@ -1128,7 +1128,7 @@ dependencies = [ [[package]] name = "deepseek-state" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "chrono", @@ -1140,7 +1140,7 @@ dependencies = [ [[package]] name = "deepseek-tools" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "async-trait", @@ -1153,7 +1153,7 @@ dependencies = [ [[package]] name = "deepseek-tui" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "arboard", @@ -1213,7 +1213,7 @@ dependencies = [ [[package]] name = "deepseek-tui-cli" -version = "0.7.4" +version = "0.7.5" dependencies = [ "anyhow", "chrono", @@ -1236,7 +1236,7 @@ dependencies = [ [[package]] name = "deepseek-tui-core" -version = "0.7.4" +version = "0.7.5" [[package]] name = "deranged" diff --git a/Cargo.toml b/Cargo.toml index 15cc9514..e8034176 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"] resolver = "2" [workspace.package] -version = "0.7.4" +version = "0.7.5" edition = "2024" license = "MIT" repository = "https://github.com/Hmbown/DeepSeek-TUI" diff --git a/config.example.toml b/config.example.toml index 8c8d054b..f8c454b9 100644 --- a/config.example.toml +++ b/config.example.toml @@ -200,9 +200,13 @@ exponential_base = 2.0 [context] enabled = false verbatim_window_turns = 16 +# Thresholds are based on the active request input estimate, not lifetime +# summed API usage. l1_threshold = 192000 l2_threshold = 384000 l3_threshold = 576000 +# Hard cycle also reserves the normal 262144-token output budget plus 1024 +# safety tokens against the model window. cycle_threshold = 768000 seam_model = "deepseek-v4-flash" diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml index f1dc03bb..69a5dc23 100644 --- a/crates/agent/Cargo.toml +++ b/crates/agent/Cargo.toml @@ -7,5 +7,5 @@ repository.workspace = true description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture" [dependencies] -deepseek-config = { path = "../config", version = "0.7.4" } +deepseek-config = { path = "../config", version = "0.7.5" } serde.workspace = true diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml index d7a3d194..08249844 100644 --- a/crates/app-server/Cargo.toml +++ b/crates/app-server/Cargo.toml @@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect anyhow.workspace = true axum.workspace = true clap.workspace = true -deepseek-agent = { path = "../agent", version = "0.7.4" } -deepseek-config = { path = "../config", version = "0.7.4" } -deepseek-core = { path = "../core", version = "0.7.4" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" } -deepseek-hooks = { path = "../hooks", version = "0.7.4" } -deepseek-mcp = { path = "../mcp", version = "0.7.4" } -deepseek-protocol = { path = "../protocol", version = "0.7.4" } -deepseek-state = { path = "../state", version = "0.7.4" } -deepseek-tools = { path = "../tools", version = "0.7.4" } +deepseek-agent = { path = "../agent", version = "0.7.5" } +deepseek-config = { path = "../config", version = "0.7.5" } +deepseek-core = { path = "../core", version = "0.7.5" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" } +deepseek-hooks = { path = "../hooks", version = "0.7.5" } +deepseek-mcp = { path = "../mcp", version = "0.7.5" } +deepseek-protocol = { path = "../protocol", version = "0.7.5" } +deepseek-state = { path = "../state", version = "0.7.5" } +deepseek-tools = { path = "../tools", version = "0.7.5" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 5b51a9e7..8fa84e03 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -14,13 +14,13 @@ path = "src/main.rs" anyhow.workspace = true clap.workspace = true clap_complete.workspace = true -deepseek-agent = { path = "../agent", version = "0.7.4" } -deepseek-app-server = { path = "../app-server", version = "0.7.4" } -deepseek-config = { path = "../config", version = "0.7.4" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" } -deepseek-mcp = { path = "../mcp", version = "0.7.4" } -deepseek-secrets = { path = "../secrets", version = "0.7.4" } -deepseek-state = { path = "../state", version = "0.7.4" } +deepseek-agent = { path = "../agent", version = "0.7.5" } +deepseek-app-server = { path = "../app-server", version = "0.7.5" } +deepseek-config = { path = "../config", version = "0.7.5" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" } +deepseek-mcp = { path = "../mcp", version = "0.7.5" } +deepseek-secrets = { path = "../secrets", version = "0.7.5" } +deepseek-state = { path = "../state", version = "0.7.5" } chrono.workspace = true dirs.workspace = true serde.workspace = true diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml index 0550e584..0f7890cf 100644 --- a/crates/config/Cargo.toml +++ b/crates/config/Cargo.toml @@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite [dependencies] anyhow.workspace = true -deepseek-secrets = { path = "../secrets", version = "0.7.4" } +deepseek-secrets = { path = "../secrets", version = "0.7.5" } dirs.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index b784d896..d02b5ff1 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture" [dependencies] anyhow.workspace = true chrono.workspace = true -deepseek-agent = { path = "../agent", version = "0.7.4" } -deepseek-config = { path = "../config", version = "0.7.4" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" } -deepseek-hooks = { path = "../hooks", version = "0.7.4" } -deepseek-mcp = { path = "../mcp", version = "0.7.4" } -deepseek-protocol = { path = "../protocol", version = "0.7.4" } -deepseek-state = { path = "../state", version = "0.7.4" } -deepseek-tools = { path = "../tools", version = "0.7.4" } +deepseek-agent = { path = "../agent", version = "0.7.5" } +deepseek-config = { path = "../config", version = "0.7.5" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" } +deepseek-hooks = { path = "../hooks", version = "0.7.5" } +deepseek-mcp = { path = "../mcp", version = "0.7.5" } +deepseek-protocol = { path = "../protocol", version = "0.7.5" } +deepseek-state = { path = "../state", version = "0.7.5" } +deepseek-tools = { path = "../tools", version = "0.7.5" } serde_json.workspace = true tokio.workspace = true uuid.workspace = true diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml index 720efe8e..2dd80569 100644 --- a/crates/execpolicy/Cargo.toml +++ b/crates/execpolicy/Cargo.toml @@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace [dependencies] anyhow.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.7.4" } +deepseek-protocol = { path = "../protocol", version = "0.7.5" } serde.workspace = true diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml index 4947c5e0..16aa8219 100644 --- a/crates/hooks/Cargo.toml +++ b/crates/hooks/Cargo.toml @@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc anyhow.workspace = true async-trait.workspace = true chrono.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.7.4" } +deepseek-protocol = { path = "../protocol", version = "0.7.5" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/mcp/Cargo.toml b/crates/mcp/Cargo.toml index 1c850689..d6dac672 100644 --- a/crates/mcp/Cargo.toml +++ b/crates/mcp/Cargo.toml @@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo [dependencies] anyhow.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.7.4" } +deepseek-protocol = { path = "../protocol", version = "0.7.5" } serde.workspace = true serde_json.workspace = true diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index fc160980..28afd3a2 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral [dependencies] anyhow.workspace = true async-trait.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.7.4" } +deepseek-protocol = { path = "../protocol", version = "0.7.5" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index a1485cfc..3d5f9bcf 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -13,8 +13,8 @@ path = "src/main.rs" [dependencies] anyhow = "1.0.100" arboard = "3.4" -deepseek-secrets = { path = "../secrets", version = "0.7.4" } -deepseek-tools = { path = "../tools", version = "0.7.4" } +deepseek-secrets = { path = "../secrets", version = "0.7.5" } +deepseek-tools = { path = "../tools", version = "0.7.5" } async-stream = "0.3.6" async-trait = "0.1" bytes = "1.11.0" diff --git a/crates/tui/src/commands/debug.rs b/crates/tui/src/commands/debug.rs index fa6215ed..593958d9 100644 --- a/crates/tui/src/commands/debug.rs +++ b/crates/tui/src/commands/debug.rs @@ -3,10 +3,40 @@ //! Debug commands: tokens, cost, system, context, undo, retry use super::CommandResult; -use crate::models::SystemPrompt; +use crate::compaction::estimate_input_tokens_conservative; +use crate::models::{SystemPrompt, context_window_for_model}; use crate::tui::app::{App, AppAction}; use crate::tui::history::HistoryCell; +fn token_count(value: Option) -> String { + value.map_or_else(|| "not reported".to_string(), |tokens| tokens.to_string()) +} + +fn active_context_summary(app: &App) -> String { + let estimated = + estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref()); + match context_window_for_model(&app.model) { + Some(window) => { + let used = estimated.min(window as usize); + let percent = (used as f64 / f64::from(window) * 100.0).clamp(0.0, 100.0); + format!("~{used} / {window} ({percent:.1}%)") + } + None => format!("~{estimated} / unknown window"), + } +} + +fn cache_summary(app: &App) -> String { + match ( + app.last_prompt_cache_hit_tokens, + app.last_prompt_cache_miss_tokens, + ) { + (Some(hit), Some(miss)) => format!("{hit} hit / {miss} miss"), + (Some(hit), None) => format!("{hit} hit / miss not reported"), + (None, Some(miss)) => format!("hit not reported / {miss} miss"), + (None, None) => "not reported".to_string(), + } +} + /// Show token usage for session pub fn tokens(app: &mut App) -> CommandResult { let message_count = app.api_messages.len(); @@ -15,12 +45,24 @@ pub fn tokens(app: &mut App) -> CommandResult { CommandResult::message(format!( "Token Usage:\n\ ─────────────────────────────\n\ - Total tokens: {}\n\ - Session cost: ${:.4}\n\ - API messages: {}\n\ - Chat messages: {}\n\ - Model: {}", - app.total_tokens, app.session_cost, message_count, chat_count, app.model, + Active context: {}\n\ + Last API input: {} (turn telemetry; may count repeated prefix across tool rounds)\n\ + Last API output: {}\n\ + Cache hit/miss: {} (telemetry/cost only)\n\ + Cumulative tokens: {} (session usage telemetry)\n\ + Approx session cost: ${:.4}\n\ + API messages: {}\n\ + Chat messages: {}\n\ + Model: {}", + active_context_summary(app), + token_count(app.last_prompt_tokens), + token_count(app.last_completion_tokens), + cache_summary(app), + app.total_tokens, + app.session_cost, + message_count, + chat_count, + app.model, )) } @@ -29,7 +71,8 @@ pub fn cost(app: &mut App) -> CommandResult { CommandResult::message(format!( "Session Cost:\n\ ─────────────────────────────\n\ - Total spent: ${:.4}\n\n\ + Approx total spent: ${:.4}\n\n\ + Cost estimates are approximate and use provider usage telemetry when available.\n\n\ DeepSeek API Pricing:\n\ ─────────────────────────────\n\ Pricing details are not configured in this CLI.", @@ -113,9 +156,16 @@ mod tests { let mut app = create_test_app(); app.total_tokens = 1234; app.session_cost = 0.05; + app.last_prompt_tokens = Some(100); + app.last_completion_tokens = Some(25); + app.last_prompt_cache_hit_tokens = Some(70); + app.last_prompt_cache_miss_tokens = Some(30); app.api_messages.push(Message { role: "user".to_string(), - content: vec![], + content: vec![ContentBlock::Text { + text: "test".to_string(), + cache_control: None, + }], }); app.history.push(HistoryCell::User { content: "test".to_string(), @@ -125,8 +175,13 @@ mod tests { assert!(result.message.is_some()); let msg = result.message.unwrap(); assert!(msg.contains("Token Usage")); - assert!(msg.contains("Total tokens:")); - assert!(msg.contains("Session cost:")); + assert!(msg.contains("Active context:")); + assert!(msg.contains("Last API input:")); + assert!(msg.contains("Last API output:")); + assert!(msg.contains("Cache hit/miss:")); + assert!(msg.contains("70 hit / 30 miss")); + assert!(msg.contains("Cumulative tokens:")); + assert!(msg.contains("Approx session cost:")); assert!(msg.contains("API messages:")); assert!(msg.contains("Chat messages:")); assert!(msg.contains("Model:")); @@ -140,7 +195,8 @@ mod tests { assert!(result.message.is_some()); let msg = result.message.unwrap(); assert!(msg.contains("Session Cost")); - assert!(msg.contains("Total spent:")); + assert!(msg.contains("Approx total spent:")); + assert!(msg.contains("approximate")); assert!(msg.contains("$0.1234")); } diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 1af64f15..3c825ded 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -445,7 +445,7 @@ pub struct ContextConfig { /// Verbatim window: last N turns never summarized. Default: 16. #[serde(default)] pub verbatim_window_turns: Option, - /// Soft seam thresholds (cumulative input+output tokens). + /// Soft seam thresholds based on the active request input estimate. #[serde(default)] pub l1_threshold: Option, #[serde(default)] diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index bcb665db..e056877c 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -354,9 +354,9 @@ fn should_transparently_retry_stream( /// Max output tokens requested for normal agent turns. Generous on purpose: /// V4 thinking models can produce tens of thousands of reasoning tokens on /// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M -/// context window. 256K leaves the model effectively unconstrained on -/// output without us imposing artificial per-turn caps that surfaced as the -/// assistant "stopping mid-response" when reasoning consumed the budget. +/// context window. v0.7.5 keeps this cap fixed instead of silently lowering +/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget +/// plus safety headroom before sending the next request. const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144; /// Keep this many most recent messages when emergency trimming is required. const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4; @@ -1199,6 +1199,10 @@ fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option u64 { + u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64) +} + fn is_context_length_error_message(message: &str) -> bool { crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput } @@ -2440,7 +2444,7 @@ impl Engine { /// Handle a turn using the DeepSeek API. #[allow(clippy::too_many_lines)] /// Run the pre-request layered-context checkpoint (#159). Checks whether - /// cumulative tokens have crossed a soft-seam threshold and, if so, + /// the active input estimate has crossed a soft-seam threshold and, if so, /// produces an `` block via Flash and appends it as an /// assistant message. Called from `handle_deepseek_turn` before each API /// request so the model always has the latest navigation aids. @@ -2452,18 +2456,8 @@ impl Engine { return; } - // Cumulative tokens: session total (all turns so far) + current - // estimated input (the messages that will be sent next). - let cumulative_input = self - .session - .total_usage - .input_tokens - .saturating_add(self.session.total_usage.output_tokens); - let cumulative_estimate = - cumulative_input.saturating_add(self.estimated_input_tokens() as u64); - let highest = seam_mgr.highest_level().await; - let Some(level) = seam_mgr.seam_level_for(cumulative_estimate as usize, highest) else { + let Some(level) = seam_mgr.seam_level_for(self.estimated_input_tokens(), highest) else { return; }; @@ -2563,8 +2557,8 @@ impl Engine { /// they're still running. async fn maybe_advance_cycle(&mut self, mode: AppMode) { if !should_advance_cycle( - self.session.total_usage.input_tokens, - self.session.total_usage.output_tokens, + self.estimated_input_tokens() as u64, + turn_response_headroom_tokens(), &self.session.model, &self.config.cycle, false, diff --git a/crates/tui/src/cycle_manager.rs b/crates/tui/src/cycle_manager.rs index 293e1bc8..b7b7a19e 100644 --- a/crates/tui/src/cycle_manager.rs +++ b/crates/tui/src/cycle_manager.rs @@ -29,9 +29,12 @@ //! //! ## Trigger //! -//! - Token threshold: **768K** by default (~75% of the 1M window). This is a -//! rare overflow safety net. Optional soft seams at 192K/384K/576K are -//! controlled by the opt-in layered context manager (#159). +//! - Token threshold: **768K** active input by default (~75% of the 1M window). +//! This is a rare overflow safety net. The trigger is based on the next +//! request's live input estimate, not lifetime summed API usage, with +//! assistant-output and safety headroom considered against the model window. +//! Optional soft seams at 192K/384K/576K are controlled by the opt-in layered +//! context manager (#159). //! - Phase guard: callers only invoke `should_advance_cycle` at clean turn //! boundaries (no in-flight tool, no streaming, no approval modal). //! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators @@ -48,7 +51,9 @@ use serde::{Deserialize, Serialize}; use crate::client::DeepSeekClient; use crate::llm_client::LlmClient; -use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt}; +use crate::models::{ + ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt, context_window_for_model, +}; use crate::tools::plan::{PlanSnapshot, SharedPlanState}; use crate::tools::subagent::{SharedSubAgentManager, SubAgentResult, SubAgentStatus}; use crate::tools::todo::{SharedTodoList, TodoListSnapshot}; @@ -151,14 +156,20 @@ pub struct CycleBriefing { /// Decide whether a cycle boundary should fire. /// -/// `usage` is the *cumulative* session input+output tokens (both `u64` to -/// match `SessionUsage`). `in_flight` is true when a tool is mid-execution, -/// stream is open, or an approval modal is pending — in those cases the -/// caller must wait until the next clean boundary. +/// `active_input_tokens` is the estimated token count of the next request's +/// current input, including previous assistant/tool output that is now part of +/// the transcript. `reserved_response_headroom_tokens` is the max output budget +/// plus any provider safety headroom reserved for that next request. Lifetime +/// API usage is intentionally not used here because it repeatedly counts the +/// same stable prefix across requests. +/// +/// `in_flight` is true when a tool is mid-execution, stream is open, or an +/// approval modal is pending — in those cases the caller must wait until the +/// next clean boundary. #[must_use] pub fn should_advance_cycle( - cumulative_input_tokens: u64, - cumulative_output_tokens: u64, + active_input_tokens: u64, + reserved_response_headroom_tokens: u64, model: &str, cfg: &CycleConfig, in_flight: bool, @@ -166,12 +177,14 @@ pub fn should_advance_cycle( if !cfg.enabled || in_flight { return false; } - let total = cumulative_input_tokens.saturating_add(cumulative_output_tokens); let threshold = cfg.threshold_for(model) as u64; if threshold == 0 { return false; } - total >= threshold + let trigger_floor = context_window_for_model(model) + .map(|window| u64::from(window).saturating_sub(reserved_response_headroom_tokens)) + .map_or(threshold, |window_floor| threshold.min(window_floor)); + active_input_tokens >= trigger_floor } /// Roll-up of state that survives a cycle boundary deterministically. @@ -759,12 +772,60 @@ mod tests { } #[test] - fn should_advance_combines_input_and_output() { + fn should_advance_considers_output_plus_safety_headroom() { let cfg = CycleConfig::default(); - // 400K + 400K = 800K > 768K threshold + // Below the 768K active-input threshold, but too close to the 1M + // model window once the next assistant response and safety headroom are + // included. assert!(should_advance_cycle( - 400_000, - 400_000, + 737_000, + 263_168, + "deepseek-v4-pro", + &cfg, + false + )); + } + + #[test] + fn should_not_count_lifetime_api_usage_as_active_context() { + let cfg = CycleConfig::default(); + assert!(!should_advance_cycle( + 120_000, + 64_000, + "deepseek-v4-pro", + &cfg, + false + )); + } + + #[test] + fn should_advance_v4_calibrates_threshold_against_output_reserve() { + let cfg = CycleConfig::default(); + let reserve = 263_168; + assert!(!should_advance_cycle( + 700_000, + reserve, + "deepseek-v4-pro", + &cfg, + false + )); + assert!(should_advance_cycle( + 738_000, + reserve, + "deepseek-v4-pro", + &cfg, + false + )); + assert!(should_advance_cycle( + 768_000, + reserve, + "deepseek-v4-pro", + &cfg, + false + )); + assert!(should_advance_cycle( + 900_000, + reserve, "deepseek-v4-pro", &cfg, false diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index 6ce7fce2..2d4e6d97 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -75,7 +75,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise - **Planning / tracking**: `update_plan` (high-level strategy), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `checklist_write` (granular progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `todo_*` aliases (legacy compatibility), `note` (persistent memory). - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`. -- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. +- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait. - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; `github_issue_context` / `github_pr_context` (read-only); `github_comment` / `github_close_issue` (approval + evidence required); `automation_*` scheduling tools. - **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse). - **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`. @@ -108,6 +108,7 @@ Don't reach for `exec_shell` when: - You just need to read or write a file — `read_file` / `write_file` are faster and show up in the tool log. - The command is a single `cat`, `ls`, or `echo` — use `read_file`, `list_dir`, or just state the result. - You're tempted to pipe `curl` for a web lookup — `web_search` or `fetch_url` give structured results. +- The command may run for minutes, start a server, run a full test suite, or perform a scientific/release computation — use `task_shell_start` or `exec_shell` with `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`. ### `agent_spawn` Don't reach for `agent_spawn` when: diff --git a/crates/tui/src/seam_manager.rs b/crates/tui/src/seam_manager.rs index 1ecc0c51..e18e8207 100644 --- a/crates/tui/src/seam_manager.rs +++ b/crates/tui/src/seam_manager.rs @@ -17,7 +17,7 @@ //! //! ## Soft seam levels //! -//! | Level | Trigger (tokens) | Covers messages | Density | +//! | Level | Active input trigger | Covers messages | Density | //! |-------|------------------|--------------------|----------------| //! | L1 | 192K | 0–128K | ~2,500 tokens | //! | L2 | 384K | 0–320K | ~1,800 tokens | @@ -45,7 +45,7 @@ use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPr /// Default seam model — Flash is cheap and fast, ideal for summarization. pub const DEFAULT_SEAM_MODEL: &str = "deepseek-v4-flash"; -/// Default thresholds (cumulative input+output tokens). +/// Default thresholds based on the active request input estimate. pub const DEFAULT_L1_THRESHOLD: usize = 192_000; pub const DEFAULT_L2_THRESHOLD: usize = 384_000; pub const DEFAULT_L3_THRESHOLD: usize = 576_000; @@ -66,7 +66,7 @@ pub struct SeamConfig { pub enabled: bool, /// Verbatim window: last N turns never summarized. pub verbatim_window_turns: usize, - /// Soft seam thresholds. + /// Soft seam thresholds based on the active request input estimate. pub l1_threshold: usize, pub l2_threshold: usize, pub l3_threshold: usize, @@ -143,29 +143,14 @@ impl SeamManager { } /// Determine which seam level (if any) should fire for the given - /// cumulative token count. Returns `None` when no seam is due. + /// active request input estimate. Returns `None` when no seam is due. #[must_use] pub fn seam_level_for( &self, - cumulative_tokens: usize, + active_input_tokens: usize, highest_existing_level: Option, ) -> Option { - if !self.config.enabled { - return None; - } - let highest = highest_existing_level.unwrap_or(0); - - // Each level fires at most once, and only in order. - if highest < 1 && cumulative_tokens >= self.config.l1_threshold { - return Some(1); - } - if highest < 2 && cumulative_tokens >= self.config.l2_threshold { - return Some(2); - } - if highest < 3 && cumulative_tokens >= self.config.l3_threshold { - return Some(3); - } - None + seam_level_for_active_input(&self.config, active_input_tokens, highest_existing_level) } /// Check whether the hard cycle boundary is crossed. @@ -174,8 +159,8 @@ impl SeamManager { /// Kept as the canonical boundary definition for future wiring. #[must_use] #[allow(dead_code)] - pub fn should_cycle(&self, cumulative_tokens: usize) -> bool { - self.config.enabled && cumulative_tokens >= self.config.cycle_threshold + pub fn should_cycle(&self, active_input_tokens: usize) -> bool { + self.config.enabled && active_input_tokens >= self.config.cycle_threshold } /// Compute the verbatim window: the last N message indices that must @@ -577,6 +562,30 @@ impl SeamManager { } } +#[must_use] +pub fn seam_level_for_active_input( + config: &SeamConfig, + active_input_tokens: usize, + highest_existing_level: Option, +) -> Option { + if !config.enabled { + return None; + } + let highest = highest_existing_level.unwrap_or(0); + + // Each level fires at most once, and only in order. + if highest < 1 && active_input_tokens >= config.l1_threshold { + return Some(1); + } + if highest < 2 && active_input_tokens >= config.l2_threshold { + return Some(2); + } + if highest < 3 && active_input_tokens >= config.l3_threshold { + return Some(3); + } + None +} + /// Truncate a string to max_chars, respecting Unicode boundaries. fn truncate_chars(text: &str, max_chars: usize) -> String { if max_chars == 0 { @@ -598,15 +607,29 @@ mod tests { // Test the pure logic functions only. let config = SeamConfig::default(); - // Test seam_level_for logic manually. - // Below L1 - assert!(config.enabled && 100_000 < config.l1_threshold); - // At L1 - assert!(192_000 >= config.l1_threshold); - // At L2 - assert!(384_000 >= config.l2_threshold); - // At L3 - assert!(576_000 >= config.l3_threshold); + assert_eq!(seam_level_for_active_input(&config, 100_000, None), None); + assert_eq!(seam_level_for_active_input(&config, 192_000, None), Some(1)); + assert_eq!( + seam_level_for_active_input(&config, 384_000, Some(1)), + Some(2) + ); + assert_eq!( + seam_level_for_active_input(&config, 576_000, Some(2)), + Some(3) + ); + } + + #[test] + fn seam_trigger_uses_active_request_size_not_lifetime_usage() { + let config = SeamConfig::default(); + let lifetime_prompt_usage = 900_000usize; + let active_request_input = 120_000usize; + + assert!(lifetime_prompt_usage >= config.l3_threshold); + assert_eq!( + seam_level_for_active_input(&config, active_request_input, None), + None + ); } #[test] diff --git a/crates/tui/src/tools/shell.rs b/crates/tui/src/tools/shell.rs index c892b389..740f3c36 100644 --- a/crates/tui/src/tools/shell.rs +++ b/crates/tui/src/tools/shell.rs @@ -1287,6 +1287,10 @@ use crate::tools::spec::{ use async_trait::async_trait; use serde_json::json; +const FOREGROUND_TIMEOUT_RECOVERY_HINT: &str = "Foreground exec_shell is for bounded commands. \ +The timed-out process was killed; rerun long work with task_shell_start or exec_shell with \ +background: true, then poll with task_shell_wait or exec_shell_wait."; + async fn execute_foreground_via_background( context: &ToolContext, command: &str, @@ -1372,7 +1376,7 @@ impl ToolSpec for ExecShellTool { } fn description(&self) -> &'static str { - "Execute a shell command in the workspace directory. Returns stdout, stderr, and exit code." + "Execute a shell command in the workspace directory. Foreground mode is for bounded commands; use background=true or task_shell_start for long-running work, then poll/wait." } fn input_schema(&self) -> serde_json::Value { @@ -1389,7 +1393,7 @@ impl ToolSpec for ExecShellTool { }, "background": { "type": "boolean", - "description": "Run in background and return task_id (default: false)" + "description": "Run in background and return task_id (default: false). Prefer true for commands that may run for minutes; poll with exec_shell_wait or task_shell_wait." }, "interactive": { "type": "boolean", @@ -1599,7 +1603,7 @@ impl ToolSpec for ExecShellTool { ) } else if result.status == ShellStatus::TimedOut { format!( - "Command timed out after {timeout_ms}ms; process killed.\n\nSTDOUT:\n{}\n\nSTDERR:\n{}", + "Command timed out after {timeout_ms}ms; process killed.\n\n{FOREGROUND_TIMEOUT_RECOVERY_HINT}\n\nSTDOUT:\n{}\n\nSTDERR:\n{}", result.stdout, result.stderr ) } else { @@ -1609,44 +1613,60 @@ impl ToolSpec for ExecShellTool { ) }; + let mut metadata = json!({ + "exit_code": result.exit_code, + "status": format!("{:?}", result.status), + "duration_ms": result.duration_ms, + "sandboxed": result.sandboxed, + "sandbox_type": result.sandbox_type, + "sandbox_denied": result.sandbox_denied, + "task_id": result.task_id, + "stdout_len": result.stdout_len, + "stderr_len": result.stderr_len, + "stdout_truncated": result.stdout_truncated, + "stderr_truncated": result.stderr_truncated, + "stdout_omitted": result.stdout_omitted, + "stderr_omitted": result.stderr_omitted, + "summary": summary, + "stdout_summary": stdout_summary, + "stderr_summary": stderr_summary, + "safety_level": format!("{:?}", safety.level), + "interactive": interactive, + "canceled": was_cancelled, + "execpolicy": execpolicy_decision.as_ref().map(|decision| match decision { + ExecPolicyDecision::Allow => json!({ + "decision": "allow", + }), + ExecPolicyDecision::Deny(reason) => json!({ + "decision": "deny", + "reason": reason, + }), + ExecPolicyDecision::AskUser(reason) => json!({ + "decision": "ask_user", + "reason": reason, + }), + }), + }); + if result.status == ShellStatus::TimedOut && !background && !interactive { + metadata["foreground_timeout_recovery"] = json!({ + "process_killed": true, + "hint": FOREGROUND_TIMEOUT_RECOVERY_HINT, + "recommended_tools": [ + "task_shell_start", + "task_shell_wait", + "exec_shell", + "exec_shell_wait" + ], + "exec_shell_background": true, + "poll_with": ["task_shell_wait", "exec_shell_wait"] + }); + } + Ok(ToolResult { content: output, success: result.status == ShellStatus::Completed || result.status == ShellStatus::Running, - metadata: Some(json!({ - "exit_code": result.exit_code, - "status": format!("{:?}", result.status), - "duration_ms": result.duration_ms, - "sandboxed": result.sandboxed, - "sandbox_type": result.sandbox_type, - "sandbox_denied": result.sandbox_denied, - "task_id": result.task_id, - "stdout_len": result.stdout_len, - "stderr_len": result.stderr_len, - "stdout_truncated": result.stdout_truncated, - "stderr_truncated": result.stderr_truncated, - "stdout_omitted": result.stdout_omitted, - "stderr_omitted": result.stderr_omitted, - "summary": summary, - "stdout_summary": stdout_summary, - "stderr_summary": stderr_summary, - "safety_level": format!("{:?}", safety.level), - "interactive": interactive, - "canceled": was_cancelled, - "execpolicy": execpolicy_decision.as_ref().map(|decision| match decision { - ExecPolicyDecision::Allow => json!({ - "decision": "allow", - }), - ExecPolicyDecision::Deny(reason) => json!({ - "decision": "deny", - "reason": reason, - }), - ExecPolicyDecision::AskUser(reason) => json!({ - "decision": "ask_user", - "reason": reason, - }), - }), - })), + metadata: Some(metadata), }) } Err(e) => Ok(ToolResult::error(format!("Shell execution failed: {e}"))), diff --git a/crates/tui/src/tools/shell/tests.rs b/crates/tui/src/tools/shell/tests.rs index d6f30d1c..7fdb8bce 100644 --- a/crates/tui/src/tools/shell/tests.rs +++ b/crates/tui/src/tools/shell/tests.rs @@ -263,6 +263,47 @@ async fn test_exec_shell_metadata_includes_summaries() { assert!(meta.get("stdout_truncated").is_some()); } +#[tokio::test] +async fn test_exec_shell_foreground_timeout_guides_background_rerun() { + let tmp = tempdir().expect("tempdir"); + let ctx = ToolContext::new(tmp.path()); + let tool = ExecShellTool; + + let result = tool + .execute( + json!({ + "command": sleep_command(10), + "timeout_ms": 1000 + }), + &ctx, + ) + .await + .expect("execute"); + + assert!(!result.success); + assert!(result.content.contains("task_shell_start")); + assert!(result.content.contains("background: true")); + assert!(result.content.contains("process killed")); + let meta = result.metadata.expect("metadata"); + assert_eq!(meta.get("status").and_then(Value::as_str), Some("TimedOut")); + let recovery = meta + .get("foreground_timeout_recovery") + .expect("timeout recovery metadata"); + assert_eq!( + recovery + .get("exec_shell_background") + .and_then(Value::as_bool), + Some(true) + ); + assert!( + recovery + .get("hint") + .and_then(Value::as_str) + .unwrap_or_default() + .contains("exec_shell_wait") + ); +} + #[tokio::test] async fn test_exec_shell_foreground_cancel_kills_process() { let tmp = tempdir().expect("tempdir"); diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 93be0410..784cabf0 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -612,13 +612,15 @@ pub struct App { pub runtime_turn_id: Option, /// Current runtime turn status (if known). pub runtime_turn_status: Option, - /// Last prompt token usage + /// Provider-reported input tokens from the last completed turn. This is + /// telemetry/cost data and may sum repeated stable prefixes across tool + /// rounds; active context pressure is estimated from `api_messages`. pub last_prompt_tokens: Option, - /// Last completion token usage + /// Provider-reported output tokens from the last completed turn. pub last_completion_tokens: Option, - /// DeepSeek context-cache hit tokens from the last API call. + /// DeepSeek context-cache hit tokens from the last API call. Telemetry only. pub last_prompt_cache_hit_tokens: Option, - /// DeepSeek context-cache miss tokens from the last API call. + /// DeepSeek context-cache miss tokens from the last API call. Telemetry only. pub last_prompt_cache_miss_tokens: Option, /// Approximate input tokens spent re-sending prior `reasoning_content` on /// the last thinking-mode tool-calling turn (V4 §5.1.1 "Interleaved diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index b837e743..9f462bc2 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -4714,7 +4714,7 @@ fn footer_context_percent_spans(app: &App) -> Vec> { palette::TEXT_MUTED }; vec![Span::styled( - format!("ctx {percent:.0}%"), + format!("active ctx {percent:.0}%"), Style::default().fg(color), )] } @@ -4802,7 +4802,7 @@ fn footer_cache_spans(app: &App) -> Vec> { let percent = (f64::from(hit_tokens) / f64::from(total) * 100.0).clamp(0.0, 100.0); vec![Span::styled( - format!("cache {:.0}%", percent), + format!("cache hit {:.0}%", percent), Style::default().fg(palette::TEXT_MUTED), )] } diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 12726e09..27d2ed35 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -575,7 +575,7 @@ fn footer_auxiliary_spans_show_cache_when_compact() { app.last_prompt_cache_miss_tokens = Some(12_000); app.session_cost = 12.34; - let compact = spans_text(&footer_auxiliary_spans(&app, 12)); + let compact = spans_text(&footer_auxiliary_spans(&app, 14)); assert!(compact.contains("cache")); assert!(!compact.contains('$')); } @@ -589,7 +589,7 @@ fn footer_auxiliary_spans_show_cache_and_cost_when_roomy() { app.session_cost = 12.34; let roomy = spans_text(&footer_auxiliary_spans(&app, 32)); - assert!(roomy.contains("cache 75%")); + assert!(roomy.contains("cache hit 75%")); assert!(roomy.contains("$12.34")); assert!( !roomy.contains("ctx"), diff --git a/crates/tui/src/tui/widgets/header.rs b/crates/tui/src/tui/widgets/header.rs index 28b9b74e..76d0d526 100644 --- a/crates/tui/src/tui/widgets/header.rs +++ b/crates/tui/src/tui/widgets/header.rs @@ -31,7 +31,8 @@ pub struct HeaderData<'a> { pub context_window: Option, /// Accumulated session cost in USD. pub session_cost: f64, - /// Input tokens from the most recent API call (current context utilization). + /// Active context input tokens used for context utilization. Callers should + /// pass a sanitized live-context estimate, not cumulative API usage. pub last_prompt_tokens: Option, /// Short label for the current reasoning-effort tier (e.g. "max", "high", /// "off"). Rendered as a chip when space allows. @@ -90,12 +91,12 @@ impl<'a> HeaderData<'a> { total_tokens: u32, context_window: Option, session_cost: f64, - last_prompt_tokens: Option, + active_context_input_tokens: Option, ) -> Self { self.total_tokens = total_tokens; self.context_window = context_window; self.session_cost = session_cost; - self.last_prompt_tokens = last_prompt_tokens; + self.last_prompt_tokens = active_context_input_tokens; self } } diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index fe31707e..f7408c84 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -157,6 +157,26 @@ Readability semantics: `crowded`, `refreshing`, `verifying`, and `resetting`; these are derived from capacity and compaction events without exposing internal formulas in normal UI. +### Token Quantities and Drivers + +DeepSeek V4 prefix caching makes token labels matter. These quantities are kept +separate: + +| Quantity | Meaning | Allowed to drive | +|---|---|---| +| Active request input estimate | Conservative estimate of the next request's live system prompt and transcript payload. | Header/footer context percent, hard-cycle trigger, opt-in Flash seam trigger, and emergency overflow preflight. | +| Reserved response headroom | The requested `max_tokens` budget plus safety headroom. v0.7.5 keeps normal turns at `262144` output tokens and adds `1024` safety tokens for context-window checks. | Hard-cycle and emergency overflow budget checks only. | +| Cumulative API usage | Provider-reported input plus output tokens summed across completed API calls; multi-tool turns may count the same stable prefix more than once. | Session usage and approximate cost telemetry only. | +| Prompt cache hit/miss | Provider cache telemetry for the most recent call when available. | Cache-hit display and cost estimation only; never compaction, seam, or cycle triggers. | +| Context percent | Active request input estimate divided by the model context window. | Display only; it mirrors the active-input basis used by context safeguards. | +| Cost estimate | Approximate spend from provider usage and configured DeepSeek rates. | Display only. | + +For the default V4 path, hard cycles fire when active input reaches the smaller +of the configured cycle threshold (`768000`) and the model window minus reserved +response headroom. Replacement compaction remains opt-in (`auto_compact = false` +by default), the Flash seam manager remains opt-in (`[context].enabled = false`), +and the capacity controller remains disabled unless configured. + ### Command Migration Notes If you are upgrading from older releases: @@ -196,7 +216,9 @@ If you are upgrading from older releases: - `[snapshots].enabled` (bool, default `true`) - `[snapshots].max_age_days` (int, default `7`) - snapshots live under `~/.deepseek/snapshots///.git` and never use the workspace's own `.git` directory -- `context.*` (optional): append-only Flash seam manager, currently opt-in: +- `context.*` (optional): append-only Flash seam manager, currently opt-in. + Thresholds use the active request input estimate, not lifetime summed API + usage: - `[context].enabled` (bool, default `false`) - `[context].verbatim_window_turns` (int, default `16`) - `[context].l1_threshold` (int, default `192000`) diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index 9ba12f4a..5bcbeaf5 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -15,7 +15,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts for the same backing operation are a model trap — the LLM will alternate between them and the cache hit rate suffers. -## Final surface (v0.7.4) +## Current surface (v0.7.5) ### File operations @@ -40,19 +40,25 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts | Tool | Niche | |---|---| -| `exec_shell` | Run a shell command. Foreground runs are cancellable, but use them only for bounded commands. | +| `exec_shell` | Run a shell command. Foreground runs are cancellable, but use them only for bounded commands; timeout kills the process and returns a background-rerun hint. | | `exec_shell_wait` | Poll a background task for incremental output. | | `exec_shell_interact` | Send stdin to a running background task and read incremental output. | | `task_shell_start` | Start a long-running command in the background and return immediately. Preferred over foreground shell for diagnostics, tests, searches, and servers that may run for minutes. | | `task_shell_wait` | Poll a background command. If `gate` is supplied after completion, record structured gate evidence on the active durable task. | +When a foreground shell command times out, the process is not continued +silently. The tool result tells the model to rerun long work with +`task_shell_start` or `exec_shell` with `background = true`, then poll with +`task_shell_wait` or `exec_shell_wait`. + Interactive shell jobs are also visible through `/jobs`. The TUI job center is fed by the same shell manager as `exec_shell`/`task_shell_start`, and shows the command, cwd, elapsed time, status, output tail, process-local shell id, and linked durable task id when available. `/jobs show`, `/jobs poll`, `/jobs wait`, `/jobs stdin`, and `/jobs cancel` provide inspect, polling, stdin, and cancel -controls for live jobs. Jobs are process-local; after restart, detached entries -are marked stale rather than presented as live processes. +controls for live jobs. Jobs are process-local; after restart, live process +state is not reattached, and any remembered detached entries must be marked +stale rather than presented as live processes. ### MCP manager and palette discovery diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json index 3a4cc24f..f2750dc0 100644 --- a/npm/deepseek-tui/package.json +++ b/npm/deepseek-tui/package.json @@ -1,7 +1,7 @@ { "name": "deepseek-tui", - "version": "0.7.4", - "deepseekBinaryVersion": "0.7.4", + "version": "0.7.5", + "deepseekBinaryVersion": "0.7.5", "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.", "author": "Hmbown", "license": "MIT",