release: v0.7.5 — token-basis fixes, shell timeout recovery, context/cache policy

Issues #202, #203, #204, #205:

- Cycle/seam triggers use active request input size + response
  headroom reserve, not lifetime cumulative API usage.
- V4 hard-cycle headroom calibrated around fixed TURN_MAX_OUTPUT_TOKENS
  plus CONTEXT_HEADROOM_TOKENS safety buffer.
- /tokens, /cost, footer/header labels, and docs now separate
  active context, turn telemetry, cumulative usage, cache hit/miss,
  context percent, and cost.
- Foreground exec_shell timeout output tells the model the process
  was killed and suggests task_shell_start or background exec_shell
  plus poll/wait.
- Added regression tests for active-token basis, V4 headroom,
  seam trigger basis, footer label behavior, and shell timeout
  recovery metadata.
- Preserved #200/#201 policy: V4 default is append-only,
  prefix-cache preserving; replacement compaction, Flash seams,
  and capacity intervention remain opt-in.
This commit is contained in:
Hunter Bown
2026-04-29 10:13:27 -05:00
parent 0578eb701e
commit c2b2c284f6
28 changed files with 412 additions and 181 deletions
Generated
+14 -14
View File
@@ -1011,7 +1011,7 @@ dependencies = [
[[package]]
name = "deepseek-agent"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"deepseek-config",
"serde",
@@ -1019,7 +1019,7 @@ dependencies = [
[[package]]
name = "deepseek-app-server"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"axum",
@@ -1042,7 +1042,7 @@ dependencies = [
[[package]]
name = "deepseek-config"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"deepseek-secrets",
@@ -1055,7 +1055,7 @@ dependencies = [
[[package]]
name = "deepseek-core"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"chrono",
@@ -1074,7 +1074,7 @@ dependencies = [
[[package]]
name = "deepseek-execpolicy"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -1083,7 +1083,7 @@ dependencies = [
[[package]]
name = "deepseek-hooks"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"async-trait",
@@ -1097,7 +1097,7 @@ dependencies = [
[[package]]
name = "deepseek-mcp"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -1107,7 +1107,7 @@ dependencies = [
[[package]]
name = "deepseek-protocol"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"serde",
"serde_json",
@@ -1115,7 +1115,7 @@ dependencies = [
[[package]]
name = "deepseek-secrets"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"dirs",
"keyring",
@@ -1128,7 +1128,7 @@ dependencies = [
[[package]]
name = "deepseek-state"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"chrono",
@@ -1140,7 +1140,7 @@ dependencies = [
[[package]]
name = "deepseek-tools"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"async-trait",
@@ -1153,7 +1153,7 @@ dependencies = [
[[package]]
name = "deepseek-tui"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"arboard",
@@ -1213,7 +1213,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-cli"
version = "0.7.4"
version = "0.7.5"
dependencies = [
"anyhow",
"chrono",
@@ -1236,7 +1236,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-core"
version = "0.7.4"
version = "0.7.5"
[[package]]
name = "deranged"
+1 -1
View File
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
resolver = "2"
[workspace.package]
version = "0.7.4"
version = "0.7.5"
edition = "2024"
license = "MIT"
repository = "https://github.com/Hmbown/DeepSeek-TUI"
+4
View File
@@ -200,9 +200,13 @@ exponential_base = 2.0
[context]
enabled = false
verbatim_window_turns = 16
# Thresholds are based on the active request input estimate, not lifetime
# summed API usage.
l1_threshold = 192000
l2_threshold = 384000
l3_threshold = 576000
# Hard cycle also reserves the normal 262144-token output budget plus 1024
# safety tokens against the model window.
cycle_threshold = 768000
seam_model = "deepseek-v4-flash"
+1 -1
View File
@@ -7,5 +7,5 @@ repository.workspace = true
description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
[dependencies]
deepseek-config = { path = "../config", version = "0.7.4" }
deepseek-config = { path = "../config", version = "0.7.5" }
serde.workspace = true
+9 -9
View File
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
anyhow.workspace = true
axum.workspace = true
clap.workspace = true
deepseek-agent = { path = "../agent", version = "0.7.4" }
deepseek-config = { path = "../config", version = "0.7.4" }
deepseek-core = { path = "../core", version = "0.7.4" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" }
deepseek-hooks = { path = "../hooks", version = "0.7.4" }
deepseek-mcp = { path = "../mcp", version = "0.7.4" }
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
deepseek-state = { path = "../state", version = "0.7.4" }
deepseek-tools = { path = "../tools", version = "0.7.4" }
deepseek-agent = { path = "../agent", version = "0.7.5" }
deepseek-config = { path = "../config", version = "0.7.5" }
deepseek-core = { path = "../core", version = "0.7.5" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" }
deepseek-hooks = { path = "../hooks", version = "0.7.5" }
deepseek-mcp = { path = "../mcp", version = "0.7.5" }
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
deepseek-state = { path = "../state", version = "0.7.5" }
deepseek-tools = { path = "../tools", version = "0.7.5" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
+7 -7
View File
@@ -14,13 +14,13 @@ path = "src/main.rs"
anyhow.workspace = true
clap.workspace = true
clap_complete.workspace = true
deepseek-agent = { path = "../agent", version = "0.7.4" }
deepseek-app-server = { path = "../app-server", version = "0.7.4" }
deepseek-config = { path = "../config", version = "0.7.4" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" }
deepseek-mcp = { path = "../mcp", version = "0.7.4" }
deepseek-secrets = { path = "../secrets", version = "0.7.4" }
deepseek-state = { path = "../state", version = "0.7.4" }
deepseek-agent = { path = "../agent", version = "0.7.5" }
deepseek-app-server = { path = "../app-server", version = "0.7.5" }
deepseek-config = { path = "../config", version = "0.7.5" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" }
deepseek-mcp = { path = "../mcp", version = "0.7.5" }
deepseek-secrets = { path = "../secrets", version = "0.7.5" }
deepseek-state = { path = "../state", version = "0.7.5" }
chrono.workspace = true
dirs.workspace = true
serde.workspace = true
+1 -1
View File
@@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite
[dependencies]
anyhow.workspace = true
deepseek-secrets = { path = "../secrets", version = "0.7.4" }
deepseek-secrets = { path = "../secrets", version = "0.7.5" }
dirs.workspace = true
serde.workspace = true
serde_json.workspace = true
+8 -8
View File
@@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
[dependencies]
anyhow.workspace = true
chrono.workspace = true
deepseek-agent = { path = "../agent", version = "0.7.4" }
deepseek-config = { path = "../config", version = "0.7.4" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" }
deepseek-hooks = { path = "../hooks", version = "0.7.4" }
deepseek-mcp = { path = "../mcp", version = "0.7.4" }
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
deepseek-state = { path = "../state", version = "0.7.4" }
deepseek-tools = { path = "../tools", version = "0.7.4" }
deepseek-agent = { path = "../agent", version = "0.7.5" }
deepseek-config = { path = "../config", version = "0.7.5" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" }
deepseek-hooks = { path = "../hooks", version = "0.7.5" }
deepseek-mcp = { path = "../mcp", version = "0.7.5" }
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
deepseek-state = { path = "../state", version = "0.7.5" }
deepseek-tools = { path = "../tools", version = "0.7.5" }
serde_json.workspace = true
tokio.workspace = true
uuid.workspace = true
+1 -1
View File
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
[dependencies]
anyhow.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
serde.workspace = true
+1 -1
View File
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
anyhow.workspace = true
async-trait.workspace = true
chrono.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
+1 -1
View File
@@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo
[dependencies]
anyhow.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
serde.workspace = true
serde_json.workspace = true
+1 -1
View File
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
+2 -2
View File
@@ -13,8 +13,8 @@ path = "src/main.rs"
[dependencies]
anyhow = "1.0.100"
arboard = "3.4"
deepseek-secrets = { path = "../secrets", version = "0.7.4" }
deepseek-tools = { path = "../tools", version = "0.7.4" }
deepseek-secrets = { path = "../secrets", version = "0.7.5" }
deepseek-tools = { path = "../tools", version = "0.7.5" }
async-stream = "0.3.6"
async-trait = "0.1"
bytes = "1.11.0"
+68 -12
View File
@@ -3,10 +3,40 @@
//! Debug commands: tokens, cost, system, context, undo, retry
use super::CommandResult;
use crate::models::SystemPrompt;
use crate::compaction::estimate_input_tokens_conservative;
use crate::models::{SystemPrompt, context_window_for_model};
use crate::tui::app::{App, AppAction};
use crate::tui::history::HistoryCell;
/// Render an optional provider-reported token count for display.
///
/// Returns the count as decimal text, or `"not reported"` when the provider
/// supplied no value.
fn token_count(value: Option<u32>) -> String {
    match value {
        Some(tokens) => tokens.to_string(),
        None => "not reported".to_string(),
    }
}
/// Describe how much of the model window the active request input occupies.
///
/// The input size is a conservative estimate computed from `app.api_messages`
/// and the system prompt. When the model's context window is known, the
/// estimate is capped at the window and shown with a percentage; otherwise the
/// raw estimate is shown against an "unknown window".
fn active_context_summary(app: &App) -> String {
    let estimated =
        estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
    let Some(window) = context_window_for_model(&app.model) else {
        return format!("~{estimated} / unknown window");
    };
    // Never report more than the window itself, so the percentage tops out at 100.
    let used = estimated.min(window as usize);
    let ratio = used as f64 / f64::from(window);
    let percent = (ratio * 100.0).clamp(0.0, 100.0);
    format!("~{used} / {window} ({percent:.1}%)")
}
/// Summarize the prompt-cache hit/miss token counts recorded on `app`.
///
/// Each side is shown as a count when present and as "... not reported" when
/// absent; if neither side is present the whole summary is "not reported".
fn cache_summary(app: &App) -> String {
    let hit = app.last_prompt_cache_hit_tokens;
    let miss = app.last_prompt_cache_miss_tokens;
    if hit.is_none() && miss.is_none() {
        return "not reported".to_string();
    }
    let hit_part = hit.map_or_else(
        || "hit not reported".to_string(),
        |tokens| format!("{tokens} hit"),
    );
    let miss_part = miss.map_or_else(
        || "miss not reported".to_string(),
        |tokens| format!("{tokens} miss"),
    );
    format!("{hit_part} / {miss_part}")
}
/// Show token usage for session
pub fn tokens(app: &mut App) -> CommandResult {
let message_count = app.api_messages.len();
@@ -15,12 +45,24 @@ pub fn tokens(app: &mut App) -> CommandResult {
CommandResult::message(format!(
"Token Usage:\n\
─────────────────────────────\n\
Total tokens: {}\n\
Session cost: ${:.4}\n\
API messages: {}\n\
Chat messages: {}\n\
Model: {}",
app.total_tokens, app.session_cost, message_count, chat_count, app.model,
Active context: {}\n\
Last API input: {} (turn telemetry; may count repeated prefix across tool rounds)\n\
Last API output: {}\n\
Cache hit/miss: {} (telemetry/cost only)\n\
Cumulative tokens: {} (session usage telemetry)\n\
Approx session cost: ${:.4}\n\
API messages: {}\n\
Chat messages: {}\n\
Model: {}",
active_context_summary(app),
token_count(app.last_prompt_tokens),
token_count(app.last_completion_tokens),
cache_summary(app),
app.total_tokens,
app.session_cost,
message_count,
chat_count,
app.model,
))
}
@@ -29,7 +71,8 @@ pub fn cost(app: &mut App) -> CommandResult {
CommandResult::message(format!(
"Session Cost:\n\
─────────────────────────────\n\
Total spent: ${:.4}\n\n\
Approx total spent: ${:.4}\n\n\
Cost estimates are approximate and use provider usage telemetry when available.\n\n\
DeepSeek API Pricing:\n\
─────────────────────────────\n\
Pricing details are not configured in this CLI.",
@@ -113,9 +156,16 @@ mod tests {
let mut app = create_test_app();
app.total_tokens = 1234;
app.session_cost = 0.05;
app.last_prompt_tokens = Some(100);
app.last_completion_tokens = Some(25);
app.last_prompt_cache_hit_tokens = Some(70);
app.last_prompt_cache_miss_tokens = Some(30);
app.api_messages.push(Message {
role: "user".to_string(),
content: vec![],
content: vec![ContentBlock::Text {
text: "test".to_string(),
cache_control: None,
}],
});
app.history.push(HistoryCell::User {
content: "test".to_string(),
@@ -125,8 +175,13 @@ mod tests {
assert!(result.message.is_some());
let msg = result.message.unwrap();
assert!(msg.contains("Token Usage"));
assert!(msg.contains("Total tokens:"));
assert!(msg.contains("Session cost:"));
assert!(msg.contains("Active context:"));
assert!(msg.contains("Last API input:"));
assert!(msg.contains("Last API output:"));
assert!(msg.contains("Cache hit/miss:"));
assert!(msg.contains("70 hit / 30 miss"));
assert!(msg.contains("Cumulative tokens:"));
assert!(msg.contains("Approx session cost:"));
assert!(msg.contains("API messages:"));
assert!(msg.contains("Chat messages:"));
assert!(msg.contains("Model:"));
@@ -140,7 +195,8 @@ mod tests {
assert!(result.message.is_some());
let msg = result.message.unwrap();
assert!(msg.contains("Session Cost"));
assert!(msg.contains("Total spent:"));
assert!(msg.contains("Approx total spent:"));
assert!(msg.contains("approximate"));
assert!(msg.contains("$0.1234"));
}
+1 -1
View File
@@ -445,7 +445,7 @@ pub struct ContextConfig {
/// Verbatim window: last N turns never summarized. Default: 16.
#[serde(default)]
pub verbatim_window_turns: Option<usize>,
/// Soft seam thresholds (cumulative input+output tokens).
/// Soft seam thresholds based on the active request input estimate.
#[serde(default)]
pub l1_threshold: Option<usize>,
#[serde(default)]
+11 -17
View File
@@ -354,9 +354,9 @@ fn should_transparently_retry_stream(
/// Max output tokens requested for normal agent turns. Generous on purpose:
/// V4 thinking models can produce tens of thousands of reasoning tokens on
/// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M
/// context window. 256K leaves the model effectively unconstrained on
/// output without us imposing artificial per-turn caps that surfaced as the
/// assistant "stopping mid-response" when reasoning consumed the budget.
/// context window. v0.7.5 keeps this cap fixed instead of silently lowering
/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget
/// plus safety headroom before sending the next request.
const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144;
/// Keep this many most recent messages when emergency trimming is required.
const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4;
@@ -1199,6 +1199,10 @@ fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option<usi
.and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS))
}
/// Tokens reserved for the next response: the fixed `TURN_MAX_OUTPUT_TOKENS`
/// output budget plus the `CONTEXT_HEADROOM_TOKENS` safety buffer, combined
/// with saturating addition and widened to `u64`.
fn turn_response_headroom_tokens() -> u64 {
u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64)
}
fn is_context_length_error_message(message: &str) -> bool {
crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput
}
@@ -2440,7 +2444,7 @@ impl Engine {
/// Handle a turn using the DeepSeek API.
#[allow(clippy::too_many_lines)]
/// Run the pre-request layered-context checkpoint (#159). Checks whether
/// cumulative tokens have crossed a soft-seam threshold and, if so,
/// the active input estimate has crossed a soft-seam threshold and, if so,
/// produces an `<archived_context>` block via Flash and appends it as an
/// assistant message. Called from `handle_deepseek_turn` before each API
/// request so the model always has the latest navigation aids.
@@ -2452,18 +2456,8 @@ impl Engine {
return;
}
// Cumulative tokens: session total (all turns so far) + current
// estimated input (the messages that will be sent next).
let cumulative_input = self
.session
.total_usage
.input_tokens
.saturating_add(self.session.total_usage.output_tokens);
let cumulative_estimate =
cumulative_input.saturating_add(self.estimated_input_tokens() as u64);
let highest = seam_mgr.highest_level().await;
let Some(level) = seam_mgr.seam_level_for(cumulative_estimate as usize, highest) else {
let Some(level) = seam_mgr.seam_level_for(self.estimated_input_tokens(), highest) else {
return;
};
@@ -2563,8 +2557,8 @@ impl Engine {
/// they're still running.
async fn maybe_advance_cycle(&mut self, mode: AppMode) {
if !should_advance_cycle(
self.session.total_usage.input_tokens,
self.session.total_usage.output_tokens,
self.estimated_input_tokens() as u64,
turn_response_headroom_tokens(),
&self.session.model,
&self.config.cycle,
false,
+77 -16
View File
@@ -29,9 +29,12 @@
//!
//! ## Trigger
//!
//! - Token threshold: **768K** by default (~75% of the 1M window). This is a
//! rare overflow safety net. Optional soft seams at 192K/384K/576K are
//! controlled by the opt-in layered context manager (#159).
//! - Token threshold: **768K** active input by default (~75% of the 1M window).
//! This is a rare overflow safety net. The trigger is based on the next
//! request's live input estimate, not lifetime summed API usage, with
//! assistant-output and safety headroom considered against the model window.
//! Optional soft seams at 192K/384K/576K are controlled by the opt-in layered
//! context manager (#159).
//! - Phase guard: callers only invoke `should_advance_cycle` at clean turn
//! boundaries (no in-flight tool, no streaming, no approval modal).
//! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators
@@ -48,7 +51,9 @@ use serde::{Deserialize, Serialize};
use crate::client::DeepSeekClient;
use crate::llm_client::LlmClient;
use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt};
use crate::models::{
ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt, context_window_for_model,
};
use crate::tools::plan::{PlanSnapshot, SharedPlanState};
use crate::tools::subagent::{SharedSubAgentManager, SubAgentResult, SubAgentStatus};
use crate::tools::todo::{SharedTodoList, TodoListSnapshot};
@@ -151,14 +156,20 @@ pub struct CycleBriefing {
/// Decide whether a cycle boundary should fire.
///
/// `usage` is the *cumulative* session input+output tokens (both `u64` to
/// match `SessionUsage`). `in_flight` is true when a tool is mid-execution,
/// stream is open, or an approval modal is pending — in those cases the
/// caller must wait until the next clean boundary.
/// `active_input_tokens` is the estimated token count of the next request's
/// current input, including previous assistant/tool output that is now part of
/// the transcript. `reserved_response_headroom_tokens` is the max output budget
/// plus any provider safety headroom reserved for that next request. Lifetime
/// API usage is intentionally not used here because it repeatedly counts the
/// same stable prefix across requests.
///
/// `in_flight` is true when a tool is mid-execution, stream is open, or an
/// approval modal is pending — in those cases the caller must wait until the
/// next clean boundary.
#[must_use]
pub fn should_advance_cycle(
cumulative_input_tokens: u64,
cumulative_output_tokens: u64,
active_input_tokens: u64,
reserved_response_headroom_tokens: u64,
model: &str,
cfg: &CycleConfig,
in_flight: bool,
@@ -166,12 +177,14 @@ pub fn should_advance_cycle(
if !cfg.enabled || in_flight {
return false;
}
let total = cumulative_input_tokens.saturating_add(cumulative_output_tokens);
let threshold = cfg.threshold_for(model) as u64;
if threshold == 0 {
return false;
}
total >= threshold
let trigger_floor = context_window_for_model(model)
.map(|window| u64::from(window).saturating_sub(reserved_response_headroom_tokens))
.map_or(threshold, |window_floor| threshold.min(window_floor));
active_input_tokens >= trigger_floor
}
/// Roll-up of state that survives a cycle boundary deterministically.
@@ -759,12 +772,60 @@ mod tests {
}
#[test]
fn should_advance_combines_input_and_output() {
fn should_advance_considers_output_plus_safety_headroom() {
let cfg = CycleConfig::default();
// 400K + 400K = 800K > 768K threshold
// Below the 768K active-input threshold, but too close to the 1M
// model window once the next assistant response and safety headroom are
// included.
assert!(should_advance_cycle(
400_000,
400_000,
737_000,
263_168,
"deepseek-v4-pro",
&cfg,
false
));
}
#[test]
fn should_not_count_lifetime_api_usage_as_active_context() {
let cfg = CycleConfig::default();
assert!(!should_advance_cycle(
120_000,
64_000,
"deepseek-v4-pro",
&cfg,
false
));
}
#[test]
fn should_advance_v4_calibrates_threshold_against_output_reserve() {
let cfg = CycleConfig::default();
let reserve = 263_168;
assert!(!should_advance_cycle(
700_000,
reserve,
"deepseek-v4-pro",
&cfg,
false
));
assert!(should_advance_cycle(
738_000,
reserve,
"deepseek-v4-pro",
&cfg,
false
));
assert!(should_advance_cycle(
768_000,
reserve,
"deepseek-v4-pro",
&cfg,
false
));
assert!(should_advance_cycle(
900_000,
reserve,
"deepseek-v4-pro",
&cfg,
false
+2 -1
View File
@@ -75,7 +75,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise
- **Planning / tracking**: `update_plan` (high-level strategy), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `checklist_write` (granular progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `todo_*` aliases (legacy compatibility), `note` (persistent memory).
- **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`.
- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`.
- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait.
- **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; `github_issue_context` / `github_pr_context` (read-only); `github_comment` / `github_close_issue` (approval + evidence required); `automation_*` scheduling tools.
- **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse).
- **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`.
@@ -108,6 +108,7 @@ Don't reach for `exec_shell` when:
- You just need to read or write a file — `read_file` / `write_file` are faster and show up in the tool log.
- The command is a single `cat`, `ls`, or `echo` — use `read_file`, `list_dir`, or just state the result.
- You're tempted to pipe `curl` for a web lookup — `web_search` or `fetch_url` give structured results.
- The command may run for minutes, start a server, run a full test suite, or perform a scientific/release computation — use `task_shell_start` or `exec_shell` with `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`.
### `agent_spawn`
Don't reach for `agent_spawn` when:
+55 -32
View File
@@ -17,7 +17,7 @@
//!
//! ## Soft seam levels
//!
//! | Level | Trigger (tokens) | Covers messages | Density |
//! | Level | Active input trigger | Covers messages | Density |
//! |-------|------------------|--------------------|----------------|
//! | L1 | 192K | 0–128K | ~2,500 tokens |
//! | L2 | 384K | 0–320K | ~1,800 tokens |
@@ -45,7 +45,7 @@ use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPr
/// Default seam model — Flash is cheap and fast, ideal for summarization.
pub const DEFAULT_SEAM_MODEL: &str = "deepseek-v4-flash";
/// Default thresholds (cumulative input+output tokens).
/// Default thresholds based on the active request input estimate.
pub const DEFAULT_L1_THRESHOLD: usize = 192_000;
pub const DEFAULT_L2_THRESHOLD: usize = 384_000;
pub const DEFAULT_L3_THRESHOLD: usize = 576_000;
@@ -66,7 +66,7 @@ pub struct SeamConfig {
pub enabled: bool,
/// Verbatim window: last N turns never summarized.
pub verbatim_window_turns: usize,
/// Soft seam thresholds.
/// Soft seam thresholds based on the active request input estimate.
pub l1_threshold: usize,
pub l2_threshold: usize,
pub l3_threshold: usize,
@@ -143,29 +143,14 @@ impl SeamManager {
}
/// Determine which seam level (if any) should fire for the given
/// cumulative token count. Returns `None` when no seam is due.
/// active request input estimate. Returns `None` when no seam is due.
#[must_use]
pub fn seam_level_for(
&self,
cumulative_tokens: usize,
active_input_tokens: usize,
highest_existing_level: Option<u8>,
) -> Option<u8> {
if !self.config.enabled {
return None;
}
let highest = highest_existing_level.unwrap_or(0);
// Each level fires at most once, and only in order.
if highest < 1 && cumulative_tokens >= self.config.l1_threshold {
return Some(1);
}
if highest < 2 && cumulative_tokens >= self.config.l2_threshold {
return Some(2);
}
if highest < 3 && cumulative_tokens >= self.config.l3_threshold {
return Some(3);
}
None
seam_level_for_active_input(&self.config, active_input_tokens, highest_existing_level)
}
/// Check whether the hard cycle boundary is crossed.
@@ -174,8 +159,8 @@ impl SeamManager {
/// Kept as the canonical boundary definition for future wiring.
#[must_use]
#[allow(dead_code)]
pub fn should_cycle(&self, cumulative_tokens: usize) -> bool {
self.config.enabled && cumulative_tokens >= self.config.cycle_threshold
pub fn should_cycle(&self, active_input_tokens: usize) -> bool {
self.config.enabled && active_input_tokens >= self.config.cycle_threshold
}
/// Compute the verbatim window: the last N message indices that must
@@ -577,6 +562,30 @@ impl SeamManager {
}
}
/// Pick the soft seam level that should fire for an active request input
/// estimate.
///
/// Levels are considered in ascending order (1, 2, 3). A level is eligible
/// only if it is above `highest_existing_level` (treated as 0 when `None`) and
/// `active_input_tokens` has reached that level's threshold; the first
/// eligible level is returned, so each level fires at most once and in order.
/// Returns `None` when seams are disabled in `config` or no level is eligible.
#[must_use]
pub fn seam_level_for_active_input(
    config: &SeamConfig,
    active_input_tokens: usize,
    highest_existing_level: Option<u8>,
) -> Option<u8> {
    if !config.enabled {
        return None;
    }

    let already_reached = highest_existing_level.unwrap_or(0);
    let ladder: [(u8, usize); 3] = [
        (1, config.l1_threshold),
        (2, config.l2_threshold),
        (3, config.l3_threshold),
    ];
    ladder
        .into_iter()
        .find(|&(level, threshold)| already_reached < level && active_input_tokens >= threshold)
        .map(|(level, _)| level)
}
/// Truncate a string to max_chars, respecting Unicode boundaries.
fn truncate_chars(text: &str, max_chars: usize) -> String {
if max_chars == 0 {
@@ -598,15 +607,29 @@ mod tests {
// Test the pure logic functions only.
let config = SeamConfig::default();
// Test seam_level_for logic manually.
// Below L1
assert!(config.enabled && 100_000 < config.l1_threshold);
// At L1
assert!(192_000 >= config.l1_threshold);
// At L2
assert!(384_000 >= config.l2_threshold);
// At L3
assert!(576_000 >= config.l3_threshold);
assert_eq!(seam_level_for_active_input(&config, 100_000, None), None);
assert_eq!(seam_level_for_active_input(&config, 192_000, None), Some(1));
assert_eq!(
seam_level_for_active_input(&config, 384_000, Some(1)),
Some(2)
);
assert_eq!(
seam_level_for_active_input(&config, 576_000, Some(2)),
Some(3)
);
}
#[test]
fn seam_trigger_uses_active_request_size_not_lifetime_usage() {
let config = SeamConfig::default();
let lifetime_prompt_usage = 900_000usize;
let active_request_input = 120_000usize;
assert!(lifetime_prompt_usage >= config.l3_threshold);
assert_eq!(
seam_level_for_active_input(&config, active_request_input, None),
None
);
}
#[test]
+57 -37
View File
@@ -1287,6 +1287,10 @@ use crate::tools::spec::{
use async_trait::async_trait;
use serde_json::json;
const FOREGROUND_TIMEOUT_RECOVERY_HINT: &str = "Foreground exec_shell is for bounded commands. \
The timed-out process was killed; rerun long work with task_shell_start or exec_shell with \
background: true, then poll with task_shell_wait or exec_shell_wait.";
async fn execute_foreground_via_background(
context: &ToolContext,
command: &str,
@@ -1372,7 +1376,7 @@ impl ToolSpec for ExecShellTool {
}
fn description(&self) -> &'static str {
"Execute a shell command in the workspace directory. Returns stdout, stderr, and exit code."
"Execute a shell command in the workspace directory. Foreground mode is for bounded commands; use background=true or task_shell_start for long-running work, then poll/wait."
}
fn input_schema(&self) -> serde_json::Value {
@@ -1389,7 +1393,7 @@ impl ToolSpec for ExecShellTool {
},
"background": {
"type": "boolean",
"description": "Run in background and return task_id (default: false)"
"description": "Run in background and return task_id (default: false). Prefer true for commands that may run for minutes; poll with exec_shell_wait or task_shell_wait."
},
"interactive": {
"type": "boolean",
@@ -1599,7 +1603,7 @@ impl ToolSpec for ExecShellTool {
)
} else if result.status == ShellStatus::TimedOut {
format!(
"Command timed out after {timeout_ms}ms; process killed.\n\nSTDOUT:\n{}\n\nSTDERR:\n{}",
"Command timed out after {timeout_ms}ms; process killed.\n\n{FOREGROUND_TIMEOUT_RECOVERY_HINT}\n\nSTDOUT:\n{}\n\nSTDERR:\n{}",
result.stdout, result.stderr
)
} else {
@@ -1609,44 +1613,60 @@ impl ToolSpec for ExecShellTool {
)
};
let mut metadata = json!({
"exit_code": result.exit_code,
"status": format!("{:?}", result.status),
"duration_ms": result.duration_ms,
"sandboxed": result.sandboxed,
"sandbox_type": result.sandbox_type,
"sandbox_denied": result.sandbox_denied,
"task_id": result.task_id,
"stdout_len": result.stdout_len,
"stderr_len": result.stderr_len,
"stdout_truncated": result.stdout_truncated,
"stderr_truncated": result.stderr_truncated,
"stdout_omitted": result.stdout_omitted,
"stderr_omitted": result.stderr_omitted,
"summary": summary,
"stdout_summary": stdout_summary,
"stderr_summary": stderr_summary,
"safety_level": format!("{:?}", safety.level),
"interactive": interactive,
"canceled": was_cancelled,
"execpolicy": execpolicy_decision.as_ref().map(|decision| match decision {
ExecPolicyDecision::Allow => json!({
"decision": "allow",
}),
ExecPolicyDecision::Deny(reason) => json!({
"decision": "deny",
"reason": reason,
}),
ExecPolicyDecision::AskUser(reason) => json!({
"decision": "ask_user",
"reason": reason,
}),
}),
});
if result.status == ShellStatus::TimedOut && !background && !interactive {
metadata["foreground_timeout_recovery"] = json!({
"process_killed": true,
"hint": FOREGROUND_TIMEOUT_RECOVERY_HINT,
"recommended_tools": [
"task_shell_start",
"task_shell_wait",
"exec_shell",
"exec_shell_wait"
],
"exec_shell_background": true,
"poll_with": ["task_shell_wait", "exec_shell_wait"]
});
}
Ok(ToolResult {
content: output,
success: result.status == ShellStatus::Completed
|| result.status == ShellStatus::Running,
metadata: Some(json!({
"exit_code": result.exit_code,
"status": format!("{:?}", result.status),
"duration_ms": result.duration_ms,
"sandboxed": result.sandboxed,
"sandbox_type": result.sandbox_type,
"sandbox_denied": result.sandbox_denied,
"task_id": result.task_id,
"stdout_len": result.stdout_len,
"stderr_len": result.stderr_len,
"stdout_truncated": result.stdout_truncated,
"stderr_truncated": result.stderr_truncated,
"stdout_omitted": result.stdout_omitted,
"stderr_omitted": result.stderr_omitted,
"summary": summary,
"stdout_summary": stdout_summary,
"stderr_summary": stderr_summary,
"safety_level": format!("{:?}", safety.level),
"interactive": interactive,
"canceled": was_cancelled,
"execpolicy": execpolicy_decision.as_ref().map(|decision| match decision {
ExecPolicyDecision::Allow => json!({
"decision": "allow",
}),
ExecPolicyDecision::Deny(reason) => json!({
"decision": "deny",
"reason": reason,
}),
ExecPolicyDecision::AskUser(reason) => json!({
"decision": "ask_user",
"reason": reason,
}),
}),
})),
metadata: Some(metadata),
})
}
Err(e) => Ok(ToolResult::error(format!("Shell execution failed: {e}"))),
+41
View File
@@ -263,6 +263,47 @@ async fn test_exec_shell_metadata_includes_summaries() {
assert!(meta.get("stdout_truncated").is_some());
}
/// Regression test: a foreground `exec_shell` call that hits its timeout must
/// report failure and steer the model toward background execution, both in the
/// text output and in the `foreground_timeout_recovery` metadata object.
#[tokio::test]
async fn test_exec_shell_foreground_timeout_guides_background_rerun() {
    let workspace = tempdir().expect("tempdir");
    let ctx = ToolContext::new(workspace.path());
    let tool = ExecShellTool;

    // `sleep_command(10)` is expected to outlast the 1000 ms timeout —
    // presumably it sleeps for 10 seconds; confirm against the helper.
    let args = json!({
        "command": sleep_command(10),
        "timeout_ms": 1000
    });
    let outcome = tool.execute(args, &ctx).await.expect("execute");

    // The call itself is unsuccessful, and the text output carries the hint.
    assert!(!outcome.success);
    let text = &outcome.content;
    assert!(text.contains("task_shell_start"));
    assert!(text.contains("background: true"));
    assert!(text.contains("process killed"));

    // The status is reported as the timed-out variant.
    let meta = outcome.metadata.expect("metadata");
    let status = meta.get("status").and_then(Value::as_str);
    assert_eq!(status, Some("TimedOut"));

    // The structured recovery guidance must be attached to the metadata.
    let recovery = meta
        .get("foreground_timeout_recovery")
        .expect("timeout recovery metadata");

    let background_flag = recovery
        .get("exec_shell_background")
        .and_then(Value::as_bool);
    assert_eq!(background_flag, Some(true));

    // A missing or non-string hint degrades to "" and fails the check below.
    let hint = recovery
        .get("hint")
        .and_then(Value::as_str)
        .unwrap_or_default();
    assert!(hint.contains("exec_shell_wait"));
}
#[tokio::test]
async fn test_exec_shell_foreground_cancel_kills_process() {
let tmp = tempdir().expect("tempdir");
+6 -4
View File
@@ -612,13 +612,15 @@ pub struct App {
pub runtime_turn_id: Option<String>,
/// Current runtime turn status (if known).
pub runtime_turn_status: Option<String>,
/// Last prompt token usage
/// Provider-reported input tokens from the last completed turn. This is
/// telemetry/cost data and may sum repeated stable prefixes across tool
/// rounds; active context pressure is estimated from `api_messages`.
pub last_prompt_tokens: Option<u32>,
/// Last completion token usage
/// Provider-reported output tokens from the last completed turn.
pub last_completion_tokens: Option<u32>,
/// DeepSeek context-cache hit tokens from the last API call.
/// DeepSeek context-cache hit tokens from the last API call. Telemetry only.
pub last_prompt_cache_hit_tokens: Option<u32>,
/// DeepSeek context-cache miss tokens from the last API call.
/// DeepSeek context-cache miss tokens from the last API call. Telemetry only.
pub last_prompt_cache_miss_tokens: Option<u32>,
/// Approximate input tokens spent re-sending prior `reasoning_content` on
/// the last thinking-mode tool-calling turn (V4 §5.1.1 "Interleaved
+2 -2
View File
@@ -4714,7 +4714,7 @@ fn footer_context_percent_spans(app: &App) -> Vec<Span<'static>> {
palette::TEXT_MUTED
};
vec![Span::styled(
format!("ctx {percent:.0}%"),
format!("active ctx {percent:.0}%"),
Style::default().fg(color),
)]
}
@@ -4802,7 +4802,7 @@ fn footer_cache_spans(app: &App) -> Vec<Span<'static>> {
let percent = (f64::from(hit_tokens) / f64::from(total) * 100.0).clamp(0.0, 100.0);
vec![Span::styled(
format!("cache {:.0}%", percent),
format!("cache hit {:.0}%", percent),
Style::default().fg(palette::TEXT_MUTED),
)]
}
+2 -2
View File
@@ -575,7 +575,7 @@ fn footer_auxiliary_spans_show_cache_when_compact() {
app.last_prompt_cache_miss_tokens = Some(12_000);
app.session_cost = 12.34;
let compact = spans_text(&footer_auxiliary_spans(&app, 12));
let compact = spans_text(&footer_auxiliary_spans(&app, 14));
assert!(compact.contains("cache"));
assert!(!compact.contains('$'));
}
@@ -589,7 +589,7 @@ fn footer_auxiliary_spans_show_cache_and_cost_when_roomy() {
app.session_cost = 12.34;
let roomy = spans_text(&footer_auxiliary_spans(&app, 32));
assert!(roomy.contains("cache 75%"));
assert!(roomy.contains("cache hit 75%"));
assert!(roomy.contains("$12.34"));
assert!(
!roomy.contains("ctx"),
+4 -3
View File
@@ -31,7 +31,8 @@ pub struct HeaderData<'a> {
pub context_window: Option<u32>,
/// Accumulated session cost in USD.
pub session_cost: f64,
/// Input tokens from the most recent API call (current context utilization).
/// Active context input tokens used for context utilization. Callers should
/// pass a sanitized live-context estimate, not cumulative API usage.
pub last_prompt_tokens: Option<u32>,
/// Short label for the current reasoning-effort tier (e.g. "max", "high",
/// "off"). Rendered as a chip when space allows.
@@ -90,12 +91,12 @@ impl<'a> HeaderData<'a> {
total_tokens: u32,
context_window: Option<u32>,
session_cost: f64,
last_prompt_tokens: Option<u32>,
active_context_input_tokens: Option<u32>,
) -> Self {
self.total_tokens = total_tokens;
self.context_window = context_window;
self.session_cost = session_cost;
self.last_prompt_tokens = last_prompt_tokens;
self.last_prompt_tokens = active_context_input_tokens;
self
}
}
+23 -1
View File
@@ -157,6 +157,26 @@ Readability semantics:
`crowded`, `refreshing`, `verifying`, and `resetting`; these are derived from
capacity and compaction events without exposing internal formulas in normal UI.
### Token Quantities and Drivers
DeepSeek V4 prefix caching makes token labels matter. These quantities are kept
separate:
| Quantity | Meaning | Allowed to drive |
|---|---|---|
| Active request input estimate | Conservative estimate of the next request's live system prompt and transcript payload. | Header/footer context percent, hard-cycle trigger, opt-in Flash seam trigger, and emergency overflow preflight. |
| Reserved response headroom | The requested `max_tokens` budget plus safety headroom. v0.7.5 keeps normal turns at `262144` output tokens and adds `1024` safety tokens for context-window checks. | Hard-cycle and emergency overflow budget checks only. |
| Cumulative API usage | Provider-reported input plus output tokens summed across completed API calls; multi-tool turns may count the same stable prefix more than once. | Session usage and approximate cost telemetry only. |
| Prompt cache hit/miss | Provider cache telemetry for the most recent call when available. | Cache-hit display and cost estimation only; never compaction, seam, or cycle triggers. |
| Context percent | Active request input estimate divided by the model context window. | Display only; it mirrors the active-input basis used by context safeguards. |
| Cost estimate | Approximate spend from provider usage and configured DeepSeek rates. | Display only. |
For the default V4 path, hard cycles fire when active input reaches the smaller
of the configured cycle threshold (`768000`) and the model window minus reserved
response headroom. Replacement compaction remains opt-in (`auto_compact = false`
by default), the Flash seam manager remains opt-in (`[context].enabled = false`),
and the capacity controller remains disabled unless configured.
### Command Migration Notes
If you are upgrading from older releases:
@@ -196,7 +216,9 @@ If you are upgrading from older releases:
- `[snapshots].enabled` (bool, default `true`)
- `[snapshots].max_age_days` (int, default `7`)
- snapshots live under `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/.git` and never use the workspace's own `.git` directory
- `context.*` (optional): append-only Flash seam manager, currently opt-in:
- `context.*` (optional): append-only Flash seam manager, currently opt-in.
Thresholds use the active request input estimate, not lifetime summed API
usage:
- `[context].enabled` (bool, default `false`)
- `[context].verbatim_window_turns` (int, default `16`)
- `[context].l1_threshold` (int, default `192000`)
+10 -4
View File
@@ -15,7 +15,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts
for the same backing operation are a model trap — the LLM will alternate
between them and the cache hit rate suffers.
## Final surface (v0.7.4)
## Current surface (v0.7.5)
### File operations
@@ -40,19 +40,25 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts
| Tool | Niche |
|---|---|
| `exec_shell` | Run a shell command. Foreground runs are cancellable, but use them only for bounded commands. |
| `exec_shell` | Run a shell command. Foreground runs are cancellable, but use them only for bounded commands; timeout kills the process and returns a background-rerun hint. |
| `exec_shell_wait` | Poll a background task for incremental output. |
| `exec_shell_interact` | Send stdin to a running background task and read incremental output. |
| `task_shell_start` | Start a long-running command in the background and return immediately. Preferred over foreground shell for diagnostics, tests, searches, and servers that may run for minutes. |
| `task_shell_wait` | Poll a background command. If `gate` is supplied after completion, record structured gate evidence on the active durable task. |
When a foreground shell command times out, the process is killed rather than
left running silently. The tool result tells the model to rerun long work with
`task_shell_start` or `exec_shell` with `background: true`, then poll with
`task_shell_wait` or `exec_shell_wait`.
Interactive shell jobs are also visible through `/jobs`. The TUI job center is
fed by the same shell manager as `exec_shell`/`task_shell_start`, and shows the
command, cwd, elapsed time, status, output tail, process-local shell id, and
linked durable task id when available. `/jobs show`, `/jobs poll`, `/jobs wait`,
`/jobs stdin`, and `/jobs cancel` provide inspect, polling, stdin, and cancel
controls for live jobs. Jobs are process-local; after restart, detached entries
are marked stale rather than presented as live processes.
controls for live jobs. Jobs are process-local; after restart, live process
state is not reattached, and any remembered detached entries must be marked
stale rather than presented as live processes.
### MCP manager and palette discovery
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "deepseek-tui",
"version": "0.7.4",
"deepseekBinaryVersion": "0.7.4",
"version": "0.7.5",
"deepseekBinaryVersion": "0.7.5",
"description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
"author": "Hmbown",
"license": "MIT",