release: v0.7.5 — token-basis fixes, shell timeout recovery, context/cache policy
Issues #202, #203, #204, #205:
- Cycle/seam triggers use active request input size + response headroom reserve, not lifetime cumulative API usage.
- V4 hard-cycle headroom calibrated around fixed TURN_MAX_OUTPUT_TOKENS plus CONTEXT_HEADROOM_TOKENS safety buffer.
- /tokens, /cost, footer/header labels, and docs now separate active context, turn telemetry, cumulative usage, cache hit/miss, context percent, and cost.
- Foreground exec_shell timeout output tells the model the process was killed and suggests task_shell_start or background exec_shell plus poll/wait.
- Added regression tests for active-token basis, V4 headroom, seam trigger basis, footer label behavior, and shell timeout recovery metadata.
- Preserved #200/#201 policy: V4 default is append-only, prefix-cache preserving; replacement compaction, Flash seams, and capacity intervention remain opt-in.
This commit is contained in:
Generated
+14
-14
@@ -1011,7 +1011,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-agent"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"deepseek-config",
|
||||
"serde",
|
||||
@@ -1019,7 +1019,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-app-server"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"axum",
|
||||
@@ -1042,7 +1042,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-config"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"deepseek-secrets",
|
||||
@@ -1055,7 +1055,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-core"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -1074,7 +1074,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-execpolicy"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"deepseek-protocol",
|
||||
@@ -1083,7 +1083,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-hooks"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -1097,7 +1097,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-mcp"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"deepseek-protocol",
|
||||
@@ -1107,7 +1107,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-protocol"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -1115,7 +1115,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-secrets"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"dirs",
|
||||
"keyring",
|
||||
@@ -1128,7 +1128,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-state"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -1140,7 +1140,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-tools"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
@@ -1153,7 +1153,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-tui"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"arboard",
|
||||
@@ -1213,7 +1213,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-tui-cli"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"chrono",
|
||||
@@ -1236,7 +1236,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "deepseek-tui-core"
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
|
||||
[[package]]
|
||||
name = "deranged"
|
||||
|
||||
+1
-1
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.7.4"
|
||||
version = "0.7.5"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
repository = "https://github.com/Hmbown/DeepSeek-TUI"
|
||||
|
||||
@@ -200,9 +200,13 @@ exponential_base = 2.0
|
||||
[context]
|
||||
enabled = false
|
||||
verbatim_window_turns = 16
|
||||
# Thresholds are based on the active request input estimate, not lifetime
|
||||
# summed API usage.
|
||||
l1_threshold = 192000
|
||||
l2_threshold = 384000
|
||||
l3_threshold = 576000
|
||||
# Hard cycle also reserves the normal 262144-token output budget plus 1024
|
||||
# safety tokens against the model window.
|
||||
cycle_threshold = 768000
|
||||
seam_model = "deepseek-v4-flash"
|
||||
|
||||
|
||||
@@ -7,5 +7,5 @@ repository.workspace = true
|
||||
description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
|
||||
|
||||
[dependencies]
|
||||
deepseek-config = { path = "../config", version = "0.7.4" }
|
||||
deepseek-config = { path = "../config", version = "0.7.5" }
|
||||
serde.workspace = true
|
||||
|
||||
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
|
||||
anyhow.workspace = true
|
||||
axum.workspace = true
|
||||
clap.workspace = true
|
||||
deepseek-agent = { path = "../agent", version = "0.7.4" }
|
||||
deepseek-config = { path = "../config", version = "0.7.4" }
|
||||
deepseek-core = { path = "../core", version = "0.7.4" }
|
||||
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" }
|
||||
deepseek-hooks = { path = "../hooks", version = "0.7.4" }
|
||||
deepseek-mcp = { path = "../mcp", version = "0.7.4" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
|
||||
deepseek-state = { path = "../state", version = "0.7.4" }
|
||||
deepseek-tools = { path = "../tools", version = "0.7.4" }
|
||||
deepseek-agent = { path = "../agent", version = "0.7.5" }
|
||||
deepseek-config = { path = "../config", version = "0.7.5" }
|
||||
deepseek-core = { path = "../core", version = "0.7.5" }
|
||||
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" }
|
||||
deepseek-hooks = { path = "../hooks", version = "0.7.5" }
|
||||
deepseek-mcp = { path = "../mcp", version = "0.7.5" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
|
||||
deepseek-state = { path = "../state", version = "0.7.5" }
|
||||
deepseek-tools = { path = "../tools", version = "0.7.5" }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -14,13 +14,13 @@ path = "src/main.rs"
|
||||
anyhow.workspace = true
|
||||
clap.workspace = true
|
||||
clap_complete.workspace = true
|
||||
deepseek-agent = { path = "../agent", version = "0.7.4" }
|
||||
deepseek-app-server = { path = "../app-server", version = "0.7.4" }
|
||||
deepseek-config = { path = "../config", version = "0.7.4" }
|
||||
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" }
|
||||
deepseek-mcp = { path = "../mcp", version = "0.7.4" }
|
||||
deepseek-secrets = { path = "../secrets", version = "0.7.4" }
|
||||
deepseek-state = { path = "../state", version = "0.7.4" }
|
||||
deepseek-agent = { path = "../agent", version = "0.7.5" }
|
||||
deepseek-app-server = { path = "../app-server", version = "0.7.5" }
|
||||
deepseek-config = { path = "../config", version = "0.7.5" }
|
||||
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" }
|
||||
deepseek-mcp = { path = "../mcp", version = "0.7.5" }
|
||||
deepseek-secrets = { path = "../secrets", version = "0.7.5" }
|
||||
deepseek-state = { path = "../state", version = "0.7.5" }
|
||||
chrono.workspace = true
|
||||
dirs.workspace = true
|
||||
serde.workspace = true
|
||||
|
||||
@@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
deepseek-secrets = { path = "../secrets", version = "0.7.4" }
|
||||
deepseek-secrets = { path = "../secrets", version = "0.7.5" }
|
||||
dirs.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
chrono.workspace = true
|
||||
deepseek-agent = { path = "../agent", version = "0.7.4" }
|
||||
deepseek-config = { path = "../config", version = "0.7.4" }
|
||||
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.4" }
|
||||
deepseek-hooks = { path = "../hooks", version = "0.7.4" }
|
||||
deepseek-mcp = { path = "../mcp", version = "0.7.4" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
|
||||
deepseek-state = { path = "../state", version = "0.7.4" }
|
||||
deepseek-tools = { path = "../tools", version = "0.7.4" }
|
||||
deepseek-agent = { path = "../agent", version = "0.7.5" }
|
||||
deepseek-config = { path = "../config", version = "0.7.5" }
|
||||
deepseek-execpolicy = { path = "../execpolicy", version = "0.7.5" }
|
||||
deepseek-hooks = { path = "../hooks", version = "0.7.5" }
|
||||
deepseek-mcp = { path = "../mcp", version = "0.7.5" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
|
||||
deepseek-state = { path = "../state", version = "0.7.5" }
|
||||
deepseek-tools = { path = "../tools", version = "0.7.5" }
|
||||
serde_json.workspace = true
|
||||
tokio.workspace = true
|
||||
uuid.workspace = true
|
||||
|
||||
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
|
||||
serde.workspace = true
|
||||
|
||||
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
|
||||
anyhow.workspace = true
|
||||
async-trait.workspace = true
|
||||
chrono.workspace = true
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
|
||||
reqwest.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo
|
||||
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
|
||||
[dependencies]
|
||||
anyhow.workspace = true
|
||||
async-trait.workspace = true
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.4" }
|
||||
deepseek-protocol = { path = "../protocol", version = "0.7.5" }
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
tokio.workspace = true
|
||||
|
||||
@@ -13,8 +13,8 @@ path = "src/main.rs"
|
||||
[dependencies]
|
||||
anyhow = "1.0.100"
|
||||
arboard = "3.4"
|
||||
deepseek-secrets = { path = "../secrets", version = "0.7.4" }
|
||||
deepseek-tools = { path = "../tools", version = "0.7.4" }
|
||||
deepseek-secrets = { path = "../secrets", version = "0.7.5" }
|
||||
deepseek-tools = { path = "../tools", version = "0.7.5" }
|
||||
async-stream = "0.3.6"
|
||||
async-trait = "0.1"
|
||||
bytes = "1.11.0"
|
||||
|
||||
@@ -3,10 +3,40 @@
|
||||
//! Debug commands: tokens, cost, system, context, undo, retry
|
||||
|
||||
use super::CommandResult;
|
||||
use crate::models::SystemPrompt;
|
||||
use crate::compaction::estimate_input_tokens_conservative;
|
||||
use crate::models::{SystemPrompt, context_window_for_model};
|
||||
use crate::tui::app::{App, AppAction};
|
||||
use crate::tui::history::HistoryCell;
|
||||
|
||||
/// Render an optional token count for display.
///
/// Returns the count as a decimal string when `value` is `Some`, or the
/// literal text `"not reported"` when it is `None`.
fn token_count(value: Option<u32>) -> String {
|
||||
value.map_or_else(|| "not reported".to_string(), |tokens| tokens.to_string())
|
||||
}
|
||||
|
||||
/// Summarize the estimated size of the active request context.
///
/// The estimate comes from `estimate_input_tokens_conservative` over
/// `app.api_messages` and `app.system_prompt`. When
/// `context_window_for_model` returns a window for `app.model`, the result
/// is `"~{used} / {window} ({percent:.1}%)"`; otherwise it is
/// `"~{estimated} / unknown window"`.
fn active_context_summary(app: &App) -> String {
|
||||
let estimated =
|
||||
estimate_input_tokens_conservative(&app.api_messages, app.system_prompt.as_ref());
|
||||
match context_window_for_model(&app.model) {
|
||||
Some(window) => {
|
||||
// Cap the displayed usage at the window size so an over-estimate is
// never shown as more than the window.
let used = estimated.min(window as usize);
|
||||
// `used <= window` already holds here; the clamp additionally bounds
// the value to 0..=100.
let percent = (used as f64 / f64::from(window) * 100.0).clamp(0.0, 100.0);
|
||||
format!("~{used} / {window} ({percent:.1}%)")
|
||||
}
|
||||
// No known window for this model: report the raw (uncapped) estimate.
None => format!("~{estimated} / unknown window"),
|
||||
}
|
||||
}
|
||||
|
||||
/// Describe the prompt-cache hit/miss token counts stored on `app`.
///
/// Reads `app.last_prompt_cache_hit_tokens` and
/// `app.last_prompt_cache_miss_tokens`. Each side that is `None` is rendered
/// as "not reported"; when both are `None` the whole summary is just
/// `"not reported"`.
fn cache_summary(app: &App) -> String {
|
||||
match (
|
||||
app.last_prompt_cache_hit_tokens,
|
||||
app.last_prompt_cache_miss_tokens,
|
||||
) {
|
||||
(Some(hit), Some(miss)) => format!("{hit} hit / {miss} miss"),
|
||||
(Some(hit), None) => format!("{hit} hit / miss not reported"),
|
||||
(None, Some(miss)) => format!("hit not reported / {miss} miss"),
|
||||
(None, None) => "not reported".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Show token usage for session
|
||||
pub fn tokens(app: &mut App) -> CommandResult {
|
||||
let message_count = app.api_messages.len();
|
||||
@@ -15,12 +45,24 @@ pub fn tokens(app: &mut App) -> CommandResult {
|
||||
CommandResult::message(format!(
|
||||
"Token Usage:\n\
|
||||
─────────────────────────────\n\
|
||||
Total tokens: {}\n\
|
||||
Session cost: ${:.4}\n\
|
||||
API messages: {}\n\
|
||||
Chat messages: {}\n\
|
||||
Model: {}",
|
||||
app.total_tokens, app.session_cost, message_count, chat_count, app.model,
|
||||
Active context: {}\n\
|
||||
Last API input: {} (turn telemetry; may count repeated prefix across tool rounds)\n\
|
||||
Last API output: {}\n\
|
||||
Cache hit/miss: {} (telemetry/cost only)\n\
|
||||
Cumulative tokens: {} (session usage telemetry)\n\
|
||||
Approx session cost: ${:.4}\n\
|
||||
API messages: {}\n\
|
||||
Chat messages: {}\n\
|
||||
Model: {}",
|
||||
active_context_summary(app),
|
||||
token_count(app.last_prompt_tokens),
|
||||
token_count(app.last_completion_tokens),
|
||||
cache_summary(app),
|
||||
app.total_tokens,
|
||||
app.session_cost,
|
||||
message_count,
|
||||
chat_count,
|
||||
app.model,
|
||||
))
|
||||
}
|
||||
|
||||
@@ -29,7 +71,8 @@ pub fn cost(app: &mut App) -> CommandResult {
|
||||
CommandResult::message(format!(
|
||||
"Session Cost:\n\
|
||||
─────────────────────────────\n\
|
||||
Total spent: ${:.4}\n\n\
|
||||
Approx total spent: ${:.4}\n\n\
|
||||
Cost estimates are approximate and use provider usage telemetry when available.\n\n\
|
||||
DeepSeek API Pricing:\n\
|
||||
─────────────────────────────\n\
|
||||
Pricing details are not configured in this CLI.",
|
||||
@@ -113,9 +156,16 @@ mod tests {
|
||||
let mut app = create_test_app();
|
||||
app.total_tokens = 1234;
|
||||
app.session_cost = 0.05;
|
||||
app.last_prompt_tokens = Some(100);
|
||||
app.last_completion_tokens = Some(25);
|
||||
app.last_prompt_cache_hit_tokens = Some(70);
|
||||
app.last_prompt_cache_miss_tokens = Some(30);
|
||||
app.api_messages.push(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![],
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "test".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
});
|
||||
app.history.push(HistoryCell::User {
|
||||
content: "test".to_string(),
|
||||
@@ -125,8 +175,13 @@ mod tests {
|
||||
assert!(result.message.is_some());
|
||||
let msg = result.message.unwrap();
|
||||
assert!(msg.contains("Token Usage"));
|
||||
assert!(msg.contains("Total tokens:"));
|
||||
assert!(msg.contains("Session cost:"));
|
||||
assert!(msg.contains("Active context:"));
|
||||
assert!(msg.contains("Last API input:"));
|
||||
assert!(msg.contains("Last API output:"));
|
||||
assert!(msg.contains("Cache hit/miss:"));
|
||||
assert!(msg.contains("70 hit / 30 miss"));
|
||||
assert!(msg.contains("Cumulative tokens:"));
|
||||
assert!(msg.contains("Approx session cost:"));
|
||||
assert!(msg.contains("API messages:"));
|
||||
assert!(msg.contains("Chat messages:"));
|
||||
assert!(msg.contains("Model:"));
|
||||
@@ -140,7 +195,8 @@ mod tests {
|
||||
assert!(result.message.is_some());
|
||||
let msg = result.message.unwrap();
|
||||
assert!(msg.contains("Session Cost"));
|
||||
assert!(msg.contains("Total spent:"));
|
||||
assert!(msg.contains("Approx total spent:"));
|
||||
assert!(msg.contains("approximate"));
|
||||
assert!(msg.contains("$0.1234"));
|
||||
}
|
||||
|
||||
|
||||
@@ -445,7 +445,7 @@ pub struct ContextConfig {
|
||||
/// Verbatim window: last N turns never summarized. Default: 16.
|
||||
#[serde(default)]
|
||||
pub verbatim_window_turns: Option<usize>,
|
||||
/// Soft seam thresholds (cumulative input+output tokens).
|
||||
/// Soft seam thresholds based on the active request input estimate.
|
||||
#[serde(default)]
|
||||
pub l1_threshold: Option<usize>,
|
||||
#[serde(default)]
|
||||
|
||||
@@ -354,9 +354,9 @@ fn should_transparently_retry_stream(
|
||||
/// Max output tokens requested for normal agent turns. Generous on purpose:
|
||||
/// V4 thinking models can produce tens of thousands of reasoning tokens on
|
||||
/// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M
|
||||
/// context window. 256K leaves the model effectively unconstrained on
|
||||
/// output without us imposing artificial per-turn caps that surfaced as the
|
||||
/// assistant "stopping mid-response" when reasoning consumed the budget.
|
||||
/// context window. v0.7.5 keeps this cap fixed instead of silently lowering
|
||||
/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget
|
||||
/// plus safety headroom before sending the next request.
|
||||
const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144;
|
||||
/// Keep this many most recent messages when emergency trimming is required.
|
||||
const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4;
|
||||
@@ -1199,6 +1199,10 @@ fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option<usi
|
||||
.and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS))
|
||||
}
|
||||
|
||||
/// Tokens reserved for the next response: `TURN_MAX_OUTPUT_TOKENS` plus
/// `CONTEXT_HEADROOM_TOKENS`, widened to `u64` and added with saturation.
///
/// Passed to `should_advance_cycle` as the reserved response headroom.
fn turn_response_headroom_tokens() -> u64 {
|
||||
u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64)
|
||||
}
|
||||
|
||||
/// Returns `true` when `classify_error_message` places `message` in
/// `ErrorCategory::InvalidInput`.
///
/// NOTE(review): the check is on the whole `InvalidInput` category, which may
/// be broader than context-length errors specifically — confirm against
/// `error_taxonomy`.
fn is_context_length_error_message(message: &str) -> bool {
|
||||
crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput
|
||||
}
|
||||
@@ -2440,7 +2444,7 @@ impl Engine {
|
||||
/// Handle a turn using the DeepSeek API.
|
||||
#[allow(clippy::too_many_lines)]
|
||||
/// Run the pre-request layered-context checkpoint (#159). Checks whether
|
||||
/// cumulative tokens have crossed a soft-seam threshold and, if so,
|
||||
/// the active input estimate has crossed a soft-seam threshold and, if so,
|
||||
/// produces an `<archived_context>` block via Flash and appends it as an
|
||||
/// assistant message. Called from `handle_deepseek_turn` before each API
|
||||
/// request so the model always has the latest navigation aids.
|
||||
@@ -2452,18 +2456,8 @@ impl Engine {
|
||||
return;
|
||||
}
|
||||
|
||||
// Cumulative tokens: session total (all turns so far) + current
|
||||
// estimated input (the messages that will be sent next).
|
||||
let cumulative_input = self
|
||||
.session
|
||||
.total_usage
|
||||
.input_tokens
|
||||
.saturating_add(self.session.total_usage.output_tokens);
|
||||
let cumulative_estimate =
|
||||
cumulative_input.saturating_add(self.estimated_input_tokens() as u64);
|
||||
|
||||
let highest = seam_mgr.highest_level().await;
|
||||
let Some(level) = seam_mgr.seam_level_for(cumulative_estimate as usize, highest) else {
|
||||
let Some(level) = seam_mgr.seam_level_for(self.estimated_input_tokens(), highest) else {
|
||||
return;
|
||||
};
|
||||
|
||||
@@ -2563,8 +2557,8 @@ impl Engine {
|
||||
/// they're still running.
|
||||
async fn maybe_advance_cycle(&mut self, mode: AppMode) {
|
||||
if !should_advance_cycle(
|
||||
self.session.total_usage.input_tokens,
|
||||
self.session.total_usage.output_tokens,
|
||||
self.estimated_input_tokens() as u64,
|
||||
turn_response_headroom_tokens(),
|
||||
&self.session.model,
|
||||
&self.config.cycle,
|
||||
false,
|
||||
|
||||
@@ -29,9 +29,12 @@
|
||||
//!
|
||||
//! ## Trigger
|
||||
//!
|
||||
//! - Token threshold: **768K** by default (~75% of the 1M window). This is a
|
||||
//! rare overflow safety net. Optional soft seams at 192K/384K/576K are
|
||||
//! controlled by the opt-in layered context manager (#159).
|
||||
//! - Token threshold: **768K** active input by default (~75% of the 1M window).
|
||||
//! This is a rare overflow safety net. The trigger is based on the next
|
||||
//! request's live input estimate, not lifetime summed API usage, with
|
||||
//! assistant-output and safety headroom considered against the model window.
|
||||
//! Optional soft seams at 192K/384K/576K are controlled by the opt-in layered
|
||||
//! context manager (#159).
|
||||
//! - Phase guard: callers only invoke `should_advance_cycle` at clean turn
|
||||
//! boundaries (no in-flight tool, no streaming, no approval modal).
|
||||
//! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators
|
||||
@@ -48,7 +51,9 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::client::DeepSeekClient;
|
||||
use crate::llm_client::LlmClient;
|
||||
use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt};
|
||||
use crate::models::{
|
||||
ContentBlock, Message, MessageRequest, SystemBlock, SystemPrompt, context_window_for_model,
|
||||
};
|
||||
use crate::tools::plan::{PlanSnapshot, SharedPlanState};
|
||||
use crate::tools::subagent::{SharedSubAgentManager, SubAgentResult, SubAgentStatus};
|
||||
use crate::tools::todo::{SharedTodoList, TodoListSnapshot};
|
||||
@@ -151,14 +156,20 @@ pub struct CycleBriefing {
|
||||
|
||||
/// Decide whether a cycle boundary should fire.
|
||||
///
|
||||
/// `usage` is the *cumulative* session input+output tokens (both `u64` to
|
||||
/// match `SessionUsage`). `in_flight` is true when a tool is mid-execution,
|
||||
/// stream is open, or an approval modal is pending — in those cases the
|
||||
/// caller must wait until the next clean boundary.
|
||||
/// `active_input_tokens` is the estimated token count of the next request's
|
||||
/// current input, including previous assistant/tool output that is now part of
|
||||
/// the transcript. `reserved_response_headroom_tokens` is the max output budget
|
||||
/// plus any provider safety headroom reserved for that next request. Lifetime
|
||||
/// API usage is intentionally not used here because it repeatedly counts the
|
||||
/// same stable prefix across requests.
|
||||
///
|
||||
/// `in_flight` is true when a tool is mid-execution, stream is open, or an
|
||||
/// approval modal is pending — in those cases the caller must wait until the
|
||||
/// next clean boundary.
|
||||
#[must_use]
|
||||
pub fn should_advance_cycle(
|
||||
cumulative_input_tokens: u64,
|
||||
cumulative_output_tokens: u64,
|
||||
active_input_tokens: u64,
|
||||
reserved_response_headroom_tokens: u64,
|
||||
model: &str,
|
||||
cfg: &CycleConfig,
|
||||
in_flight: bool,
|
||||
@@ -166,12 +177,14 @@ pub fn should_advance_cycle(
|
||||
if !cfg.enabled || in_flight {
|
||||
return false;
|
||||
}
|
||||
let total = cumulative_input_tokens.saturating_add(cumulative_output_tokens);
|
||||
let threshold = cfg.threshold_for(model) as u64;
|
||||
if threshold == 0 {
|
||||
return false;
|
||||
}
|
||||
total >= threshold
|
||||
let trigger_floor = context_window_for_model(model)
|
||||
.map(|window| u64::from(window).saturating_sub(reserved_response_headroom_tokens))
|
||||
.map_or(threshold, |window_floor| threshold.min(window_floor));
|
||||
active_input_tokens >= trigger_floor
|
||||
}
|
||||
|
||||
/// Roll-up of state that survives a cycle boundary deterministically.
|
||||
@@ -759,12 +772,60 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_advance_combines_input_and_output() {
|
||||
fn should_advance_considers_output_plus_safety_headroom() {
|
||||
let cfg = CycleConfig::default();
|
||||
// 400K + 400K = 800K > 768K threshold
|
||||
// Below the 768K active-input threshold, but too close to the 1M
|
||||
// model window once the next assistant response and safety headroom are
|
||||
// included.
|
||||
assert!(should_advance_cycle(
|
||||
400_000,
|
||||
400_000,
|
||||
737_000,
|
||||
263_168,
|
||||
"deepseek-v4-pro",
|
||||
&cfg,
|
||||
false
|
||||
));
|
||||
}
|
||||
|
||||
// Regression test: a small active input (120K) with a 64K response reserve
// must not trigger a cycle under the default config for "deepseek-v4-pro".
#[test]
|
||||
fn should_not_count_lifetime_api_usage_as_active_context() {
|
||||
let cfg = CycleConfig::default();
|
||||
assert!(!should_advance_cycle(
|
||||
// active_input_tokens
120_000,
|
||||
// reserved_response_headroom_tokens
64_000,
|
||||
"deepseek-v4-pro",
|
||||
&cfg,
|
||||
// in_flight
false
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn should_advance_v4_calibrates_threshold_against_output_reserve() {
|
||||
let cfg = CycleConfig::default();
|
||||
let reserve = 263_168;
|
||||
assert!(!should_advance_cycle(
|
||||
700_000,
|
||||
reserve,
|
||||
"deepseek-v4-pro",
|
||||
&cfg,
|
||||
false
|
||||
));
|
||||
assert!(should_advance_cycle(
|
||||
738_000,
|
||||
reserve,
|
||||
"deepseek-v4-pro",
|
||||
&cfg,
|
||||
false
|
||||
));
|
||||
assert!(should_advance_cycle(
|
||||
768_000,
|
||||
reserve,
|
||||
"deepseek-v4-pro",
|
||||
&cfg,
|
||||
false
|
||||
));
|
||||
assert!(should_advance_cycle(
|
||||
900_000,
|
||||
reserve,
|
||||
"deepseek-v4-pro",
|
||||
&cfg,
|
||||
false
|
||||
|
||||
@@ -75,7 +75,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise
|
||||
|
||||
- **Planning / tracking**: `update_plan` (high-level strategy), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `checklist_write` (granular progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `todo_*` aliases (legacy compatibility), `note` (persistent memory).
|
||||
- **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`.
|
||||
- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`.
|
||||
- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait.
|
||||
- **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; `github_issue_context` / `github_pr_context` (read-only); `github_comment` / `github_close_issue` (approval + evidence required); `automation_*` scheduling tools.
|
||||
- **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse).
|
||||
- **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`.
|
||||
@@ -108,6 +108,7 @@ Don't reach for `exec_shell` when:
|
||||
- You just need to read or write a file — `read_file` / `write_file` are faster and show up in the tool log.
|
||||
- The command is a single `cat`, `ls`, or `echo` — use `read_file`, `list_dir`, or just state the result.
|
||||
- You're tempted to pipe `curl` for a web lookup — `web_search` or `fetch_url` give structured results.
|
||||
- The command may run for minutes, start a server, run a full test suite, or perform a scientific/release computation — use `task_shell_start` or `exec_shell` with `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`.
|
||||
|
||||
### `agent_spawn`
|
||||
Don't reach for `agent_spawn` when:
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
//!
|
||||
//! ## Soft seam levels
|
||||
//!
|
||||
//! | Level | Trigger (tokens) | Covers messages | Density |
|
||||
//! | Level | Active input trigger | Covers messages | Density |
|
||||
//! |-------|------------------|--------------------|----------------|
|
||||
//! | L1 | 192K | 0–128K | ~2,500 tokens |
|
||||
//! | L2 | 384K | 0–320K | ~1,800 tokens |
|
||||
@@ -45,7 +45,7 @@ use crate::models::{ContentBlock, Message, MessageRequest, SystemBlock, SystemPr
|
||||
/// Default seam model — Flash is cheap and fast, ideal for summarization.
|
||||
pub const DEFAULT_SEAM_MODEL: &str = "deepseek-v4-flash";
|
||||
|
||||
/// Default thresholds (cumulative input+output tokens).
|
||||
/// Default thresholds based on the active request input estimate.
|
||||
pub const DEFAULT_L1_THRESHOLD: usize = 192_000;
|
||||
pub const DEFAULT_L2_THRESHOLD: usize = 384_000;
|
||||
pub const DEFAULT_L3_THRESHOLD: usize = 576_000;
|
||||
@@ -66,7 +66,7 @@ pub struct SeamConfig {
|
||||
pub enabled: bool,
|
||||
/// Verbatim window: last N turns never summarized.
|
||||
pub verbatim_window_turns: usize,
|
||||
/// Soft seam thresholds.
|
||||
/// Soft seam thresholds based on the active request input estimate.
|
||||
pub l1_threshold: usize,
|
||||
pub l2_threshold: usize,
|
||||
pub l3_threshold: usize,
|
||||
@@ -143,29 +143,14 @@ impl SeamManager {
|
||||
}
|
||||
|
||||
/// Determine which seam level (if any) should fire for the given
|
||||
/// cumulative token count. Returns `None` when no seam is due.
|
||||
/// active request input estimate. Returns `None` when no seam is due.
|
||||
#[must_use]
|
||||
pub fn seam_level_for(
|
||||
&self,
|
||||
cumulative_tokens: usize,
|
||||
active_input_tokens: usize,
|
||||
highest_existing_level: Option<u8>,
|
||||
) -> Option<u8> {
|
||||
if !self.config.enabled {
|
||||
return None;
|
||||
}
|
||||
let highest = highest_existing_level.unwrap_or(0);
|
||||
|
||||
// Each level fires at most once, and only in order.
|
||||
if highest < 1 && cumulative_tokens >= self.config.l1_threshold {
|
||||
return Some(1);
|
||||
}
|
||||
if highest < 2 && cumulative_tokens >= self.config.l2_threshold {
|
||||
return Some(2);
|
||||
}
|
||||
if highest < 3 && cumulative_tokens >= self.config.l3_threshold {
|
||||
return Some(3);
|
||||
}
|
||||
None
|
||||
seam_level_for_active_input(&self.config, active_input_tokens, highest_existing_level)
|
||||
}
|
||||
|
||||
/// Check whether the hard cycle boundary is crossed.
|
||||
@@ -174,8 +159,8 @@ impl SeamManager {
|
||||
/// Kept as the canonical boundary definition for future wiring.
|
||||
#[must_use]
|
||||
#[allow(dead_code)]
|
||||
pub fn should_cycle(&self, cumulative_tokens: usize) -> bool {
|
||||
self.config.enabled && cumulative_tokens >= self.config.cycle_threshold
|
||||
pub fn should_cycle(&self, active_input_tokens: usize) -> bool {
|
||||
self.config.enabled && active_input_tokens >= self.config.cycle_threshold
|
||||
}
|
||||
|
||||
/// Compute the verbatim window: the last N message indices that must
|
||||
@@ -577,6 +562,30 @@ impl SeamManager {
|
||||
}
|
||||
}
|
||||
|
||||
/// Decide which soft-seam level (1, 2 or 3) should fire for the given active
/// request input estimate.
///
/// Returns `None` when `config.enabled` is false or when no remaining level's
/// threshold has been reached. `highest_existing_level` is the highest level
/// already produced; `None` is treated as 0 (no seam yet).
///
/// Levels are checked lowest first, so the result is the lowest level above
/// `highest_existing_level` whose threshold is met. An input that already
/// exceeds `l3_threshold` with no prior seam therefore yields `Some(1)`, not
/// `Some(3)`.
#[must_use]
|
||||
pub fn seam_level_for_active_input(
|
||||
config: &SeamConfig,
|
||||
active_input_tokens: usize,
|
||||
highest_existing_level: Option<u8>,
|
||||
) -> Option<u8> {
|
||||
if !config.enabled {
|
||||
return None;
|
||||
}
|
||||
let highest = highest_existing_level.unwrap_or(0);
|
||||
|
||||
// Each level fires at most once, and only in order.
|
||||
if highest < 1 && active_input_tokens >= config.l1_threshold {
|
||||
return Some(1);
|
||||
}
|
||||
if highest < 2 && active_input_tokens >= config.l2_threshold {
|
||||
return Some(2);
|
||||
}
|
||||
if highest < 3 && active_input_tokens >= config.l3_threshold {
|
||||
return Some(3);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Truncate a string to max_chars, respecting Unicode boundaries.
|
||||
fn truncate_chars(text: &str, max_chars: usize) -> String {
|
||||
if max_chars == 0 {
|
||||
@@ -598,15 +607,29 @@ mod tests {
|
||||
// Test the pure logic functions only.
|
||||
let config = SeamConfig::default();
|
||||
|
||||
// Test seam_level_for logic manually.
|
||||
// Below L1
|
||||
assert!(config.enabled && 100_000 < config.l1_threshold);
|
||||
// At L1
|
||||
assert!(192_000 >= config.l1_threshold);
|
||||
// At L2
|
||||
assert!(384_000 >= config.l2_threshold);
|
||||
// At L3
|
||||
assert!(576_000 >= config.l3_threshold);
|
||||
assert_eq!(seam_level_for_active_input(&config, 100_000, None), None);
|
||||
assert_eq!(seam_level_for_active_input(&config, 192_000, None), Some(1));
|
||||
assert_eq!(
|
||||
seam_level_for_active_input(&config, 384_000, Some(1)),
|
||||
Some(2)
|
||||
);
|
||||
assert_eq!(
|
||||
seam_level_for_active_input(&config, 576_000, Some(2)),
|
||||
Some(3)
|
||||
);
|
||||
}
|
||||
|
||||
/// A lifetime usage figure at or above the L3 threshold must not trigger a
/// seam while the active request input is still below every threshold.
#[test]
fn seam_trigger_uses_active_request_size_not_lifetime_usage() {
    let config = SeamConfig::default();
    let (cumulative_prompt_tokens, live_input_tokens) = (900_000usize, 120_000usize);

    // Sanity check: the cumulative figure on its own is past the top level.
    assert!(config.l3_threshold <= cumulative_prompt_tokens);

    let fired = seam_level_for_active_input(&config, live_input_tokens, None);
    assert_eq!(fired, None);
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1287,6 +1287,10 @@ use crate::tools::spec::{
|
||||
use async_trait::async_trait;
|
||||
use serde_json::json;
|
||||
|
||||
/// Recovery guidance for a foreground `exec_shell` command that timed out.
///
/// The same text is interpolated into the timed-out tool output and stored as
/// the `hint` field of the `foreground_timeout_recovery` metadata object, so
/// both carry identical wording.
const FOREGROUND_TIMEOUT_RECOVERY_HINT: &str = "Foreground exec_shell is for bounded commands. \
|
||||
The timed-out process was killed; rerun long work with task_shell_start or exec_shell with \
|
||||
background: true, then poll with task_shell_wait or exec_shell_wait.";
|
||||
|
||||
async fn execute_foreground_via_background(
|
||||
context: &ToolContext,
|
||||
command: &str,
|
||||
@@ -1372,7 +1376,7 @@ impl ToolSpec for ExecShellTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Execute a shell command in the workspace directory. Returns stdout, stderr, and exit code."
|
||||
"Execute a shell command in the workspace directory. Foreground mode is for bounded commands; use background=true or task_shell_start for long-running work, then poll/wait."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> serde_json::Value {
|
||||
@@ -1389,7 +1393,7 @@ impl ToolSpec for ExecShellTool {
|
||||
},
|
||||
"background": {
|
||||
"type": "boolean",
|
||||
"description": "Run in background and return task_id (default: false)"
|
||||
"description": "Run in background and return task_id (default: false). Prefer true for commands that may run for minutes; poll with exec_shell_wait or task_shell_wait."
|
||||
},
|
||||
"interactive": {
|
||||
"type": "boolean",
|
||||
@@ -1599,7 +1603,7 @@ impl ToolSpec for ExecShellTool {
|
||||
)
|
||||
} else if result.status == ShellStatus::TimedOut {
|
||||
format!(
|
||||
"Command timed out after {timeout_ms}ms; process killed.\n\nSTDOUT:\n{}\n\nSTDERR:\n{}",
|
||||
"Command timed out after {timeout_ms}ms; process killed.\n\n{FOREGROUND_TIMEOUT_RECOVERY_HINT}\n\nSTDOUT:\n{}\n\nSTDERR:\n{}",
|
||||
result.stdout, result.stderr
|
||||
)
|
||||
} else {
|
||||
@@ -1609,44 +1613,60 @@ impl ToolSpec for ExecShellTool {
|
||||
)
|
||||
};
|
||||
|
||||
let mut metadata = json!({
|
||||
"exit_code": result.exit_code,
|
||||
"status": format!("{:?}", result.status),
|
||||
"duration_ms": result.duration_ms,
|
||||
"sandboxed": result.sandboxed,
|
||||
"sandbox_type": result.sandbox_type,
|
||||
"sandbox_denied": result.sandbox_denied,
|
||||
"task_id": result.task_id,
|
||||
"stdout_len": result.stdout_len,
|
||||
"stderr_len": result.stderr_len,
|
||||
"stdout_truncated": result.stdout_truncated,
|
||||
"stderr_truncated": result.stderr_truncated,
|
||||
"stdout_omitted": result.stdout_omitted,
|
||||
"stderr_omitted": result.stderr_omitted,
|
||||
"summary": summary,
|
||||
"stdout_summary": stdout_summary,
|
||||
"stderr_summary": stderr_summary,
|
||||
"safety_level": format!("{:?}", safety.level),
|
||||
"interactive": interactive,
|
||||
"canceled": was_cancelled,
|
||||
"execpolicy": execpolicy_decision.as_ref().map(|decision| match decision {
|
||||
ExecPolicyDecision::Allow => json!({
|
||||
"decision": "allow",
|
||||
}),
|
||||
ExecPolicyDecision::Deny(reason) => json!({
|
||||
"decision": "deny",
|
||||
"reason": reason,
|
||||
}),
|
||||
ExecPolicyDecision::AskUser(reason) => json!({
|
||||
"decision": "ask_user",
|
||||
"reason": reason,
|
||||
}),
|
||||
}),
|
||||
});
|
||||
if result.status == ShellStatus::TimedOut && !background && !interactive {
|
||||
metadata["foreground_timeout_recovery"] = json!({
|
||||
"process_killed": true,
|
||||
"hint": FOREGROUND_TIMEOUT_RECOVERY_HINT,
|
||||
"recommended_tools": [
|
||||
"task_shell_start",
|
||||
"task_shell_wait",
|
||||
"exec_shell",
|
||||
"exec_shell_wait"
|
||||
],
|
||||
"exec_shell_background": true,
|
||||
"poll_with": ["task_shell_wait", "exec_shell_wait"]
|
||||
});
|
||||
}
|
||||
|
||||
Ok(ToolResult {
|
||||
content: output,
|
||||
success: result.status == ShellStatus::Completed
|
||||
|| result.status == ShellStatus::Running,
|
||||
metadata: Some(json!({
|
||||
"exit_code": result.exit_code,
|
||||
"status": format!("{:?}", result.status),
|
||||
"duration_ms": result.duration_ms,
|
||||
"sandboxed": result.sandboxed,
|
||||
"sandbox_type": result.sandbox_type,
|
||||
"sandbox_denied": result.sandbox_denied,
|
||||
"task_id": result.task_id,
|
||||
"stdout_len": result.stdout_len,
|
||||
"stderr_len": result.stderr_len,
|
||||
"stdout_truncated": result.stdout_truncated,
|
||||
"stderr_truncated": result.stderr_truncated,
|
||||
"stdout_omitted": result.stdout_omitted,
|
||||
"stderr_omitted": result.stderr_omitted,
|
||||
"summary": summary,
|
||||
"stdout_summary": stdout_summary,
|
||||
"stderr_summary": stderr_summary,
|
||||
"safety_level": format!("{:?}", safety.level),
|
||||
"interactive": interactive,
|
||||
"canceled": was_cancelled,
|
||||
"execpolicy": execpolicy_decision.as_ref().map(|decision| match decision {
|
||||
ExecPolicyDecision::Allow => json!({
|
||||
"decision": "allow",
|
||||
}),
|
||||
ExecPolicyDecision::Deny(reason) => json!({
|
||||
"decision": "deny",
|
||||
"reason": reason,
|
||||
}),
|
||||
ExecPolicyDecision::AskUser(reason) => json!({
|
||||
"decision": "ask_user",
|
||||
"reason": reason,
|
||||
}),
|
||||
}),
|
||||
})),
|
||||
metadata: Some(metadata),
|
||||
})
|
||||
}
|
||||
Err(e) => Ok(ToolResult::error(format!("Shell execution failed: {e}"))),
|
||||
|
||||
@@ -263,6 +263,47 @@ async fn test_exec_shell_metadata_includes_summaries() {
|
||||
assert!(meta.get("stdout_truncated").is_some());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exec_shell_foreground_timeout_guides_background_rerun() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let ctx = ToolContext::new(tmp.path());
|
||||
let tool = ExecShellTool;
|
||||
|
||||
let result = tool
|
||||
.execute(
|
||||
json!({
|
||||
"command": sleep_command(10),
|
||||
"timeout_ms": 1000
|
||||
}),
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.expect("execute");
|
||||
|
||||
assert!(!result.success);
|
||||
assert!(result.content.contains("task_shell_start"));
|
||||
assert!(result.content.contains("background: true"));
|
||||
assert!(result.content.contains("process killed"));
|
||||
let meta = result.metadata.expect("metadata");
|
||||
assert_eq!(meta.get("status").and_then(Value::as_str), Some("TimedOut"));
|
||||
let recovery = meta
|
||||
.get("foreground_timeout_recovery")
|
||||
.expect("timeout recovery metadata");
|
||||
assert_eq!(
|
||||
recovery
|
||||
.get("exec_shell_background")
|
||||
.and_then(Value::as_bool),
|
||||
Some(true)
|
||||
);
|
||||
assert!(
|
||||
recovery
|
||||
.get("hint")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default()
|
||||
.contains("exec_shell_wait")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exec_shell_foreground_cancel_kills_process() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
|
||||
@@ -612,13 +612,15 @@ pub struct App {
|
||||
pub runtime_turn_id: Option<String>,
|
||||
/// Current runtime turn status (if known).
|
||||
pub runtime_turn_status: Option<String>,
|
||||
/// Last prompt token usage
|
||||
/// Provider-reported input tokens from the last completed turn. This is
|
||||
/// telemetry/cost data and may sum repeated stable prefixes across tool
|
||||
/// rounds; active context pressure is estimated from `api_messages`.
|
||||
pub last_prompt_tokens: Option<u32>,
|
||||
/// Last completion token usage
|
||||
/// Provider-reported output tokens from the last completed turn.
|
||||
pub last_completion_tokens: Option<u32>,
|
||||
/// DeepSeek context-cache hit tokens from the last API call.
|
||||
/// DeepSeek context-cache hit tokens from the last API call. Telemetry only.
|
||||
pub last_prompt_cache_hit_tokens: Option<u32>,
|
||||
/// DeepSeek context-cache miss tokens from the last API call.
|
||||
/// DeepSeek context-cache miss tokens from the last API call. Telemetry only.
|
||||
pub last_prompt_cache_miss_tokens: Option<u32>,
|
||||
/// Approximate input tokens spent re-sending prior `reasoning_content` on
|
||||
/// the last thinking-mode tool-calling turn (V4 §5.1.1 "Interleaved
|
||||
|
||||
@@ -4714,7 +4714,7 @@ fn footer_context_percent_spans(app: &App) -> Vec<Span<'static>> {
|
||||
palette::TEXT_MUTED
|
||||
};
|
||||
vec![Span::styled(
|
||||
format!("ctx {percent:.0}%"),
|
||||
format!("active ctx {percent:.0}%"),
|
||||
Style::default().fg(color),
|
||||
)]
|
||||
}
|
||||
@@ -4802,7 +4802,7 @@ fn footer_cache_spans(app: &App) -> Vec<Span<'static>> {
|
||||
|
||||
let percent = (f64::from(hit_tokens) / f64::from(total) * 100.0).clamp(0.0, 100.0);
|
||||
vec![Span::styled(
|
||||
format!("cache {:.0}%", percent),
|
||||
format!("cache hit {:.0}%", percent),
|
||||
Style::default().fg(palette::TEXT_MUTED),
|
||||
)]
|
||||
}
|
||||
|
||||
@@ -575,7 +575,7 @@ fn footer_auxiliary_spans_show_cache_when_compact() {
|
||||
app.last_prompt_cache_miss_tokens = Some(12_000);
|
||||
app.session_cost = 12.34;
|
||||
|
||||
let compact = spans_text(&footer_auxiliary_spans(&app, 12));
|
||||
let compact = spans_text(&footer_auxiliary_spans(&app, 14));
|
||||
assert!(compact.contains("cache"));
|
||||
assert!(!compact.contains('$'));
|
||||
}
|
||||
@@ -589,7 +589,7 @@ fn footer_auxiliary_spans_show_cache_and_cost_when_roomy() {
|
||||
app.session_cost = 12.34;
|
||||
|
||||
let roomy = spans_text(&footer_auxiliary_spans(&app, 32));
|
||||
assert!(roomy.contains("cache 75%"));
|
||||
assert!(roomy.contains("cache hit 75%"));
|
||||
assert!(roomy.contains("$12.34"));
|
||||
assert!(
|
||||
!roomy.contains("ctx"),
|
||||
|
||||
@@ -31,7 +31,8 @@ pub struct HeaderData<'a> {
|
||||
pub context_window: Option<u32>,
|
||||
/// Accumulated session cost in USD.
|
||||
pub session_cost: f64,
|
||||
/// Input tokens from the most recent API call (current context utilization).
|
||||
/// Active context input tokens used for context utilization. Callers should
|
||||
/// pass a sanitized live-context estimate, not cumulative API usage.
|
||||
pub last_prompt_tokens: Option<u32>,
|
||||
/// Short label for the current reasoning-effort tier (e.g. "max", "high",
|
||||
/// "off"). Rendered as a chip when space allows.
|
||||
@@ -90,12 +91,12 @@ impl<'a> HeaderData<'a> {
|
||||
total_tokens: u32,
|
||||
context_window: Option<u32>,
|
||||
session_cost: f64,
|
||||
last_prompt_tokens: Option<u32>,
|
||||
active_context_input_tokens: Option<u32>,
|
||||
) -> Self {
|
||||
self.total_tokens = total_tokens;
|
||||
self.context_window = context_window;
|
||||
self.session_cost = session_cost;
|
||||
self.last_prompt_tokens = last_prompt_tokens;
|
||||
self.last_prompt_tokens = active_context_input_tokens;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
+23
-1
@@ -157,6 +157,26 @@ Readability semantics:
|
||||
`crowded`, `refreshing`, `verifying`, and `resetting`; these are derived from
|
||||
capacity and compaction events without exposing internal formulas in normal UI.
|
||||
|
||||
### Token Quantities and Drivers
|
||||
|
||||
DeepSeek V4 prefix caching makes token labels matter. These quantities are kept
|
||||
separate:
|
||||
|
||||
| Quantity | Meaning | Allowed to drive |
|
||||
|---|---|---|
|
||||
| Active request input estimate | Conservative estimate of the next request's live system prompt and transcript payload. | Header/footer context percent, hard-cycle trigger, opt-in Flash seam trigger, and emergency overflow preflight. |
|
||||
| Reserved response headroom | The requested `max_tokens` budget plus safety headroom. v0.7.5 keeps normal turns at `262144` output tokens and adds `1024` safety tokens for context-window checks. | Hard-cycle and emergency overflow budget checks only. |
|
||||
| Cumulative API usage | Provider-reported input plus output tokens summed across completed API calls; multi-tool turns may count the same stable prefix more than once. | Session usage and approximate cost telemetry only. |
|
||||
| Prompt cache hit/miss | Provider cache telemetry for the most recent call when available. | Cache-hit display and cost estimation only; never compaction, seam, or cycle triggers. |
|
||||
| Context percent | Active request input estimate divided by the model context window. | Display only; it mirrors the active-input basis used by context safeguards. |
|
||||
| Cost estimate | Approximate spend from provider usage and configured DeepSeek rates. | Display only. |
|
||||
|
||||
For the default V4 path, hard cycles fire when active input reaches the smaller
|
||||
of the configured cycle threshold (`768000`) and the model window minus reserved
|
||||
response headroom. Replacement compaction remains opt-in (`auto_compact = false`
|
||||
by default), the Flash seam manager remains opt-in (`[context].enabled = false`),
|
||||
and the capacity controller remains disabled unless configured.
|
||||
|
||||
### Command Migration Notes
|
||||
|
||||
If you are upgrading from older releases:
|
||||
@@ -196,7 +216,9 @@ If you are upgrading from older releases:
|
||||
- `[snapshots].enabled` (bool, default `true`)
|
||||
- `[snapshots].max_age_days` (int, default `7`)
|
||||
- snapshots live under `~/.deepseek/snapshots/<project_hash>/<worktree_hash>/.git` and never use the workspace's own `.git` directory
|
||||
- `context.*` (optional): append-only Flash seam manager, currently opt-in:
|
||||
- `context.*` (optional): append-only Flash seam manager, currently opt-in.
|
||||
Thresholds use the active request input estimate, not lifetime summed API
|
||||
usage:
|
||||
- `[context].enabled` (bool, default `false`)
|
||||
- `[context].verbatim_window_turns` (int, default `16`)
|
||||
- `[context].l1_threshold` (int, default `192000`)
|
||||
|
||||
+10
-4
@@ -15,7 +15,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts
|
||||
for the same backing operation are a model trap — the LLM will alternate
|
||||
between them and the cache hit rate suffers.
|
||||
|
||||
## Final surface (v0.7.4)
|
||||
## Current surface (v0.7.5)
|
||||
|
||||
### File operations
|
||||
|
||||
@@ -40,19 +40,25 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts
|
||||
|
||||
| Tool | Niche |
|
||||
|---|---|
|
||||
| `exec_shell` | Run a shell command. Foreground runs are cancellable, but use them only for bounded commands. |
|
||||
| `exec_shell` | Run a shell command. Foreground runs are cancellable, but use them only for bounded commands; timeout kills the process and returns a background-rerun hint. |
|
||||
| `exec_shell_wait` | Poll a background task for incremental output. |
|
||||
| `exec_shell_interact` | Send stdin to a running background task and read incremental output. |
|
||||
| `task_shell_start` | Start a long-running command in the background and return immediately. Preferred over foreground shell for diagnostics, tests, searches, and servers that may run for minutes. |
|
||||
| `task_shell_wait` | Poll a background command. If `gate` is supplied after completion, record structured gate evidence on the active durable task. |
|
||||
|
||||
When a foreground shell command times out, the process is not continued
|
||||
silently. The tool result tells the model to rerun long work with
|
||||
`task_shell_start` or `exec_shell` with `background: true`, then poll with
|
||||
`task_shell_wait` or `exec_shell_wait`.
|
||||
|
||||
Interactive shell jobs are also visible through `/jobs`. The TUI job center is
|
||||
fed by the same shell manager as `exec_shell`/`task_shell_start`, and shows the
|
||||
command, cwd, elapsed time, status, output tail, process-local shell id, and
|
||||
linked durable task id when available. `/jobs show`, `/jobs poll`, `/jobs wait`,
|
||||
`/jobs stdin`, and `/jobs cancel` provide inspect, polling, stdin, and cancel
|
||||
controls for live jobs. Jobs are process-local; after restart, detached entries
|
||||
are marked stale rather than presented as live processes.
|
||||
controls for live jobs. Jobs are process-local; after restart, live process
|
||||
state is not reattached, and any remembered detached entries must be marked
|
||||
stale rather than presented as live processes.
|
||||
|
||||
### MCP manager and palette discovery
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "deepseek-tui",
|
||||
"version": "0.7.4",
|
||||
"deepseekBinaryVersion": "0.7.4",
|
||||
"version": "0.7.5",
|
||||
"deepseekBinaryVersion": "0.7.5",
|
||||
"description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
|
||||
"author": "Hmbown",
|
||||
"license": "MIT",
|
||||
|
||||
Reference in New Issue
Block a user