v0.7.2: clippy clean, cost counter wiring, layered context fixup

#167: Fix all 7 clippy warnings — annotated SeamMetadata dead fields,
removed unused should_cycle calls, collapsed nested ifs, fixed
useless_format and nonminimal_bool.

#168: Wire TokenUsage mailbox drain to subagent_cost accumulator.
handle_subagent_mailbox now intercepts TokenUsage before routing to
cards, computes cost via calculate_turn_cost, and increments
app.subagent_cost in real time. Footer reflects live sub-agent spend.

Restored ArchivedContext variant to HistoryCell (corrupted by prior
apply_patch). Version bump to 0.7.2.

Refs: #166, #167, #168
This commit is contained in:
Hunter Bown
2026-04-28 21:46:25 -05:00
parent 12b1ae42c4
commit 35db361a87
27 changed files with 13550 additions and 107 deletions
Generated
+14 -14
View File
@@ -1011,7 +1011,7 @@ dependencies = [
[[package]]
name = "deepseek-agent"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"deepseek-config",
"serde",
@@ -1019,7 +1019,7 @@ dependencies = [
[[package]]
name = "deepseek-app-server"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"axum",
@@ -1042,7 +1042,7 @@ dependencies = [
[[package]]
name = "deepseek-config"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"deepseek-secrets",
@@ -1055,7 +1055,7 @@ dependencies = [
[[package]]
name = "deepseek-core"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"chrono",
@@ -1074,7 +1074,7 @@ dependencies = [
[[package]]
name = "deepseek-execpolicy"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -1083,7 +1083,7 @@ dependencies = [
[[package]]
name = "deepseek-hooks"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"async-trait",
@@ -1097,7 +1097,7 @@ dependencies = [
[[package]]
name = "deepseek-mcp"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -1107,7 +1107,7 @@ dependencies = [
[[package]]
name = "deepseek-protocol"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"serde",
"serde_json",
@@ -1115,7 +1115,7 @@ dependencies = [
[[package]]
name = "deepseek-secrets"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"dirs",
"keyring",
@@ -1128,7 +1128,7 @@ dependencies = [
[[package]]
name = "deepseek-state"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"chrono",
@@ -1140,7 +1140,7 @@ dependencies = [
[[package]]
name = "deepseek-tools"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"async-trait",
@@ -1153,7 +1153,7 @@ dependencies = [
[[package]]
name = "deepseek-tui"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"arboard",
@@ -1213,7 +1213,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-cli"
version = "0.7.1"
version = "0.7.2"
dependencies = [
"anyhow",
"chrono",
@@ -1236,7 +1236,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-core"
version = "0.7.1"
version = "0.7.2"
[[package]]
name = "deranged"
+1 -1
View File
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
resolver = "2"
[workspace.package]
version = "0.7.1"
version = "0.7.2"
edition = "2024"
license = "MIT"
repository = "https://github.com/Hmbown/DeepSeek-TUI"
Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

+17 -1
View File
@@ -188,7 +188,23 @@ pub fn required_str<'a>(input: &'a Value, field: &str) -> std::result::Result<&'
input
.get(field)
.and_then(Value::as_str)
.ok_or_else(|| ToolError::missing_field(field))
.ok_or_else(|| {
// When the field is missing, list the fields the caller *did*
// supply so the model can spot the mismatch without a retry.
let provided: Vec<&str> = input
.as_object()
.map(|obj| obj.keys().map(|k| k.as_str()).collect())
.unwrap_or_default();
if provided.is_empty() {
ToolError::missing_field(field)
} else {
let hint = format!(
"missing required field '{field}'. Input provided: {}",
provided.join(", ")
);
ToolError::invalid_input(hint)
}
})
}
/// Helper to extract an optional string field from JSON input.
+3 -3
View File
@@ -750,7 +750,7 @@ pub(super) fn apply_reasoning_effort(
"off" | "disabled" | "none" | "false" => match provider {
// OpenRouter / Novita relay the same DeepSeek V4 payload shape
// as DeepSeek native; they pass through `thinking` / `reasoning_effort`.
ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita => {
ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks | ApiProvider::Sglang => {
body["thinking"] = json!({ "type": "disabled" });
}
ApiProvider::NvidiaNim => {
@@ -760,7 +760,7 @@ pub(super) fn apply_reasoning_effort(
}
},
"low" | "minimal" | "medium" | "mid" | "high" | "" => match provider {
ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita => {
ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks | ApiProvider::Sglang => {
body["reasoning_effort"] = json!("high");
body["thinking"] = json!({ "type": "enabled" });
}
@@ -772,7 +772,7 @@ pub(super) fn apply_reasoning_effort(
}
},
"xhigh" | "max" | "highest" => match provider {
ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita => {
ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks | ApiProvider::Sglang => {
body["reasoning_effort"] = json!("max");
body["thinking"] = json!({ "type": "enabled" });
}
File diff suppressed because it is too large Load Diff
+9
View File
@@ -163,6 +163,15 @@ pub fn export(app: &mut App, path: Option<&str>) -> CommandResult {
HistoryCell::Thinking { content, .. } => ("*Thinking:*", content.clone()),
HistoryCell::Tool(tool) => ("**Tool:**", render_tool_cell(tool, 80)),
HistoryCell::SubAgent(sub) => ("**Sub-agent:**", render_subagent_cell(sub, 80)),
HistoryCell::ArchivedContext {
level,
range,
summary,
..
} => (
"**Archived Context:**",
format!("L{level} [{range}]: {summary}"),
),
};
let _ = write!(content, "{}\n\n{}\n\n---\n\n", role, body.trim());
+121 -2
View File
@@ -25,6 +25,11 @@ pub const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1";
pub const DEFAULT_NOVITA_MODEL: &str = "deepseek/deepseek-v4-pro";
pub const DEFAULT_NOVITA_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash";
pub const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/v1";
pub const DEFAULT_FIREWORKS_MODEL: &str = "accounts/fireworks/models/deepseek-v4-pro";
pub const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference/v1";
pub const DEFAULT_SGLANG_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
pub const DEFAULT_SGLANG_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
pub const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1";
const API_KEYRING_SENTINEL: &str = "__KEYRING__";
pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[
"deepseek-v4-pro",
@@ -41,6 +46,8 @@ pub enum ApiProvider {
NvidiaNim,
Openrouter,
Novita,
Fireworks,
Sglang,
}
impl ApiProvider {
@@ -51,6 +58,8 @@ impl ApiProvider {
"nvidia" | "nvidia-nim" | "nvidia_nim" | "nim" => Some(Self::NvidiaNim),
"openrouter" | "open_router" => Some(Self::Openrouter),
"novita" => Some(Self::Novita),
"fireworks" | "fireworks-ai" => Some(Self::Fireworks),
"sglang" | "sg-lang" => Some(Self::Sglang),
_ => None,
}
}
@@ -62,6 +71,8 @@ impl ApiProvider {
Self::NvidiaNim => "nvidia-nim",
Self::Openrouter => "openrouter",
Self::Novita => "novita",
Self::Fireworks => "fireworks",
Self::Sglang => "sglang",
}
}
@@ -73,6 +84,8 @@ impl ApiProvider {
Self::NvidiaNim => "NVIDIA NIM",
Self::Openrouter => "OpenRouter",
Self::Novita => "Novita AI",
Self::Fireworks => "Fireworks AI",
Self::Sglang => "SGLang",
}
}
@@ -84,6 +97,8 @@ impl ApiProvider {
Self::NvidiaNim,
Self::Openrouter,
Self::Novita,
Self::Fireworks,
Self::Sglang,
]
}
}
@@ -688,6 +703,10 @@ pub struct ProvidersConfig {
pub openrouter: ProviderConfig,
#[serde(default)]
pub novita: ProviderConfig,
#[serde(default)]
pub fireworks: ProviderConfig,
#[serde(default)]
pub sglang: ProviderConfig,
}
#[derive(Debug, Clone, Deserialize, Default)]
@@ -747,7 +766,7 @@ impl Config {
&& ApiProvider::parse(provider).is_none()
{
anyhow::bail!(
"Invalid provider '{provider}': expected deepseek, nvidia-nim, openrouter, or novita."
"Invalid provider '{provider}': expected deepseek, nvidia-nim, openrouter, novita, fireworks, or sglang."
);
}
if let Some(ref key) = self.api_key
@@ -855,6 +874,8 @@ impl Config {
ApiProvider::NvidiaNim => &providers.nvidia_nim,
ApiProvider::Openrouter => &providers.openrouter,
ApiProvider::Novita => &providers.novita,
ApiProvider::Fireworks => &providers.fireworks,
ApiProvider::Sglang => &providers.sglang,
})
}
@@ -883,6 +904,8 @@ impl Config {
ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_MODEL,
ApiProvider::Openrouter => DEFAULT_OPENROUTER_MODEL,
ApiProvider::Novita => DEFAULT_NOVITA_MODEL,
ApiProvider::Fireworks => DEFAULT_FIREWORKS_MODEL,
ApiProvider::Sglang => DEFAULT_SGLANG_MODEL,
}
.to_string()
}
@@ -905,7 +928,8 @@ impl Config {
.as_ref()
.filter(|base| base.contains("integrate.api.nvidia.com"))
.cloned(),
ApiProvider::Openrouter | ApiProvider::Novita => None,
ApiProvider::Openrouter | ApiProvider::Novita
| ApiProvider::Fireworks | ApiProvider::Sglang => None,
};
let base = provider_base.or(root_base).unwrap_or_else(|| {
match provider {
@@ -913,6 +937,8 @@ impl Config {
ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL,
ApiProvider::Openrouter => DEFAULT_OPENROUTER_BASE_URL,
ApiProvider::Novita => DEFAULT_NOVITA_BASE_URL,
ApiProvider::Fireworks => DEFAULT_FIREWORKS_BASE_URL,
ApiProvider::Sglang => DEFAULT_SGLANG_BASE_URL,
}
.to_string()
});
@@ -932,6 +958,8 @@ impl Config {
ApiProvider::NvidiaNim => "nvidia-nim",
ApiProvider::Openrouter => "openrouter",
ApiProvider::Novita => "novita",
ApiProvider::Fireworks => "fireworks",
ApiProvider::Sglang => "sglang",
};
// 1. OS keyring + 2. environment variables (handled by Secrets).
@@ -986,6 +1014,15 @@ impl Config {
"Novita API key not found. Run 'deepseek auth set --provider novita', \
set NOVITA_API_KEY, or add [providers.novita] api_key in ~/.deepseek/config.toml."
),
ApiProvider::Fireworks => anyhow::bail!(
"Fireworks AI API key not found. Run 'deepseek auth set --provider fireworks', \
set FIREWORKS_API_KEY, or add [providers.fireworks] api_key in ~/.deepseek/config.toml."
),
ApiProvider::Sglang => anyhow::bail!(
"SGLang API key not found (optional for self-hosted). Run 'deepseek auth set --provider sglang', \
set SGLANG_API_KEY, or add [providers.sglang] api_key in ~/.deepseek/config.toml. \
If your SGLang deployment runs without authentication, set SGLANG_API_KEY to an empty string or any placeholder."
),
}
}
@@ -1300,6 +1337,31 @@ fn apply_env_overrides(config: &mut Config) {
.novita
.base_url = Some(value);
}
if matches!(config.api_provider(), ApiProvider::Fireworks)
&& let Ok(value) = std::env::var("FIREWORKS_BASE_URL")
&& !value.trim().is_empty()
{
config
.providers
.get_or_insert_with(ProvidersConfig::default)
.fireworks
.base_url = Some(value);
}
if matches!(config.api_provider(), ApiProvider::Sglang)
&& let Ok(value) = std::env::var("SGLANG_BASE_URL")
&& !value.trim().is_empty()
{
config
.providers
.get_or_insert_with(ProvidersConfig::default)
.sglang
.base_url = Some(value);
}
if matches!(config.api_provider(), ApiProvider::Sglang)
&& let Ok(value) = std::env::var("SGLANG_MODEL")
{
config.default_text_model = Some(value);
}
if let Ok(value) =
std::env::var("DEEPSEEK_MODEL").or_else(|_| std::env::var("DEEPSEEK_DEFAULT_TEXT_MODEL"))
{
@@ -1485,6 +1547,16 @@ fn normalize_model_config(config: &mut Config) {
{
providers.novita.model = Some(normalized);
}
if let Some(model) = providers.fireworks.model.as_deref()
&& let Some(normalized) = normalize_model_for_provider(ApiProvider::Fireworks, model)
{
providers.fireworks.model = Some(normalized);
}
if let Some(model) = providers.sglang.model.as_deref()
&& let Some(normalized) = normalize_model_for_provider(ApiProvider::Sglang, model)
{
providers.sglang.model = Some(normalized);
}
}
}
@@ -1502,6 +1574,13 @@ fn model_for_provider(provider: ApiProvider, normalized: String) -> String {
}
(ApiProvider::Novita, "deepseek-v4-pro") => DEFAULT_NOVITA_MODEL.to_string(),
(ApiProvider::Novita, "deepseek-v4-flash") => DEFAULT_NOVITA_FLASH_MODEL.to_string(),
(ApiProvider::Fireworks, "deepseek-v4-pro") => DEFAULT_FIREWORKS_MODEL.to_string(),
(ApiProvider::Fireworks, "deepseek-v4-flash") => {
// Flash not yet available on Fireworks; still map to the Fireworks-namespaced
// model id (this arm does not fall through to the bare normalized name).
"accounts/fireworks/models/deepseek-v4-flash".to_string()
}
(ApiProvider::Sglang, "deepseek-v4-pro") => DEFAULT_SGLANG_MODEL.to_string(),
(ApiProvider::Sglang, "deepseek-v4-flash") => DEFAULT_SGLANG_FLASH_MODEL.to_string(),
_ => normalized,
}
}
@@ -1618,6 +1697,8 @@ fn merge_providers(
nvidia_nim: merge_provider_config(base.nvidia_nim, override_cfg.nvidia_nim),
openrouter: merge_provider_config(base.openrouter, override_cfg.openrouter),
novita: merge_provider_config(base.novita, override_cfg.novita),
fireworks: merge_provider_config(base.fireworks, override_cfg.fireworks),
sglang: merge_provider_config(base.sglang, override_cfg.sglang),
}),
}
}
@@ -1821,6 +1902,8 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
ApiProvider::Openrouter => "OPENROUTER_API_KEY",
ApiProvider::Novita => "NOVITA_API_KEY",
ApiProvider::Fireworks => "FIREWORKS_API_KEY",
ApiProvider::Sglang => "SGLANG_API_KEY",
};
if std::env::var(env_var).is_ok_and(|k| !k.trim().is_empty()) {
return true;
@@ -1831,12 +1914,19 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
return true;
}
// SGLang is self-hosted and typically runs without authentication.
if matches!(provider, ApiProvider::Sglang) {
return true;
}
if let Some(providers) = config.providers.as_ref() {
let entry = match provider {
ApiProvider::Deepseek => &providers.deepseek,
ApiProvider::NvidiaNim => &providers.nvidia_nim,
ApiProvider::Openrouter => &providers.openrouter,
ApiProvider::Novita => &providers.novita,
ApiProvider::Fireworks => &providers.fireworks,
ApiProvider::Sglang => &providers.sglang,
};
if entry
.api_key
@@ -1873,6 +1963,8 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::NvidiaNim => "providers.nvidia_nim",
ApiProvider::Openrouter => "providers.openrouter",
ApiProvider::Novita => "providers.novita",
ApiProvider::Fireworks => "providers.fireworks",
ApiProvider::Sglang => "providers.sglang",
};
// Parse existing TOML (or start fresh) so we can edit the right table
@@ -1898,6 +1990,8 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::NvidiaNim => "nvidia_nim",
ApiProvider::Openrouter => "openrouter",
ApiProvider::Novita => "novita",
ApiProvider::Fireworks => "fireworks",
ApiProvider::Sglang => "sglang",
};
let entry = providers
.entry(key_inside.to_string())
@@ -1987,6 +2081,11 @@ mod tests {
openrouter_base_url: Option<OsString>,
novita_api_key: Option<OsString>,
novita_base_url: Option<OsString>,
fireworks_api_key: Option<OsString>,
fireworks_base_url: Option<OsString>,
sglang_api_key: Option<OsString>,
sglang_base_url: Option<OsString>,
sglang_model: Option<OsString>,
}
impl EnvGuard {
@@ -2012,6 +2111,11 @@ mod tests {
let openrouter_base_url_prev = env::var_os("OPENROUTER_BASE_URL");
let novita_api_key_prev = env::var_os("NOVITA_API_KEY");
let novita_base_url_prev = env::var_os("NOVITA_BASE_URL");
let fireworks_api_key_prev = env::var_os("FIREWORKS_API_KEY");
let fireworks_base_url_prev = env::var_os("FIREWORKS_BASE_URL");
let sglang_api_key_prev = env::var_os("SGLANG_API_KEY");
let sglang_base_url_prev = env::var_os("SGLANG_BASE_URL");
let sglang_model_prev = env::var_os("SGLANG_MODEL");
// Safety: test-only environment mutation guarded by a global mutex.
unsafe {
env::set_var("HOME", &home_str);
@@ -2032,6 +2136,11 @@ mod tests {
env::remove_var("OPENROUTER_BASE_URL");
env::remove_var("NOVITA_API_KEY");
env::remove_var("NOVITA_BASE_URL");
env::remove_var("FIREWORKS_API_KEY");
env::remove_var("FIREWORKS_BASE_URL");
env::remove_var("SGLANG_API_KEY");
env::remove_var("SGLANG_BASE_URL");
env::remove_var("SGLANG_MODEL");
}
Self {
home: home_prev,
@@ -2052,6 +2161,11 @@ mod tests {
openrouter_base_url: openrouter_base_url_prev,
novita_api_key: novita_api_key_prev,
novita_base_url: novita_base_url_prev,
fireworks_api_key: fireworks_api_key_prev,
fireworks_base_url: fireworks_base_url_prev,
sglang_api_key: sglang_api_key_prev,
sglang_base_url: sglang_base_url_prev,
sglang_model: sglang_model_prev,
}
}
}
@@ -2081,6 +2195,11 @@ mod tests {
Self::restore_var("OPENROUTER_BASE_URL", self.openrouter_base_url.take());
Self::restore_var("NOVITA_API_KEY", self.novita_api_key.take());
Self::restore_var("NOVITA_BASE_URL", self.novita_base_url.take());
Self::restore_var("FIREWORKS_API_KEY", self.fireworks_api_key.take());
Self::restore_var("FIREWORKS_BASE_URL", self.fireworks_base_url.take());
Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take());
Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take());
Self::restore_var("SGLANG_MODEL", self.sglang_model.take());
}
}
}
+223 -26
View File
@@ -24,6 +24,7 @@ use crate::compaction::{
CompactionConfig, compact_messages_safe, estimate_tokens, merge_system_prompts, should_compact,
};
use crate::config::{Config, DEFAULT_MAX_SUBAGENTS, DEFAULT_TEXT_MODEL};
use crate::seam_manager::{SeamConfig, SeamManager};
use crate::cycle_manager::{
CycleBriefing, CycleConfig, StructuredState, archive_cycle, build_seed_messages,
estimate_briefing_tokens, produce_briefing, should_advance_cycle,
@@ -265,6 +266,9 @@ pub struct Engine {
shared_cancel_token: Arc<StdMutex<CancellationToken>>,
tool_exec_lock: Arc<RwLock<()>>,
capacity_controller: CapacityController,
/// Append-only layered context manager (#159). Produces soft seams at
/// 192K/384K/576K and Flash-cycle briefings at 768K.
seam_manager: Option<SeamManager>,
coherence_state: CoherenceState,
turn_counter: u64,
/// Post-edit LSP diagnostics injection (#136). Populated unconditionally
@@ -1254,6 +1258,36 @@ impl Engine {
let shell_manager = new_shared_shell_manager(config.workspace.clone());
let capacity_controller = CapacityController::new(config.capacity.clone());
// Create Flash seam manager for layered context (#159). Uses the same
// API credentials as the main client but targets the Flash model for
// cost-effective summarisation and cycle briefing work.
let seam_manager = deepseek_client.as_ref().map(|main_client| {
let seam_config = SeamConfig {
enabled: api_config.context.enabled.unwrap_or(true),
verbatim_window_turns: api_config.context.verbatim_window_turns.unwrap_or(
crate::seam_manager::VERBATIM_WINDOW_TURNS,
),
l1_threshold: api_config.context.l1_threshold.unwrap_or(
crate::seam_manager::DEFAULT_L1_THRESHOLD,
),
l2_threshold: api_config.context.l2_threshold.unwrap_or(
crate::seam_manager::DEFAULT_L2_THRESHOLD,
),
l3_threshold: api_config.context.l3_threshold.unwrap_or(
crate::seam_manager::DEFAULT_L3_THRESHOLD,
),
cycle_threshold: api_config.context.cycle_threshold.unwrap_or(
crate::seam_manager::DEFAULT_CYCLE_THRESHOLD,
),
seam_model: api_config
.context
.seam_model
.clone()
.unwrap_or_else(|| crate::seam_manager::DEFAULT_SEAM_MODEL.to_string()),
};
SeamManager::new(main_client.clone(), seam_config)
});
let lsp_manager = Arc::new(match config.lsp_config.clone() {
Some(cfg) => crate::lsp::LspManager::new(cfg, config.workspace.clone()),
None => crate::lsp::LspManager::disabled(),
@@ -1276,6 +1310,7 @@ impl Engine {
shared_cancel_token: shared_cancel_token.clone(),
tool_exec_lock,
capacity_controller,
seam_manager,
coherence_state: CoherenceState::default(),
turn_counter: 0,
lsp_manager,
@@ -2378,7 +2413,117 @@ impl Engine {
/// Handle a turn using the DeepSeek API.
#[allow(clippy::too_many_lines)]
// NOTE(review): the `/// Handle a turn using the DeepSeek API.` line and the
// `#[allow(clippy::too_many_lines)]` attribute directly above now attach to
// this function; they look like leftovers from another item — confirm the
// intended target.
/// Run the pre-request layered-context checkpoint (#159). Checks whether
/// cumulative tokens have crossed a soft-seam threshold and, if so,
/// produces an `<archived_context>` block via Flash and appends it as an
/// assistant message. Called from `handle_deepseek_turn` before each API
/// request so the model always has the latest navigation aids.
///
/// Returns early without appending anything when: no seam manager is
/// configured, the manager is disabled, no seam level is due, there is
/// nothing before the verbatim window, seam production fails (logged as a
/// warning), or the produced seam text is empty.
async fn layered_context_checkpoint(&mut self) {
let Some(ref seam_mgr) = self.seam_manager else {
return;
};
if !seam_mgr.config().enabled {
return;
}
// Cumulative tokens: session total (all turns so far) + current
// estimated input (the messages that will be sent next).
// Input and output totals are combined with saturating adds, so the sum
// clamps instead of wrapping.
let cumulative_input = self
.session
.total_usage
.input_tokens
.saturating_add(self.session.total_usage.output_tokens);
let cumulative_estimate =
cumulative_input.saturating_add(self.estimated_input_tokens() as u64);
let highest = seam_mgr.highest_level().await;
// NOTE(review): `cumulative_estimate as usize` is a plain `as` cast; if the
// value is a u64 it would truncate on 32-bit targets — confirm acceptable.
let Some(level) = seam_mgr.seam_level_for(cumulative_estimate as usize, highest) else {
return;
};
// Determine the message range to summarize: everything before the
// verbatim window. The verbatim window (last ~16 turns) stays
// untouched so the model always has ground-truth recent context.
let msg_count = self.session.messages.len();
let verbatim_start = seam_mgr.verbatim_window_start(msg_count);
if verbatim_start == 0 {
return; // Not enough messages to summarize.
}
let msg_range_end = verbatim_start;
let pinned = self
.session
.working_set
.pinned_message_indices(&self.session.messages, &self.session.workspace);
// Status events are best-effort: the send result is deliberately ignored.
let _ = self
.tx_event
.send(Event::status(format!(
"⏻ producing L{level} context seam ({msg_range_end} messages)…"
)))
.await;
// If we have existing seams, recompact; otherwise produce fresh.
let existing_seams = seam_mgr.collect_seam_texts(&self.session.messages).await;
let seam_text = if existing_seams.is_empty() {
match seam_mgr
.produce_soft_seam(
&self.session.messages,
level,
0,
msg_range_end,
Some(&self.session.workspace),
&pinned,
)
.await
{
Ok(text) => text,
Err(err) => {
crate::logging::warn(format!("L{level} soft seam failed: {err}"));
return;
}
}
} else {
// Despite the name, `recent` holds every message in `0..msg_range_end`
// (the same span the fresh path summarizes), not just the newest ones.
let recent: Vec<&Message> = (0..msg_range_end)
.filter_map(|i| self.session.messages.get(i))
.collect();
match seam_mgr
.recompact(&existing_seams, &recent, level, 0, msg_range_end)
.await
{
Ok(text) => text,
Err(err) => {
crate::logging::warn(format!("L{level} recompact failed: {err}"));
return;
}
}
};
if seam_text.is_empty() {
return;
}
// Capture seam count before the mutable borrow below.
// NOTE(review): whether this count already includes the seam just produced
// depends on `SeamManager` internals not visible here — verify the number
// shown in the "seam complete" status is the intended one.
let seam_count = seam_mgr.seam_count().await;
// Append the seam as an assistant message. This is an append-only
// operation — no messages are deleted. The prefix cache stays hot.
self.add_session_message(Message {
role: "assistant".to_string(),
content: vec![ContentBlock::Text {
text: seam_text,
cache_control: None,
}],
})
.await;
let _ = self
.tx_event
.send(Event::status(format!(
"⏻ L{level} seam complete ({seam_count} total, {msg_range_end} messages covered)"
)))
.await;
}
/// Run the checkpoint-restart cycle boundary if the session has crossed
/// its token threshold (issue #124). No-op in the common case.
///
/// Caller must invoke this only at a clean turn boundary (no in-flight
@@ -2420,31 +2565,79 @@ impl Engine {
)))
.await;
// 1. Generate the model-curated briefing. We do this *before*
// archiving so a briefing-call failure leaves the cycle intact —
// the user can keep working at higher token counts until the next
// boundary check, rather than losing their context to a failed
// handoff.
let briefing_text = match produce_briefing(
&client,
&self.session.model,
&self.session.messages,
max_briefing_tokens,
)
.await
{
Ok(text) => text,
Err(err) => {
crate::logging::warn(format!(
"Cycle briefing turn failed; skipping cycle advance: {err}"
));
let _ = self
.tx_event
.send(Event::status(format!(
"↻ cycle handoff failed (continuing in cycle {from}): {err}"
)))
.await;
return;
// 1. Generate the model-curated briefing. Prefer the Flash seam
// manager (#159) for cost and speed; fall back to the main model
// (legacy produce_briefing) when the seam manager isn't available.
let briefing_text = if let Some(ref seam_mgr) = self.seam_manager {
let seams = seam_mgr.collect_seam_texts(&self.session.messages).await;
let state_text = {
let s = StructuredState::capture(
mode.label(),
self.config.workspace.clone(),
std::env::current_dir().ok(),
&self.session.working_set,
&self.config.todos,
&self.config.plan_state,
Some(&self.subagent_manager),
)
.await;
s.to_system_block()
};
match seam_mgr
.produce_flash_briefing(&seams, state_text.as_deref())
.await
{
Ok(text) => text,
Err(err) => {
crate::logging::warn(format!(
"Flash briefing failed, falling back to main model: {err}"
));
match produce_briefing(
&client,
&self.session.model,
&self.session.messages,
max_briefing_tokens,
)
.await
{
Ok(text) => text,
Err(err2) => {
crate::logging::warn(format!(
"Cycle briefing turn failed; skipping cycle advance: {err2}"
));
let _ = self
.tx_event
.send(Event::status(format!(
"↻ cycle handoff failed (continuing in cycle {from}): {err2}"
)))
.await;
return;
}
}
}
}
} else {
match produce_briefing(
&client,
&self.session.model,
&self.session.messages,
max_briefing_tokens,
)
.await
{
Ok(text) => text,
Err(err) => {
crate::logging::warn(format!(
"Cycle briefing turn failed; skipping cycle advance: {err}"
));
let _ = self
.tx_event
.send(Event::status(format!(
"↻ cycle handoff failed (continuing in cycle {from}): {err}"
)))
.await;
return;
}
}
};
@@ -2504,6 +2697,10 @@ impl Engine {
self.session.cycle_count = to;
self.session.current_cycle_started = now;
self.session.cycle_briefings.push(briefing.clone());
// Reset seam tracking for the new cycle.
if let Some(ref seam_mgr) = self.seam_manager {
seam_mgr.reset().await;
}
// Drop any compaction summary — that path is incompatible with the
// fresh-context model and would Frankenstein-merge with the briefing.
self.session.compaction_summary_prompt = None;
File diff suppressed because it is too large Load Diff
+6
View File
@@ -212,6 +212,12 @@ impl Engine {
// model sees compile errors before its next reasoning step.
self.flush_pending_lsp_diagnostics().await;
// #159: layered context seam checkpoint. Produces soft seams at
// 192K/384K/576K via Flash and appends <archived_context> blocks
// so the model can navigate deep history without losing prefix
// cache affinity.
self.layered_context_checkpoint().await;
// Build the request
let force_update_plan_this_step = force_update_plan_first && turn.tool_calls.is_empty();
let active_tools = if tool_catalog.is_empty() {
+13 -11
View File
@@ -28,13 +28,13 @@
//!
//! ## Trigger
//!
//! - Token threshold: **110K** by default (leaves ~8.5K headroom for the
//! briefing turn plus next-turn growth before crossing the 128K elbow).
//! - Token threshold: **768K** by default (~75% of the 1M window). Soft seams
//! at 192K/384K/576K (layered context manager, #159) handle intermediate
//! thresholds. The hard cycle only fires near the wall.
//! - Phase guard: callers only invoke `should_advance_cycle` at clean turn
//! boundaries (no in-flight tool, no streaming, no approval modal).
//! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators
//! tune the threshold separately for `deepseek-v4-pro` vs. `-flash` if
//! their workloads have different briefing costs.
//! tune the threshold separately for `deepseek-v4-pro` vs. `-flash`.
use std::collections::HashMap;
use std::fs::{File, OpenOptions};
@@ -56,10 +56,12 @@ use crate::working_set::WorkingSet;
/// JSONL header record emitted as the first line of an archived cycle file.
const CYCLE_ARCHIVE_SCHEMA_VERSION: u32 = 1;
/// Default token threshold at which a cycle boundary fires. Set below the V4
/// 128K retrieval elbow to leave room for the briefing turn (≤3K tokens) plus
/// the next user turn before the next boundary.
pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 110_000;
/// Default token threshold at which a cycle boundary fires.
///
/// Bumped from 110K (pre-#159) to 768K (~75% of 1M window) in v0.7.2.
/// The layered context manager (#159) handles intermediate thresholds via
/// soft seams at 192K/384K/576K, so the hard cycle only fires near the wall.
pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 768_000;
/// Default cap on the model-curated briefing block.
pub const DEFAULT_BRIEFING_MAX_TOKENS: usize = 3_000;
@@ -758,10 +760,10 @@ mod tests {
#[test]
fn should_advance_combines_input_and_output() {
let cfg = CycleConfig::default();
// 60k + 60k = 120k > 110k threshold
// 400K + 400K = 800K > 768K threshold
assert!(should_advance_cycle(
60_000,
60_000,
400_000,
400_000,
"deepseek-v4-pro",
&cfg,
false
File diff suppressed because it is too large Load Diff
+10
View File
@@ -1145,6 +1145,14 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
"NOVITA_API_KEY",
"deepseek auth set --provider novita --api-key \"...\"",
),
crate::config::ApiProvider::Fireworks => (
"FIREWORKS_API_KEY",
"deepseek auth set --provider fireworks --api-key \"...\"",
),
crate::config::ApiProvider::Sglang => (
"SGLANG_API_KEY",
"deepseek auth set --provider sglang --api-key \"...\"",
),
crate::config::ApiProvider::Deepseek => {
("DEEPSEEK_API_KEY", "deepseek login --api-key \"...\"")
}
@@ -1156,6 +1164,8 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
crate::config::ApiProvider::NvidiaNim => "nvidia_nim",
crate::config::ApiProvider::Openrouter => "openrouter",
crate::config::ApiProvider::Novita => "novita",
crate::config::ApiProvider::Fireworks => "fireworks",
crate::config::ApiProvider::Sglang => "sglang",
crate::config::ApiProvider::Deepseek => "deepseek",
}
);
+37 -1
View File
@@ -20,11 +20,19 @@ The user can see their own message. Use the first line to show forward motion.
You are a "managed genius" — you excel at individual tasks, but your superpower is decomposing complex work. **Always decompose before you act.** A few minutes spent planning saves many minutes of thrashing.
Use three decomposition patterns from the V4 paper (arXiv:2512.24601), selected by task scope:
**PREVIEW** — Before diving into a large task, survey the terrain. Scan directory structure (`list_dir`), file headers, module trees. Identify problem boundaries and estimate complexity. A 30-second preview prevents hours of wrong-path exploration.
**CHUNK + map-reduce** — When a task exceeds single-pass capacity: split into independent sub-tasks, process each independently (parallel where possible via parallel tool calls or `agent_swarm`), then synthesize findings into a coherent whole. Track chunks with `todo_write`.
**RECURSIVE** — When sub-tasks reveal sub-problems: decompose recursively until each leaf is tractable. Maintain the task tree via `update_plan` (strategy) layered above `todo_write` (leaf tasks). Propagate findings upward when sub-problems resolve.
Your default workflow for any non-trivial request:
1. **`todo_write`** — break the work into concrete, verifiable tasks. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing.
2. **Execute** — work through each todo, updating status as you go.
3. **For complex initiatives**, layer `update_plan` (high-level strategy) above `todo_write` (granular steps).
4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking.
4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking. Batch independent tool calls in a single turn.
5. **For long inputs that don't fit in your context** (whole files, transcripts, multi-doc corpora) or when you need recursive sub-LLM work, use `rlm` — it loads the input into a Python REPL as `context` and runs sub-LLM calls there so the long string never enters your window.
6. **For persistent cross-session memory**, use `note` sparingly for important decisions, open blockers, and architectural context.
@@ -35,6 +43,34 @@ You have a 1M-token context window. When usage creeps above ~80%, suggest `/c
Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`) before final answers. These are invisible to the user but count against context. Cost/token estimates are approximate; treat them as a rough guide.
## Your V4 Characteristics
You run on V4 architecture. Understanding the internals helps you self-manage:
**Degradation curve.** Retrieval quality holds well to ~256K tokens, then degrades rapidly. Keep your active working set below ~256K. Older verbatim messages persist but are harder to retrieve accurately — treat `<archived_context>` seams as navigational markers, not a working-memory substitute.
**Prefix cache economics.** V4 caches shared prefixes at 128-token granularity with ~90% cost discount. Prefer appending to existing messages over mutating old ones — deletion or replacement breaks the cache and increases cost. Structure output to maximize prefix reuse across turns.
**Thinking token strategy.** Thinking tokens count against context and replay across turns (the `reasoning_content` rule). Use them strategically: skip for lookups, light for simple code generation, deep for architecture and debugging. Cache conclusions in concise inline summaries rather than re-deriving each turn.
**Parallel execution.** Batch independent reads, searches, and greps into a single turn. Never serialize operations that can run concurrently — parallel tool calls share the same turn and finish faster.
## Thinking Budget
Match thinking depth to task complexity. Overthinking wastes tokens; underthinking causes rework.
| Task type | Thinking depth | Rationale |
|-----------|---------------|-----------|
| Simple factual lookup (read, search) | Skip | Answer is immediate |
| Tool output interpretation | Light | Verify result matches intent |
| Code generation (single function) | Light | Pattern-matching |
| Multi-file refactor | Medium | Cross-file dependencies |
| Debugging (error to root cause) | Deep | Hypothesis generation |
| Architecture design | Deep | Trade-offs, constraints |
| Security review | Deep | Adversarial reasoning |
When context is deep (past a soft seam): cache reasoning conclusions in concise inline summaries, reference prior conclusions rather than re-deriving, and remember that thinking tokens in the verbatim window survive compaction. Think once, reference many times.
## Toolbox (fast reference — tool descriptions are authoritative)
- **Planning / tracking**: `update_plan` (high-level strategy), `todo_write` (granular task list — use this first), `todo_add` / `todo_update` / `todo_list` (legacy single-item ops), `note` (persistent memory).
+15 -5
View File
@@ -95,13 +95,19 @@ pub struct SeamMetadata {
/// Which level (1, 2, or 3).
pub level: u8,
/// Message range covered (inclusive-exclusive indices).
/// Reserved for future diagnostic use.
#[allow(dead_code)]
pub start_idx: usize,
#[allow(dead_code)]
pub end_idx: usize,
/// Approximate token count of the summary.
#[allow(dead_code)]
pub token_estimate: usize,
/// When the seam was produced.
#[allow(dead_code)]
pub timestamp: DateTime<Utc>,
/// Model that produced it.
#[allow(dead_code)]
pub model: String,
}
@@ -162,7 +168,11 @@ impl SeamManager {
}
/// Report whether `cumulative_tokens` has reached the hard cycle boundary.
///
/// Returns `false` whenever `config.enabled` is off; otherwise returns
/// `true` once `cumulative_tokens` is at or above `config.cycle_threshold`.
///
/// Note: not currently called — cycle detection uses an inline check.
/// Kept as the canonical boundary definition for future wiring.
#[must_use]
#[allow(dead_code)]
pub fn should_cycle(&self, cumulative_tokens: usize) -> bool {
    if !self.config.enabled {
        return false;
    }
    cumulative_tokens >= self.config.cycle_threshold
}
@@ -542,10 +552,10 @@ impl SeamManager {
for msg in messages {
if msg.role == "assistant" {
for block in &msg.content {
if let ContentBlock::Text { text, .. } = block {
if text.contains("<archived_context") {
texts.push(text.clone());
}
if let ContentBlock::Text { text, .. } = block
&& text.contains("<archived_context")
{
texts.push(text.clone());
}
}
}
@@ -602,7 +612,7 @@ mod tests {
fn cycle_threshold_check() {
let config = SeamConfig::default();
assert!(768_000 >= config.cycle_threshold);
assert!(!(700_000 >= config.cycle_threshold));
assert!(700_000 < config.cycle_threshold);
}
#[test]
+34 -1
View File
@@ -263,7 +263,7 @@ impl ToolSpec for EditFileTool {
}
fn description(&self) -> &'static str {
"Replace text in a file using search/replace."
"Replace text in a file using search/replace. Required: 'path' (file to edit), 'search' (exact text to find), 'replace' (text to substitute)."
}
fn input_schema(&self) -> Value {
@@ -603,6 +603,39 @@ mod tests {
assert!(err.to_string().contains("not found"));
}
/// #157 — a call that supplies `replacement` where `replace` is expected
/// must fail with an error that names the missing field *and* echoes the
/// fields that were actually provided, so the model can self-correct
/// without a second round-trip.
#[tokio::test]
async fn test_edit_file_wrong_param_name_shows_provided_fields() {
    let workspace = tempdir().expect("tempdir");
    let ctx = ToolContext::new(workspace.path().to_path_buf());
    fs::write(workspace.path().join("test.txt"), "hello world").expect("write");

    // `replacement` is the deliberate mistake; the tool expects `replace`.
    let bad_input = json!({"path": "test.txt", "search": "hello", "replacement": "hi"});
    let outcome = EditFileTool.execute(bad_input, &ctx).await;
    assert!(outcome.is_err());

    let err = outcome.unwrap_err().to_string();

    // The error must name both the missing field AND the provided ones.
    let names_missing_field = err.contains("missing required field 'replace'");
    assert!(
        names_missing_field,
        "error must name the missing field: {err}"
    );

    let echoes_provided_fields = ["Input provided:", "provided:"]
        .into_iter()
        .any(|marker| err.contains(marker));
    assert!(
        echoes_provided_fields,
        "error must list the fields the model did supply: {err}"
    );
}
#[tokio::test]
async fn test_list_dir_tool() {
let tmp = tempdir().expect("tempdir");
+31 -1
View File
@@ -58,6 +58,15 @@ pub enum MailboxMessage {
Failed { agent_id: String, error: String },
/// Cancellation propagated to this agent.
Cancelled { agent_id: String },
/// Incremental token usage from a sub-agent's API call.
/// Published after each turn so the parent's cost counter updates live.
TokenUsage {
agent_id: String,
/// Prompt tokens consumed (input, including cached).
prompt_tokens: u32,
/// Completion tokens consumed (output).
completion_tokens: u32,
},
}
impl MailboxMessage {
@@ -72,7 +81,8 @@ impl MailboxMessage {
| Self::ToolCallCompleted { agent_id, .. }
| Self::Completed { agent_id, .. }
| Self::Failed { agent_id, .. }
| Self::Cancelled { agent_id } => agent_id,
| Self::Cancelled { agent_id }
| Self::TokenUsage { agent_id, .. } => agent_id,
Self::ChildSpawned { child_id, .. } => child_id,
}
}
@@ -90,6 +100,18 @@ impl MailboxMessage {
status: status.into(),
}
}
/// Build a [`MailboxMessage::TokenUsage`] report for `agent_id`.
///
/// `agent_id` is converted into an owned `String`; `prompt_tokens` and
/// `completion_tokens` are stored exactly as given.
pub(crate) fn token_usage(
    agent_id: impl Into<String>,
    prompt_tokens: u32,
    completion_tokens: u32,
) -> Self {
    let agent_id = agent_id.into();
    Self::TokenUsage {
        agent_id,
        prompt_tokens,
        completion_tokens,
    }
}
}
/// One delivery: a sequence number plus the message. The sequence is
@@ -434,6 +456,14 @@ mod tests {
},
"a8",
),
(
MailboxMessage::TokenUsage {
agent_id: "a9".into(),
prompt_tokens: 100,
completion_tokens: 50,
},
"a9",
),
];
for (msg, expected) in cases {
assert_eq!(msg.agent_id(), expected, "extract failed for {msg:?}");
+10
View File
@@ -2725,6 +2725,16 @@ async fn run_subagent(
};
let mut tool_uses = Vec::new();
// Report token usage so the parent's cost counter updates live.
if let Some(mb) = runtime.mailbox.as_ref() {
let _ = mb.send(MailboxMessage::token_usage(
&agent_id,
response.usage.input_tokens,
response.usage.output_tokens,
));
}
for block in &response.content {
match block {
ContentBlock::Text { text, .. } if !text.trim().is_empty() => {
+3
View File
@@ -520,6 +520,8 @@ pub struct App {
pub tool_log: Vec<String>,
/// Session cost tracking
pub session_cost: f64,
/// Running cost from active sub-agents (updated live via mailbox).
pub subagent_cost: f64,
/// Active skill to apply to next user message
pub active_skill: Option<String>,
/// Tool call cells by tool id (for cells already finalized in `history`).
@@ -923,6 +925,7 @@ impl App {
todos: new_shared_todo_list(),
tool_log: Vec::new(),
session_cost: 0.0,
subagent_cost: 0.0,
active_skill: None,
tool_cells: HashMap::new(),
tool_details_by_cell: HashMap::new(),
+204 -1
View File
@@ -97,6 +97,25 @@ pub enum HistoryCell {
streaming: bool,
duration_secs: Option<f32>,
},
/// An `<archived_context>` seam block produced by the Flash seam manager
/// (issue #159). Rendered dimmed/italic with a level + range label so
/// the user can see at a glance where context seams exist.
ArchivedContext {
/// Seam level (1, 2, 3, or 0 for cycle-level).
level: u8,
/// Message range covered (e.g. "msg 0-128").
range: String,
/// Token estimate string (e.g. "~2500").
tokens: String,
/// Density label (e.g. "~2,500 tokens").
density: String,
/// Model that produced the summary.
model: String,
/// RFC 3339 timestamp.
timestamp: String,
/// The summary text content.
summary: String,
},
Tool(ToolCell),
/// Live in-transcript card for sub-agent activity (issue #128). Owns
/// either a single `DelegateCard` or a multi-worker `FanoutCard`; the
@@ -188,6 +207,9 @@ impl HistoryCell {
} => render_thinking(content, width, *streaming, *duration_secs, false, false),
HistoryCell::Tool(cell) => cell.lines_with_motion(width, false),
HistoryCell::SubAgent(cell) => cell.lines(width),
HistoryCell::ArchivedContext { .. } => {
render_archived_context(self, width, false)
}
}
}
@@ -249,6 +271,9 @@ impl HistoryCell {
),
HistoryCell::System { .. } | HistoryCell::Error { .. } => self.lines(width),
HistoryCell::SubAgent(cell) => cell.lines(width),
HistoryCell::ArchivedContext { .. } => {
render_archived_context(self, width, options.low_motion)
}
}
}
@@ -293,6 +318,9 @@ impl HistoryCell {
),
HistoryCell::Tool(cell) => cell.transcript_lines(width),
HistoryCell::SubAgent(cell) => cell.lines(width),
HistoryCell::ArchivedContext { .. } => {
render_archived_context(self, width, true)
}
}
}
@@ -317,6 +345,172 @@ impl HistoryCell {
}
}
/// Look up the double-quoted value of attribute `name` inside `attrs`, the
/// text between the `<archived_context` tag name and its closing `>`.
///
/// The attribute must be preceded by whitespace and written as
/// `name="value"`. The value runs up to the next `"`, so it may contain
/// spaces (for example `range="msg 0-128"`). Returns `None` when the
/// attribute is absent or its closing quote is missing.
fn archived_context_attr<'a>(attrs: &'a str, name: &str) -> Option<&'a str> {
    let needle = format!("{name}=\"");
    let mut offset = 0;
    while let Some(found) = attrs[offset..].find(&needle) {
        let name_start = offset + found;
        let value_start = name_start + needle.len();
        // Require a whitespace boundary so that e.g. `tokens=` never matches
        // the tail of a longer attribute name.
        if attrs[..name_start].ends_with(char::is_whitespace) {
            let value = &attrs[value_start..];
            return value.find('"').map(|end| &value[..end]);
        }
        offset = value_start;
    }
    None
}

/// Parse an `<archived_context>` block from an assistant Text block.
///
/// Returns `Some(HistoryCell::ArchivedContext)` when the trimmed text starts
/// with `<archived_context`, has a `>` terminating that opening tag, and
/// ends with `</archived_context>`. Returns `None` for anything else, which
/// callers treat as regular assistant content.
///
/// Attributes are read from the opening tag. `level` falls back to `0` when
/// it is missing or not a valid `u8`; every other attribute falls back to an
/// empty string. The summary is the trimmed text between the opening tag
/// and the closing tag.
fn parse_archived_context(text: &str) -> Option<HistoryCell> {
    const OPEN: &str = "<archived_context";
    const CLOSE: &str = "</archived_context>";

    // Peel the closing tag off first so the search for the opening tag's `>`
    // can never land inside it; a block with no `>` of its own is rejected
    // here instead of producing an inverted slice range.
    let inner = text.trim().strip_suffix(CLOSE)?;
    let after_name = inner.strip_prefix(OPEN)?;
    let (attrs, summary) = after_name.split_once('>')?;

    let owned_attr = |name: &str| {
        archived_context_attr(attrs, name)
            .unwrap_or("")
            .to_string()
    };

    let level = archived_context_attr(attrs, "level")
        .and_then(|value| value.parse::<u8>().ok())
        .unwrap_or(0);

    Some(HistoryCell::ArchivedContext {
        level,
        range: owned_attr("range"),
        tokens: owned_attr("tokens"),
        density: owned_attr("density"),
        model: owned_attr("model"),
        timestamp: owned_attr("timestamp"),
        summary: summary.trim().to_string(),
    })
}
/// Render an `<archived_context>` block with dimmed/italic styling.
///
/// Output, top to bottom:
/// 1. a bold, dim header: `Context L{level}` followed by the range, the
///    token string, and the density label, separated by single spaces.
///    Empty parts are skipped, and the density is also skipped when it
///    merely repeats the token string;
/// 2. an optional muted line with `via {model}` and the timestamp, joined
///    by ` · ` when both are present;
/// 3. the summary rendered as markdown in dim italic (or `(no summary)`
///    when the summary is empty), at `width - 4` columns (minimum 1);
/// 4. one trailing blank line.
///
/// Returns an empty vector when `cell` is not
/// `HistoryCell::ArchivedContext`. `_low_motion` is accepted but currently
/// unused.
fn render_archived_context(
    cell: &HistoryCell,
    width: u16,
    _low_motion: bool,
) -> Vec<Line<'static>> {
    let HistoryCell::ArchivedContext {
        level,
        range,
        tokens,
        density,
        model,
        timestamp,
        summary,
    } = cell
    else {
        return Vec::new();
    };

    let label_style = Style::default()
        .fg(palette::TEXT_DIM)
        .add_modifier(Modifier::BOLD);
    let body_style = Style::default().fg(palette::TEXT_DIM).italic();

    let mut lines = Vec::new();

    // Header: append only the parts that carry information, so a missing
    // range never leaves a trailing or doubled space behind.
    let mut header = format!("Context L{level}");
    let header_parts = [
        Some(range.as_str()),
        Some(tokens.as_str()),
        (density != tokens).then_some(density.as_str()),
    ];
    for part in header_parts
        .into_iter()
        .flatten()
        .filter(|part| !part.is_empty())
    {
        header.push(' ');
        header.push_str(part);
    }
    lines.push(Line::from(Span::styled(header, label_style)));

    // Provenance line: producing model and timestamp, whichever are present.
    let mut sub = String::new();
    if !model.is_empty() {
        sub.push_str("via ");
        sub.push_str(model);
    }
    if !timestamp.is_empty() {
        if !sub.is_empty() {
            sub.push_str(" · ");
        }
        sub.push_str(timestamp);
    }
    if !sub.is_empty() {
        lines.push(Line::from(Span::styled(
            sub,
            Style::default().fg(palette::TEXT_MUTED),
        )));
    }

    // Body: markdown-rendered summary, each line led by a prefix span.
    let body = if summary.is_empty() {
        "(no summary)"
    } else {
        summary.as_str()
    };
    let content_width = width.saturating_sub(4).max(1);
    let rendered = crate::tui::markdown_render::render_markdown(body, content_width, body_style);
    for (idx, line) in rendered.into_iter().enumerate() {
        let prefix = if idx == 0 {
            Span::styled("", Style::default().fg(palette::TEXT_DIM))
        } else {
            Span::raw(" ")
        };
        let mut spans = vec![prefix];
        spans.extend(line.spans);
        lines.push(Line::from(spans));
    }

    lines.push(Line::from(""));
    lines
}
/// Convert a message into history cells for rendering.
#[must_use]
pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {
@@ -324,7 +518,15 @@ pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {
for block in &msg.content {
match block {
ContentBlock::Text { text, .. } => match msg.role.as_str() {
ContentBlock::Text { text, .. } => {
// Check if this is an `<archived_context>` block.
if msg.role == "assistant"
&& let Some(archived) = parse_archived_context(text)
{
cells.push(archived);
continue;
}
match msg.role.as_str() {
"user" => {
if let Some(HistoryCell::User { content }) = cells.last_mut() {
if !content.is_empty() {
@@ -363,6 +565,7 @@ pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {
}
}
_ => {}
}
},
ContentBlock::Thinking { thinking } => {
if let Some(HistoryCell::Thinking { content, .. }) = cells.last_mut() {
+4 -2
View File
@@ -90,6 +90,8 @@ impl ProviderPickerView {
ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
ApiProvider::Openrouter => "OPENROUTER_API_KEY",
ApiProvider::Novita => "NOVITA_API_KEY",
ApiProvider::Fireworks => "FIREWORKS_API_KEY",
ApiProvider::Sglang => "SGLANG_API_KEY",
}
}
@@ -339,7 +341,7 @@ mod tests {
}
#[test]
fn picker_lists_all_four_providers() {
fn picker_lists_all_six_providers() {
let config = Config::default();
let picker = ProviderPickerView::new(ApiProvider::Deepseek, &config);
let names: Vec<_> = picker
@@ -349,7 +351,7 @@ mod tests {
.collect();
assert_eq!(
names,
vec!["DeepSeek", "NVIDIA NIM", "OpenRouter", "Novita AI"]
vec!["DeepSeek", "NVIDIA NIM", "OpenRouter", "Novita AI", "Fireworks AI", "SGLang"]
);
}
+1
View File
@@ -184,6 +184,7 @@ impl TranscriptViewCache {
| HistoryCell::Error { .. }
| HistoryCell::Tool(_)
| HistoryCell::SubAgent(_)
| HistoryCell::ArchivedContext { .. }
),
is_tool_groupable,
});
+30 -6
View File
@@ -3408,6 +3408,8 @@ fn render(f: &mut Frame, app: &mut App) {
crate::config::ApiProvider::NvidiaNim => Some("NIM"),
crate::config::ApiProvider::Openrouter => Some("OR"),
crate::config::ApiProvider::Novita => Some("Novita"),
crate::config::ApiProvider::Fireworks => Some("Fireworks"),
crate::config::ApiProvider::Sglang => Some("SGLang"),
};
let header_data = HeaderData::new(
app.mode,
@@ -3965,6 +3967,8 @@ async fn apply_provider_picker_api_key(
ApiProvider::NvidiaNim => &mut providers.nvidia_nim,
ApiProvider::Openrouter => &mut providers.openrouter,
ApiProvider::Novita => &mut providers.novita,
ApiProvider::Fireworks => &mut providers.fireworks,
ApiProvider::Sglang => &mut providers.sglang,
};
entry.api_key = Some(api_key);
}
@@ -4277,7 +4281,7 @@ fn render_footer(f: &mut Frame, area: Rect, app: &mut App) {
// `working...` pulse stays even in low-motion mode so the user still
// sees that something is happening.
if !app.low_motion {
let strip_frame = now_ms / 150;
let strip_frame = now_ms;
props.working_strip_frame = Some(strip_frame);
}
} else if props.state_label == "ready"
@@ -4482,9 +4486,9 @@ fn render_footer_from(
} else {
Vec::new()
};
let cost = if has(S::Cost) && app.session_cost > 0.001 {
let cost = if has(S::Cost) && app.session_cost + app.subagent_cost > 0.001 {
vec![Span::styled(
format!("${:.2}", app.session_cost),
format!("${:.2}", app.session_cost + app.subagent_cost),
Style::default().fg(palette::TEXT_MUTED),
)]
} else {
@@ -4576,9 +4580,9 @@ fn footer_auxiliary_spans(app: &App, max_width: usize) -> Vec<Span<'static>> {
let agents_spans = crate::tui::widgets::footer_agents_chip(running_agent_count(app));
let replay_spans = footer_reasoning_replay_spans(app);
let cache_spans = footer_cache_spans(app);
let cost_spans = if app.session_cost > 0.001 {
let cost_spans = if app.session_cost + app.subagent_cost > 0.001 {
vec![Span::styled(
format!("${:.2}", app.session_cost),
format!("${:.2}", app.session_cost + app.subagent_cost),
Style::default().fg(palette::TEXT_MUTED),
)]
} else {
@@ -4633,7 +4637,11 @@ fn footer_cache_spans(app: &App) -> Vec<Span<'static>> {
let Some(hit_tokens) = app.last_prompt_cache_hit_tokens else {
return Vec::new();
};
let miss_tokens = app.last_prompt_cache_miss_tokens.unwrap_or(0);
let miss_tokens = app.last_prompt_cache_miss_tokens.unwrap_or_else(|| {
app.last_prompt_tokens
.unwrap_or(0)
.saturating_sub(hit_tokens)
});
let total = hit_tokens.saturating_add(miss_tokens);
if total == 0 {
return Vec::new();
@@ -5329,6 +5337,7 @@ fn open_tool_details_pager(app: &mut App) -> bool {
HistoryCell::Thinking { .. } => "Reasoning".to_string(),
HistoryCell::Tool(_) => "Message".to_string(),
HistoryCell::SubAgent(_) => "Sub-agent".to_string(),
HistoryCell::ArchivedContext { .. } => "Archived Context".to_string(),
};
let width = app
.last_transcript_area
@@ -5492,6 +5501,21 @@ fn handle_subagent_mailbox(app: &mut App, _seq: u64, message: &MailboxMessage) {
DelegateCard, FanoutCard, apply_to_delegate, apply_to_fanout,
};
// Accumulate sub-agent token costs for the real-time footer counter (#166).
if let MailboxMessage::TokenUsage {
prompt_tokens,
completion_tokens,
..
} = message
{
if let Some(cost) =
crate::pricing::calculate_turn_cost(&app.model, *prompt_tokens, *completion_tokens)
{
app.subagent_cost += cost;
}
return; // No card visual change needed; the footer handles display.
}
// Resolve (or allocate) the target cell for this envelope. ChildSpawned
// is special — it always belongs to the active fanout card if one
// exists; otherwise it seeds a new one.
File diff suppressed because it is too large Load Diff
+12
View File
@@ -387,6 +387,12 @@ pub fn apply_to_delegate(card: &mut DelegateCard, msg: &MailboxMessage) -> bool
// to a sibling fanout card, not this one.
return false;
}
MailboxMessage::TokenUsage { .. } => {
// Cost accumulation happens in handle_subagent_mailbox (ui.rs)
// before this apply function is called; TokenUsage never reaches
// this arm in practice.
return false;
}
}
true
}
@@ -421,6 +427,12 @@ pub fn apply_to_fanout(card: &mut FanoutCard, msg: &MailboxMessage) -> bool {
card.upsert_worker(child_id, AgentLifecycle::Pending);
true
}
MailboxMessage::TokenUsage { .. } => {
// Cost accumulation happens in handle_subagent_mailbox (ui.rs)
// before this apply function is called; TokenUsage never reaches
// this arm in practice.
true
}
}
}
+37 -32
View File
@@ -59,15 +59,16 @@ pub struct FooterProps {
}
/// One frame of the footer's water-spout animation. `col` is the cell index
/// inside the strip, `width` the strip's total width, `frame` the discrete
/// 150 ms tick counter. Returns the glyph that should appear in that cell on
/// inside the strip, `width` the strip's total width, `frame` the raw
/// millisecond counter. Returns the glyph that should appear in that cell on
/// that frame.
///
/// Visual: two crests sweep across a calm water surface (`─`). The opener
/// `⌒` rises, then a soft `‿` trails behind. Crest A advances every 4 ticks
/// (~600 ms), crest B every 6 ticks (~900 ms) — independent speeds give the
/// criss-cross fountain feel. Every 17 ticks (~2.5 s) the phase of crest B
/// jitters by one column so the pattern never settles into a strict beat.
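/// `⌒` rises, then a soft `‿` trails behind. Crest A advances one column
/// every ~300 ms (2 × 150 ms ticks), crest B every ~450 ms (3 × 150 ms
/// ticks) — independent speeds give the criss-cross fountain feel. Crest B
/// also carries a 0–2 column phase jitter that changes roughly every
/// 17 ticks (~2.5 s) so the two crests never lock into a fixed offset.
/// Positions are computed from `frame / 150.0` (fractional ticks) and then
/// rounded to the nearest column, so a crest's step is not quantized to
/// 150 ms tick boundaries.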
///
/// All math is pure given (col, width, frame) so unit tests can pin frames.
#[must_use]
@@ -76,17 +77,22 @@ pub fn footer_working_strip_glyph_at(col: usize, width: usize, frame: u64) -> ch
return ' ';
}
// Number of 150 ms ticks since epoch — fractional so crests move
// continuously rather than teleporting every 4-6 ticks.
let frame_f = frame as f64 / 150.0;
// Crest is two glyphs wide: the leading `⌒` followed by a trailing `‿`.
const CREST_SPAN: i64 = 2;
// Cycle wide enough that each crest enters and exits cleanly.
let cycle = (width as i64).max(CREST_SPAN) + CREST_SPAN * 2;
let frame_i = frame as i64;
// Crest A advances one column every 4 ticks; B every 6.
let pos_a = frame_i.div_euclid(4).rem_euclid(cycle) - CREST_SPAN;
// Phase jitter: every 17 ticks, nudge B by one column so the two crests
// never lock into a fixed offset.
let jitter = frame_i.div_euclid(17).rem_euclid(3);
let pos_b = (frame_i.div_euclid(6) + jitter + (cycle / 3) + 5).rem_euclid(cycle) - CREST_SPAN;
// Crest A advances one column every ~300 ms (2 × 150 ms ticks).
let pos_a = (frame_f / 2.0).round() as i64 % cycle - CREST_SPAN;
// Phase jitter: every ~2.5 s (17 ticks), nudge B by one column so the
// two crests never lock into a fixed offset.
let jitter = (frame_f / 17.0).round() as i64 % 3;
// Crest B advances one column every ~450 ms (3 × 150 ms ticks).
let pos_b =
((frame_f / 3.0).round() as i64 + jitter + (cycle / 3) + 5).rem_euclid(cycle) - CREST_SPAN;
crest_glyph_for(col as i64, pos_a)
.or_else(|| crest_glyph_for(col as i64, pos_b))
@@ -687,16 +693,16 @@ mod tests {
#[test]
fn working_strip_glyph_is_deterministic_per_frame() {
// Same (col, width, frame) → same glyph. Stepping by one full
// crest-A tick (4 ticks ≈ 600 ms) is the minimum guaranteed
// animation step.
let a = super::footer_working_strip_string(40, 1);
let b = super::footer_working_strip_string(40, 1);
// Same (col, width, frame) → same glyph. Frames are now raw
// milliseconds; 150 ms apart represents one tick.
let a = super::footer_working_strip_string(40, 150);
let b = super::footer_working_strip_string(40, 150);
assert_eq!(a, b, "deterministic given the same frame");
let c = super::footer_working_strip_string(40, 5);
// 750 ms → 5 ticks, crest A advances every 2 ticks → ≥2 steps.
let c = super::footer_working_strip_string(40, 750);
assert_ne!(
a, c,
"advancing one full crest-A step must change the strip",
"advancing 4 ticks must change the strip",
);
}
@@ -713,7 +719,7 @@ mod tests {
FooterWidget::new(props.clone()).render(area, &mut buf);
let idle: String = (0..area.width).map(|x| buf[(x, 0)].symbol()).collect();
props.working_strip_frame = Some(13);
props.working_strip_frame = Some(600);
let mut buf2 = ratatui::buffer::Buffer::empty(area);
FooterWidget::new(props).render(area, &mut buf2);
let active: String = (0..area.width).map(|x| buf2[(x, 0)].symbol()).collect();
@@ -732,12 +738,11 @@ mod tests {
#[test]
fn working_strip_advances_position_within_full_crest_step() {
// Crest A advances one column every 4 ticks; B every 6. Stepping by
// 12 ticks guarantees both have moved at least one column,
// independent of the jitter cadence (17).
// Crest A advances every 2 ticks (300 ms), B every 3 (450 ms).
// 900 ms (6 ticks) guarantees crest A has advanced at least 3 columns.
let width = 60;
let f0 = super::footer_working_strip_string(width, 0);
let f12 = super::footer_working_strip_string(width, 12);
let f900 = super::footer_working_strip_string(width, 900);
// Collect the columns that hold a crest opener `⌒` in each frame.
let openers = |s: &str| -> Vec<usize> {
s.chars()
@@ -747,20 +752,20 @@ mod tests {
};
assert_ne!(
openers(&f0),
openers(&f12),
"crest opener columns must shift across a 12-tick window",
openers(&f900),
"crest opener columns must shift across a 900ms window",
);
}
#[test]
fn working_strip_renders_paired_crest_glyphs() {
// The `⌒‿` pair is the visual centrepiece — a soft rise followed by
// a gentle dip. Sweep enough ticks that a crest is guaranteed to
// land fully inside a 60-cell strip at some point.
// a gentle dip. Sweep enough time (in ms) that a crest is guaranteed
// to land fully inside a 60-cell strip at some point.
let width = 60;
let mut saw_pair = false;
for frame in 0..120 {
let s = super::footer_working_strip_string(width, frame);
for frame_ms in (0..24_000).step_by(150) {
let s = super::footer_working_strip_string(width, frame_ms);
if s.contains("\u{2312}\u{203F}") {
saw_pair = true;
break;
@@ -768,7 +773,7 @@ mod tests {
}
assert!(
saw_pair,
"expected `⌒‿` pair somewhere in the first 120 ticks",
"expected `⌒‿` pair somewhere in the first 24s of animation",
);
}