v0.7.2: clippy clean, cost counter wiring, layered context fixup

#167: Fix all 7 clippy warnings — annotated SeamMetadata dead fields, removed unused should_cycle calls, collapsed nested ifs, fixed useless_format and nonminimal_bool. #168: Wire TokenUsage mailbox drain to subagent_cost accumulator. handle_subagent_mailbox now intercepts TokenUsage before routing to cards, computes cost via calculate_turn_cost, and increments app.subagent_cost in real time. Footer reflects live sub-agent spend. Restored ArchivedContext variant to HistoryCell (corrupted by prior apply_patch). Version bump to 0.7.2. Refs: #166, #167, #168
2026-04-28 21:46:25 -05:00
parent 12b1ae42c4
commit 35db361a87
27 changed files with 13550 additions and 107 deletions
@@ -1011,7 +1011,7 @@ dependencies = [

 [[package]]
 name = "deepseek-agent"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "deepseek-config",
 "serde",
@@ -1019,7 +1019,7 @@ dependencies = [

 [[package]]
 name = "deepseek-app-server"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "axum",
@@ -1042,7 +1042,7 @@ dependencies = [

 [[package]]
 name = "deepseek-config"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "deepseek-secrets",
@@ -1055,7 +1055,7 @@ dependencies = [

 [[package]]
 name = "deepseek-core"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1074,7 +1074,7 @@ dependencies = [

 [[package]]
 name = "deepseek-execpolicy"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "deepseek-protocol",
@@ -1083,7 +1083,7 @@ dependencies = [

 [[package]]
 name = "deepseek-hooks"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -1097,7 +1097,7 @@ dependencies = [

 [[package]]
 name = "deepseek-mcp"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "deepseek-protocol",
@@ -1107,7 +1107,7 @@ dependencies = [

 [[package]]
 name = "deepseek-protocol"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "serde",
 "serde_json",
@@ -1115,7 +1115,7 @@ dependencies = [

 [[package]]
 name = "deepseek-secrets"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "dirs",
 "keyring",
@@ -1128,7 +1128,7 @@ dependencies = [

 [[package]]
 name = "deepseek-state"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1140,7 +1140,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tools"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -1153,7 +1153,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "arboard",
@@ -1213,7 +1213,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-cli"
-version = "0.7.1"
+version = "0.7.2"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1236,7 +1236,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-core"
-version = "0.7.1"
+version = "0.7.2"

 [[package]]
 name = "deranged"
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
 resolver = "2"

 [workspace.package]
-version = "0.7.1"
+version = "0.7.2"
 edition = "2024"
 license = "MIT"
 repository = "https://github.com/Hmbown/DeepSeek-TUI"
@@ -188,7 +188,23 @@ pub fn required_str<'a>(input: &'a Value, field: &str) -> std::result::Result<&'
    input
        .get(field)
        .and_then(Value::as_str)
-        .ok_or_else(|| ToolError::missing_field(field))
+        .ok_or_else(|| {
+            // When the field is missing (or is not a string), list the fields the
+            // caller *did* supply so the model can spot the mismatch without a retry.
+            let provided: Vec<&str> = input
+                .as_object()
+                .map(|obj| obj.keys().map(|k| k.as_str()).collect())
+                .unwrap_or_default();
+            if provided.is_empty() {
+                ToolError::missing_field(field)
+            } else {
+                let hint = format!(
+                    "missing required field '{field}'. Input provided: {}",
+                    provided.join(", ")
+                );
+                ToolError::invalid_input(hint)
+            }
+        })
 }

 /// Helper to extract an optional string field from JSON input.
@@ -750,7 +750,7 @@ pub(super) fn apply_reasoning_effort(
        "off" | "disabled" | "none" | "false" => match provider {
            // OpenRouter / Novita relay the same DeepSeek V4 payload shape
            // as DeepSeek native; they pass through `thinking` / `reasoning_effort`.
-            ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita => {
+            ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks | ApiProvider::Sglang => {
                body["thinking"] = json!({ "type": "disabled" });
            }
            ApiProvider::NvidiaNim => {
@@ -760,7 +760,7 @@ pub(super) fn apply_reasoning_effort(
            }
        },
        "low" | "minimal" | "medium" | "mid" | "high" | "" => match provider {
-            ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita => {
+            ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks | ApiProvider::Sglang => {
                body["reasoning_effort"] = json!("high");
                body["thinking"] = json!({ "type": "enabled" });
            }
@@ -772,7 +772,7 @@ pub(super) fn apply_reasoning_effort(
            }
        },
        "xhigh" | "max" | "highest" => match provider {
-            ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita => {
+            ApiProvider::Deepseek | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks | ApiProvider::Sglang => {
                body["reasoning_effort"] = json!("max");
                body["thinking"] = json!({ "type": "enabled" });
            }
@@ -163,6 +163,15 @@ pub fn export(app: &mut App, path: Option<&str>) -> CommandResult {
            HistoryCell::Thinking { content, .. } => ("*Thinking:*", content.clone()),
            HistoryCell::Tool(tool) => ("**Tool:**", render_tool_cell(tool, 80)),
            HistoryCell::SubAgent(sub) => ("**Sub-agent:**", render_subagent_cell(sub, 80)),
+            HistoryCell::ArchivedContext {
+                level,
+                range,
+                summary,
+                ..
+            } => (
+                "**Archived Context:**",
+                format!("L{level} [{range}]: {summary}"),
+            ),
        };

        let _ = write!(content, "{}\n\n{}\n\n---\n\n", role, body.trim());
@@ -25,6 +25,11 @@ pub const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1";
 pub const DEFAULT_NOVITA_MODEL: &str = "deepseek/deepseek-v4-pro";
 pub const DEFAULT_NOVITA_FLASH_MODEL: &str = "deepseek/deepseek-v4-flash";
 pub const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/v1";
+pub const DEFAULT_FIREWORKS_MODEL: &str = "accounts/fireworks/models/deepseek-v4-pro";
+pub const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference/v1";
+pub const DEFAULT_SGLANG_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
+pub const DEFAULT_SGLANG_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
+pub const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1";
 const API_KEYRING_SENTINEL: &str = "__KEYRING__";
 pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[
    "deepseek-v4-pro",
@@ -41,6 +46,8 @@ pub enum ApiProvider {
    NvidiaNim,
    Openrouter,
    Novita,
+    Fireworks,
+    Sglang,
 }

 impl ApiProvider {
@@ -51,6 +58,8 @@ impl ApiProvider {
            "nvidia" | "nvidia-nim" | "nvidia_nim" | "nim" => Some(Self::NvidiaNim),
            "openrouter" | "open_router" => Some(Self::Openrouter),
            "novita" => Some(Self::Novita),
+            "fireworks" | "fireworks-ai" => Some(Self::Fireworks),
+            "sglang" | "sg-lang" => Some(Self::Sglang),
            _ => None,
        }
    }
@@ -62,6 +71,8 @@ impl ApiProvider {
            Self::NvidiaNim => "nvidia-nim",
            Self::Openrouter => "openrouter",
            Self::Novita => "novita",
+            Self::Fireworks => "fireworks",
+            Self::Sglang => "sglang",
        }
    }

@@ -73,6 +84,8 @@ impl ApiProvider {
            Self::NvidiaNim => "NVIDIA NIM",
            Self::Openrouter => "OpenRouter",
            Self::Novita => "Novita AI",
+            Self::Fireworks => "Fireworks AI",
+            Self::Sglang => "SGLang",
        }
    }

@@ -84,6 +97,8 @@ impl ApiProvider {
            Self::NvidiaNim,
            Self::Openrouter,
            Self::Novita,
+            Self::Fireworks,
+            Self::Sglang,
        ]
    }
 }
@@ -688,6 +703,10 @@ pub struct ProvidersConfig {
    pub openrouter: ProviderConfig,
    #[serde(default)]
    pub novita: ProviderConfig,
+    #[serde(default)]
+    pub fireworks: ProviderConfig,
+    #[serde(default)]
+    pub sglang: ProviderConfig,
 }

 #[derive(Debug, Clone, Deserialize, Default)]
@@ -747,7 +766,7 @@ impl Config {
            && ApiProvider::parse(provider).is_none()
        {
            anyhow::bail!(
-                "Invalid provider '{provider}': expected deepseek, nvidia-nim, openrouter, or novita."
+                "Invalid provider '{provider}': expected deepseek, nvidia-nim, openrouter, novita, fireworks, or sglang."
            );
        }
        if let Some(ref key) = self.api_key
@@ -855,6 +874,8 @@ impl Config {
            ApiProvider::NvidiaNim => &providers.nvidia_nim,
            ApiProvider::Openrouter => &providers.openrouter,
            ApiProvider::Novita => &providers.novita,
+            ApiProvider::Fireworks => &providers.fireworks,
+            ApiProvider::Sglang => &providers.sglang,
        })
    }

@@ -883,6 +904,8 @@ impl Config {
            ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_MODEL,
            ApiProvider::Openrouter => DEFAULT_OPENROUTER_MODEL,
            ApiProvider::Novita => DEFAULT_NOVITA_MODEL,
+            ApiProvider::Fireworks => DEFAULT_FIREWORKS_MODEL,
+            ApiProvider::Sglang => DEFAULT_SGLANG_MODEL,
        }
        .to_string()
    }
@@ -905,7 +928,8 @@ impl Config {
                .as_ref()
                .filter(|base| base.contains("integrate.api.nvidia.com"))
                .cloned(),
-            ApiProvider::Openrouter | ApiProvider::Novita => None,
+            ApiProvider::Openrouter | ApiProvider::Novita
+                | ApiProvider::Fireworks | ApiProvider::Sglang => None,
        };
        let base = provider_base.or(root_base).unwrap_or_else(|| {
            match provider {
@@ -913,6 +937,8 @@ impl Config {
                ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL,
                ApiProvider::Openrouter => DEFAULT_OPENROUTER_BASE_URL,
                ApiProvider::Novita => DEFAULT_NOVITA_BASE_URL,
+                ApiProvider::Fireworks => DEFAULT_FIREWORKS_BASE_URL,
+                ApiProvider::Sglang => DEFAULT_SGLANG_BASE_URL,
            }
            .to_string()
        });
@@ -932,6 +958,8 @@ impl Config {
            ApiProvider::NvidiaNim => "nvidia-nim",
            ApiProvider::Openrouter => "openrouter",
            ApiProvider::Novita => "novita",
+            ApiProvider::Fireworks => "fireworks",
+            ApiProvider::Sglang => "sglang",
        };

        // 1. OS keyring + 2. environment variables (handled by Secrets).
@@ -986,6 +1014,15 @@ impl Config {
                "Novita API key not found. Run 'deepseek auth set --provider novita', \
                 set NOVITA_API_KEY, or add [providers.novita] api_key in ~/.deepseek/config.toml."
            ),
+            ApiProvider::Fireworks => anyhow::bail!(
+                "Fireworks AI API key not found. Run 'deepseek auth set --provider fireworks', \
+                 set FIREWORKS_API_KEY, or add [providers.fireworks] api_key in ~/.deepseek/config.toml."
+            ),
+            ApiProvider::Sglang => anyhow::bail!(
+                "SGLang API key not found (optional for self-hosted). Run 'deepseek auth set --provider sglang', \
+                 set SGLANG_API_KEY, or add [providers.sglang] api_key in ~/.deepseek/config.toml. \
+                 If your SGLang deployment runs without authentication, set SGLANG_API_KEY to an empty string or any placeholder."
+            ),
        }
    }

@@ -1300,6 +1337,31 @@ fn apply_env_overrides(config: &mut Config) {
            .novita
            .base_url = Some(value);
    }
+    if matches!(config.api_provider(), ApiProvider::Fireworks)
+        && let Ok(value) = std::env::var("FIREWORKS_BASE_URL")
+        && !value.trim().is_empty()
+    {
+        config
+            .providers
+            .get_or_insert_with(ProvidersConfig::default)
+            .fireworks
+            .base_url = Some(value);
+    }
+    if matches!(config.api_provider(), ApiProvider::Sglang)
+        && let Ok(value) = std::env::var("SGLANG_BASE_URL")
+        && !value.trim().is_empty()
+    {
+        config
+            .providers
+            .get_or_insert_with(ProvidersConfig::default)
+            .sglang
+            .base_url = Some(value);
+    }
+    if matches!(config.api_provider(), ApiProvider::Sglang)
+        && let Ok(value) = std::env::var("SGLANG_MODEL")
+    {
+        config.default_text_model = Some(value);
+    }
    if let Ok(value) =
        std::env::var("DEEPSEEK_MODEL").or_else(|_| std::env::var("DEEPSEEK_DEFAULT_TEXT_MODEL"))
    {
@@ -1485,6 +1547,16 @@ fn normalize_model_config(config: &mut Config) {
        {
            providers.novita.model = Some(normalized);
        }
+        if let Some(model) = providers.fireworks.model.as_deref()
+            && let Some(normalized) = normalize_model_for_provider(ApiProvider::Fireworks, model)
+        {
+            providers.fireworks.model = Some(normalized);
+        }
+        if let Some(model) = providers.sglang.model.as_deref()
+            && let Some(normalized) = normalize_model_for_provider(ApiProvider::Sglang, model)
+        {
+            providers.sglang.model = Some(normalized);
+        }
    }
 }

@@ -1502,6 +1574,13 @@ fn model_for_provider(provider: ApiProvider, normalized: String) -> String {
        }
        (ApiProvider::Novita, "deepseek-v4-pro") => DEFAULT_NOVITA_MODEL.to_string(),
        (ApiProvider::Novita, "deepseek-v4-flash") => DEFAULT_NOVITA_FLASH_MODEL.to_string(),
+        (ApiProvider::Fireworks, "deepseek-v4-pro") => DEFAULT_FIREWORKS_MODEL.to_string(),
+        (ApiProvider::Fireworks, "deepseek-v4-flash") => {
+            // Flash is not yet available on Fireworks; map to its anticipated namespaced id.
+            "accounts/fireworks/models/deepseek-v4-flash".to_string()
+        }
+        (ApiProvider::Sglang, "deepseek-v4-pro") => DEFAULT_SGLANG_MODEL.to_string(),
+        (ApiProvider::Sglang, "deepseek-v4-flash") => DEFAULT_SGLANG_FLASH_MODEL.to_string(),
        _ => normalized,
    }
 }
@@ -1618,6 +1697,8 @@ fn merge_providers(
            nvidia_nim: merge_provider_config(base.nvidia_nim, override_cfg.nvidia_nim),
            openrouter: merge_provider_config(base.openrouter, override_cfg.openrouter),
            novita: merge_provider_config(base.novita, override_cfg.novita),
+            fireworks: merge_provider_config(base.fireworks, override_cfg.fireworks),
+            sglang: merge_provider_config(base.sglang, override_cfg.sglang),
        }),
    }
 }
@@ -1821,6 +1902,8 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
        ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
        ApiProvider::Openrouter => "OPENROUTER_API_KEY",
        ApiProvider::Novita => "NOVITA_API_KEY",
+        ApiProvider::Fireworks => "FIREWORKS_API_KEY",
+        ApiProvider::Sglang => "SGLANG_API_KEY",
    };
    if std::env::var(env_var).is_ok_and(|k| !k.trim().is_empty()) {
        return true;
@@ -1831,12 +1914,19 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
        return true;
    }

+    // SGLang is self-hosted and typically runs without authentication.
+    if matches!(provider, ApiProvider::Sglang) {
+        return true;
+    }
+
    if let Some(providers) = config.providers.as_ref() {
        let entry = match provider {
            ApiProvider::Deepseek => &providers.deepseek,
            ApiProvider::NvidiaNim => &providers.nvidia_nim,
            ApiProvider::Openrouter => &providers.openrouter,
            ApiProvider::Novita => &providers.novita,
+            ApiProvider::Fireworks => &providers.fireworks,
+            ApiProvider::Sglang => &providers.sglang,
        };
        if entry
            .api_key
@@ -1873,6 +1963,8 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
        ApiProvider::NvidiaNim => "providers.nvidia_nim",
        ApiProvider::Openrouter => "providers.openrouter",
        ApiProvider::Novita => "providers.novita",
+        ApiProvider::Fireworks => "providers.fireworks",
+        ApiProvider::Sglang => "providers.sglang",
    };

    // Parse existing TOML (or start fresh) so we can edit the right table
@@ -1898,6 +1990,8 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
        ApiProvider::NvidiaNim => "nvidia_nim",
        ApiProvider::Openrouter => "openrouter",
        ApiProvider::Novita => "novita",
+        ApiProvider::Fireworks => "fireworks",
+        ApiProvider::Sglang => "sglang",
    };
    let entry = providers
        .entry(key_inside.to_string())
@@ -1987,6 +2081,11 @@ mod tests {
        openrouter_base_url: Option<OsString>,
        novita_api_key: Option<OsString>,
        novita_base_url: Option<OsString>,
+        fireworks_api_key: Option<OsString>,
+        fireworks_base_url: Option<OsString>,
+        sglang_api_key: Option<OsString>,
+        sglang_base_url: Option<OsString>,
+        sglang_model: Option<OsString>,
    }

    impl EnvGuard {
@@ -2012,6 +2111,11 @@ mod tests {
            let openrouter_base_url_prev = env::var_os("OPENROUTER_BASE_URL");
            let novita_api_key_prev = env::var_os("NOVITA_API_KEY");
            let novita_base_url_prev = env::var_os("NOVITA_BASE_URL");
+            let fireworks_api_key_prev = env::var_os("FIREWORKS_API_KEY");
+            let fireworks_base_url_prev = env::var_os("FIREWORKS_BASE_URL");
+            let sglang_api_key_prev = env::var_os("SGLANG_API_KEY");
+            let sglang_base_url_prev = env::var_os("SGLANG_BASE_URL");
+            let sglang_model_prev = env::var_os("SGLANG_MODEL");
            // Safety: test-only environment mutation guarded by a global mutex.
            unsafe {
                env::set_var("HOME", &home_str);
@@ -2032,6 +2136,11 @@ mod tests {
                env::remove_var("OPENROUTER_BASE_URL");
                env::remove_var("NOVITA_API_KEY");
                env::remove_var("NOVITA_BASE_URL");
+                env::remove_var("FIREWORKS_API_KEY");
+                env::remove_var("FIREWORKS_BASE_URL");
+                env::remove_var("SGLANG_API_KEY");
+                env::remove_var("SGLANG_BASE_URL");
+                env::remove_var("SGLANG_MODEL");
            }
            Self {
                home: home_prev,
@@ -2052,6 +2161,11 @@ mod tests {
                openrouter_base_url: openrouter_base_url_prev,
                novita_api_key: novita_api_key_prev,
                novita_base_url: novita_base_url_prev,
+                fireworks_api_key: fireworks_api_key_prev,
+                fireworks_base_url: fireworks_base_url_prev,
+                sglang_api_key: sglang_api_key_prev,
+                sglang_base_url: sglang_base_url_prev,
+                sglang_model: sglang_model_prev,
            }
        }
    }
@@ -2081,6 +2195,11 @@ mod tests {
                Self::restore_var("OPENROUTER_BASE_URL", self.openrouter_base_url.take());
                Self::restore_var("NOVITA_API_KEY", self.novita_api_key.take());
                Self::restore_var("NOVITA_BASE_URL", self.novita_base_url.take());
+                Self::restore_var("FIREWORKS_API_KEY", self.fireworks_api_key.take());
+                Self::restore_var("FIREWORKS_BASE_URL", self.fireworks_base_url.take());
+                Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take());
+                Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take());
+                Self::restore_var("SGLANG_MODEL", self.sglang_model.take());
            }
        }
    }
@@ -24,6 +24,7 @@ use crate::compaction::{
    CompactionConfig, compact_messages_safe, estimate_tokens, merge_system_prompts, should_compact,
 };
 use crate::config::{Config, DEFAULT_MAX_SUBAGENTS, DEFAULT_TEXT_MODEL};
+use crate::seam_manager::{SeamConfig, SeamManager};
 use crate::cycle_manager::{
    CycleBriefing, CycleConfig, StructuredState, archive_cycle, build_seed_messages,
    estimate_briefing_tokens, produce_briefing, should_advance_cycle,
@@ -265,6 +266,9 @@ pub struct Engine {
    shared_cancel_token: Arc<StdMutex<CancellationToken>>,
    tool_exec_lock: Arc<RwLock<()>>,
    capacity_controller: CapacityController,
+    /// Append-only layered context manager (#159). Produces soft seams at
+    /// 192K/384K/576K and Flash-cycle briefings at 768K.
+    seam_manager: Option<SeamManager>,
    coherence_state: CoherenceState,
    turn_counter: u64,
    /// Post-edit LSP diagnostics injection (#136). Populated unconditionally
@@ -1254,6 +1258,36 @@ impl Engine {
        let shell_manager = new_shared_shell_manager(config.workspace.clone());
        let capacity_controller = CapacityController::new(config.capacity.clone());

+        // Create Flash seam manager for layered context (#159). Uses the same
+        // API credentials as the main client but targets the Flash model for
+        // cost-effective summarisation and cycle briefing work.
+        let seam_manager = deepseek_client.as_ref().map(|main_client| {
+            let seam_config = SeamConfig {
+                enabled: api_config.context.enabled.unwrap_or(true),
+                verbatim_window_turns: api_config.context.verbatim_window_turns.unwrap_or(
+                    crate::seam_manager::VERBATIM_WINDOW_TURNS,
+                ),
+                l1_threshold: api_config.context.l1_threshold.unwrap_or(
+                    crate::seam_manager::DEFAULT_L1_THRESHOLD,
+                ),
+                l2_threshold: api_config.context.l2_threshold.unwrap_or(
+                    crate::seam_manager::DEFAULT_L2_THRESHOLD,
+                ),
+                l3_threshold: api_config.context.l3_threshold.unwrap_or(
+                    crate::seam_manager::DEFAULT_L3_THRESHOLD,
+                ),
+                cycle_threshold: api_config.context.cycle_threshold.unwrap_or(
+                    crate::seam_manager::DEFAULT_CYCLE_THRESHOLD,
+                ),
+                seam_model: api_config
+                    .context
+                    .seam_model
+                    .clone()
+                    .unwrap_or_else(|| crate::seam_manager::DEFAULT_SEAM_MODEL.to_string()),
+            };
+            SeamManager::new(main_client.clone(), seam_config)
+        });
+
        let lsp_manager = Arc::new(match config.lsp_config.clone() {
            Some(cfg) => crate::lsp::LspManager::new(cfg, config.workspace.clone()),
            None => crate::lsp::LspManager::disabled(),
@@ -1276,6 +1310,7 @@ impl Engine {
            shared_cancel_token: shared_cancel_token.clone(),
            tool_exec_lock,
            capacity_controller,
+            seam_manager,
            coherence_state: CoherenceState::default(),
            turn_counter: 0,
            lsp_manager,
@@ -2378,7 +2413,117 @@ impl Engine {

    /// Handle a turn using the DeepSeek API.
    #[allow(clippy::too_many_lines)]
-    /// Run the checkpoint-restart cycle boundary if the session has crossed
+    /// Run the pre-request layered-context checkpoint (#159). Checks whether
+    /// cumulative tokens have crossed a soft-seam threshold and, if so,
+    /// produces an `<archived_context>` block via Flash and appends it as an
+    /// assistant message. Called from `handle_deepseek_turn` before each API
+    /// request so the model always has the latest navigation aids.
+    async fn layered_context_checkpoint(&mut self) {
+        let Some(ref seam_mgr) = self.seam_manager else {
+            return;
+        };
+        if !seam_mgr.config().enabled {
+            return;
+        }
+
+        // Cumulative tokens: session total (input + output over all turns so
+        // far) + current estimated input (the messages that will be sent next).
+        let cumulative_input = self
+            .session
+            .total_usage
+            .input_tokens
+            .saturating_add(self.session.total_usage.output_tokens);
+        let cumulative_estimate =
+            cumulative_input.saturating_add(self.estimated_input_tokens() as u64);
+
+        let highest = seam_mgr.highest_level().await;
+        let Some(level) = seam_mgr.seam_level_for(cumulative_estimate as usize, highest) else {
+            return;
+        };
+
+        // Determine the message range to summarize: everything before the
+        // verbatim window. The verbatim window (last ~16 turns) stays
+        // untouched so the model always has ground-truth recent context.
+        let msg_count = self.session.messages.len();
+        let verbatim_start = seam_mgr.verbatim_window_start(msg_count);
+        if verbatim_start == 0 {
+            return; // Not enough messages to summarize.
+        }
+
+        let msg_range_end = verbatim_start;
+        let pinned = self
+            .session
+            .working_set
+            .pinned_message_indices(&self.session.messages, &self.session.workspace);
+
+        let _ = self
+            .tx_event
+            .send(Event::status(format!(
+                "⏻ producing L{level} context seam ({msg_range_end} messages)…"
+            )))
+            .await;
+
+        // If we have existing seams, recompact; otherwise produce fresh.
+        let existing_seams = seam_mgr.collect_seam_texts(&self.session.messages).await;
+        let seam_text = if existing_seams.is_empty() {
+            match seam_mgr
+                .produce_soft_seam(
+                    &self.session.messages,
+                    level,
+                    0,
+                    msg_range_end,
+                    Some(&self.session.workspace),
+                    &pinned,
+                )
+                .await
+            {
+                Ok(text) => text,
+                Err(err) => {
+                    crate::logging::warn(format!("L{level} soft seam failed: {err}"));
+                    return;
+                }
+            }
+        } else {
+            let recent: Vec<&Message> = (0..msg_range_end)
+                .filter_map(|i| self.session.messages.get(i))
+                .collect();
+            match seam_mgr
+                .recompact(&existing_seams, &recent, level, 0, msg_range_end)
+                .await
+            {
+                Ok(text) => text,
+                Err(err) => {
+                    crate::logging::warn(format!("L{level} recompact failed: {err}"));
+                    return;
+                }
+            }
+        };
+
+        if seam_text.is_empty() {
+            return;
+        }
+
+        // Capture seam count before the mutable borrow below.
+        let seam_count = seam_mgr.seam_count().await;
+
+        // Append the seam as an assistant message. This is an append-only
+        // operation — no messages are deleted. The prefix cache stays hot.
+        self.add_session_message(Message {
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::Text {
+                text: seam_text,
+                cache_control: None,
+            }],
+        })
+        .await;
+
+        let _ = self
+            .tx_event
+            .send(Event::status(format!(
+                "⏻ L{level} seam complete ({seam_count} total, {msg_range_end} messages covered)"
+            )))
+            .await;
+    }
    /// its token threshold (issue #124). No-op in the common case.
    ///
    /// Caller must invoke this only at a clean turn boundary (no in-flight
@@ -2420,31 +2565,79 @@ impl Engine {
            )))
            .await;

-        // 1. Generate the model-curated briefing. We do this *before*
-        //    archiving so a briefing-call failure leaves the cycle intact —
-        //    the user can keep working at higher token counts until the next
-        //    boundary check, rather than losing their context to a failed
-        //    handoff.
-        let briefing_text = match produce_briefing(
-            &client,
-            &self.session.model,
-            &self.session.messages,
-            max_briefing_tokens,
-        )
-        .await
-        {
-            Ok(text) => text,
-            Err(err) => {
-                crate::logging::warn(format!(
-                    "Cycle briefing turn failed; skipping cycle advance: {err}"
-                ));
-                let _ = self
-                    .tx_event
-                    .send(Event::status(format!(
-                        "↻ cycle handoff failed (continuing in cycle {from}): {err}"
-                    )))
-                    .await;
-                return;
+        // 1. Generate the model-curated briefing. Prefer the Flash seam
+        //    manager (#159) for cost and speed; fall back to the main model
+        //    (legacy produce_briefing) when the seam manager isn't available.
+        let briefing_text = if let Some(ref seam_mgr) = self.seam_manager {
+            let seams = seam_mgr.collect_seam_texts(&self.session.messages).await;
+            let state_text = {
+                let s = StructuredState::capture(
+                    mode.label(),
+                    self.config.workspace.clone(),
+                    std::env::current_dir().ok(),
+                    &self.session.working_set,
+                    &self.config.todos,
+                    &self.config.plan_state,
+                    Some(&self.subagent_manager),
+                )
+                .await;
+                s.to_system_block()
+            };
+            match seam_mgr
+                .produce_flash_briefing(&seams, state_text.as_deref())
+                .await
+            {
+                Ok(text) => text,
+                Err(err) => {
+                    crate::logging::warn(format!(
+                        "Flash briefing failed, falling back to main model: {err}"
+                    ));
+                    match produce_briefing(
+                        &client,
+                        &self.session.model,
+                        &self.session.messages,
+                        max_briefing_tokens,
+                    )
+                    .await
+                    {
+                        Ok(text) => text,
+                        Err(err2) => {
+                            crate::logging::warn(format!(
+                                "Cycle briefing turn failed; skipping cycle advance: {err2}"
+                            ));
+                            let _ = self
+                                .tx_event
+                                .send(Event::status(format!(
+                                    "↻ cycle handoff failed (continuing in cycle {from}): {err2}"
+                                )))
+                                .await;
+                            return;
+                        }
+                    }
+                }
+            }
+        } else {
+            match produce_briefing(
+                &client,
+                &self.session.model,
+                &self.session.messages,
+                max_briefing_tokens,
+            )
+            .await
+            {
+                Ok(text) => text,
+                Err(err) => {
+                    crate::logging::warn(format!(
+                        "Cycle briefing turn failed; skipping cycle advance: {err}"
+                    ));
+                    let _ = self
+                        .tx_event
+                        .send(Event::status(format!(
+                            "↻ cycle handoff failed (continuing in cycle {from}): {err}"
+                        )))
+                        .await;
+                    return;
+                }
            }
        };

@@ -2504,6 +2697,10 @@ impl Engine {
        self.session.cycle_count = to;
        self.session.current_cycle_started = now;
        self.session.cycle_briefings.push(briefing.clone());
+        // Reset seam tracking for the new cycle.
+        if let Some(ref seam_mgr) = self.seam_manager {
+            seam_mgr.reset().await;
+        }
        // Drop any compaction summary — that path is incompatible with the
        // fresh-context model and would Frankenstein-merge with the briefing.
        self.session.compaction_summary_prompt = None;
@@ -212,6 +212,12 @@ impl Engine {
            // model sees compile errors before its next reasoning step.
            self.flush_pending_lsp_diagnostics().await;

+            // #159: layered context seam checkpoint. Produces soft seams at
+            // 192K/384K/576K via Flash and appends <archived_context> blocks
+            // so the model can navigate deep history without losing prefix
+            // cache affinity.
+            self.layered_context_checkpoint().await;
+
            // Build the request
            let force_update_plan_this_step = force_update_plan_first && turn.tool_calls.is_empty();
            let active_tools = if tool_catalog.is_empty() {
@@ -28,13 +28,13 @@
 //!
 //! ## Trigger
 //!
-//! - Token threshold: **110K** by default (leaves ~8.5K headroom for the
-//!   briefing turn plus next-turn growth before crossing the 128K elbow).
+//! - Token threshold: **768K** by default (~75% of the 1M window). Soft seams
+//!   at 192K/384K/576K (layered context manager, #159) handle intermediate
+//!   thresholds. The hard cycle only fires near the wall.
 //! - Phase guard: callers only invoke `should_advance_cycle` at clean turn
 //!   boundaries (no in-flight tool, no streaming, no approval modal).
 //! - Per-model overrides: `[cycle.per_model]` in config.toml lets operators
-//!   tune the threshold separately for `deepseek-v4-pro` vs. `-flash` if
-//!   their workloads have different briefing costs.
+//!   tune the threshold separately for `deepseek-v4-pro` vs. `-flash`.

 use std::collections::HashMap;
 use std::fs::{File, OpenOptions};
@@ -56,10 +56,12 @@ use crate::working_set::WorkingSet;
 /// JSONL header record emitted as the first line of an archived cycle file.
 const CYCLE_ARCHIVE_SCHEMA_VERSION: u32 = 1;

-/// Default token threshold at which a cycle boundary fires. Set below the V4
-/// 128K retrieval elbow to leave room for the briefing turn (≤3K tokens) plus
-/// the next user turn before the next boundary.
-pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 110_000;
+/// Default token threshold at which a cycle boundary fires.
+///
+/// Bumped from 110K (pre-#159) to 768K (~75% of 1M window) in v0.7.2.
+/// The layered context manager (#159) handles intermediate thresholds via
+/// soft seams at 192K/384K/576K, so the hard cycle only fires near the wall.
+pub const DEFAULT_CYCLE_THRESHOLD_TOKENS: usize = 768_000;

 /// Default cap on the model-curated briefing block.
 pub const DEFAULT_BRIEFING_MAX_TOKENS: usize = 3_000;
@@ -758,10 +760,10 @@ mod tests {
    #[test]
    fn should_advance_combines_input_and_output() {
        let cfg = CycleConfig::default();
-        // 60k + 60k = 120k > 110k threshold
+        // 400K + 400K = 800K > 768K threshold
        assert!(should_advance_cycle(
-            60_000,
-            60_000,
+            400_000,
+            400_000,
            "deepseek-v4-pro",
            &cfg,
            false
@@ -1145,6 +1145,14 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
                    "NOVITA_API_KEY",
                    "deepseek auth set --provider novita --api-key \"...\"",
                ),
+                crate::config::ApiProvider::Fireworks => (
+                    "FIREWORKS_API_KEY",
+                    "deepseek auth set --provider fireworks --api-key \"...\"",
+                ),
+                crate::config::ApiProvider::Sglang => (
+                    "SGLANG_API_KEY",
+                    "deepseek auth set --provider sglang --api-key \"...\"",
+                ),
                crate::config::ApiProvider::Deepseek => {
                    ("DEEPSEEK_API_KEY", "deepseek login --api-key \"...\"")
                }
@@ -1156,6 +1164,8 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
                    crate::config::ApiProvider::NvidiaNim => "nvidia_nim",
                    crate::config::ApiProvider::Openrouter => "openrouter",
                    crate::config::ApiProvider::Novita => "novita",
+                    crate::config::ApiProvider::Fireworks => "fireworks",
+                    crate::config::ApiProvider::Sglang => "sglang",
                    crate::config::ApiProvider::Deepseek => "deepseek",
                }
            );
@@ -20,11 +20,19 @@ The user can see their own message. Use the first line to show forward motion.

 You are a "managed genius" — you excel at individual tasks, but your superpower is decomposing complex work. **Always decompose before you act.** A few minutes spent planning saves many minutes of thrashing.

+Use three decomposition patterns from the V4 paper (arXiv:2512.24601), selected by task scope:
+
+**PREVIEW** — Before diving into a large task, survey the terrain. Scan directory structure (`list_dir`), file headers, module trees. Identify problem boundaries and estimate complexity. A 30-second preview prevents hours of wrong-path exploration.
+
+**CHUNK + map-reduce** — When a task exceeds single-pass capacity: split into independent sub-tasks, process each independently (parallel where possible via parallel tool calls or `agent_swarm`), then synthesize findings into a coherent whole. Track chunks with `todo_write`.
+
+**RECURSIVE** — When sub-tasks reveal sub-problems: decompose recursively until each leaf is tractable. Maintain the task tree via `update_plan` (strategy) layered above `todo_write` (leaf tasks). Propagate findings upward when sub-problems resolve.
+
 Your default workflow for any non-trivial request:
 1. **`todo_write`** — break the work into concrete, verifiable tasks. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing.
 2. **Execute** — work through each todo, updating status as you go.
 3. **For complex initiatives**, layer `update_plan` (high-level strategy) above `todo_write` (granular steps).
-4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking.
+4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking. Batch independent tool calls in a single turn.
 5. **For long inputs that don't fit in your context** (whole files, transcripts, multi-doc corpora) or when you need recursive sub-LLM work, use `rlm` — it loads the input into a Python REPL as `context` and runs sub-LLM calls there so the long string never enters your window.
 6. **For persistent cross-session memory**, use `note` sparingly for important decisions, open blockers, and architectural context.

@@ -35,6 +43,34 @@ You have a 1 M-token context window. When usage creeps above ~80%, suggest `/c

 Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`) before final answers. These are invisible to the user but count against context. Cost/token estimates are approximate; treat them as a rough guide.

+## Your V4 Characteristics
+
+You run on V4 architecture. Understanding the internals helps you self-manage:
+
+**Degradation curve.** Retrieval quality holds well to ~256K tokens, then degrades rapidly. Keep your active working set below ~256K. Older verbatim messages persist but are harder to retrieve accurately — treat `<archived_context>` seams as navigational markers, not a working-memory substitute.
+
+**Prefix cache economics.** V4 caches shared prefixes at 128-token granularity with ~90% cost discount. Prefer appending to existing messages over mutating old ones — deletion or replacement breaks the cache and increases cost. Structure output to maximize prefix reuse across turns.
+
+**Thinking token strategy.** Thinking tokens count against context and replay across turns (the `reasoning_content` rule). Use them strategically: skip for lookups, light for simple code generation, deep for architecture and debugging. Cache conclusions in concise inline summaries rather than re-deriving each turn.
+
+**Parallel execution.** Batch independent reads, searches, and greps into a single turn. Never serialize operations that can run concurrently — parallel tool calls share the same turn and finish faster.
+
+## Thinking Budget
+
+Match thinking depth to task complexity. Overthinking wastes tokens; underthinking causes rework.
+
+| Task type | Thinking depth | Rationale |
+|-----------|---------------|-----------|
+| Simple factual lookup (read, search) | Skip | Answer is immediate |
+| Tool output interpretation | Light | Verify result matches intent |
+| Code generation (single function) | Light | Pattern-matching |
+| Multi-file refactor | Medium | Cross-file dependencies |
+| Debugging (error to root cause) | Deep | Hypothesis generation |
+| Architecture design | Deep | Trade-offs, constraints |
+| Security review | Deep | Adversarial reasoning |
+
+When context is deep (past a soft seam): cache reasoning conclusions in concise inline summaries, reference prior conclusions rather than re-deriving, and remember that thinking tokens in the verbatim window survive compaction. Think once, reference many times.
+
 ## Toolbox (fast reference — tool descriptions are authoritative)

 - **Planning / tracking**: `update_plan` (high-level strategy), `todo_write` (granular task list — use this first), `todo_add` / `todo_update` / `todo_list` (legacy single-item ops), `note` (persistent memory).
@@ -95,13 +95,19 @@ pub struct SeamMetadata {
    /// Which level (1, 2, or 3).
    pub level: u8,
    /// Message range covered (inclusive-exclusive indices).
+    /// Reserved for future diagnostic use.
+    #[allow(dead_code)]
    pub start_idx: usize,
+    #[allow(dead_code)]
    pub end_idx: usize,
    /// Approximate token count of the summary.
+    #[allow(dead_code)]
    pub token_estimate: usize,
    /// When the seam was produced.
+    #[allow(dead_code)]
    pub timestamp: DateTime<Utc>,
    /// Model that produced it.
+    #[allow(dead_code)]
    pub model: String,
 }

@@ -162,7 +168,11 @@ impl SeamManager {
    }

    /// Check whether the hard cycle boundary is crossed.
+    ///
+    /// True only when `config.enabled` and `cumulative_tokens >= config.cycle_threshold`.
+    /// Not currently called (cycle detection uses an inline check); kept for future wiring.
    #[must_use]
+    #[allow(dead_code)]
    pub fn should_cycle(&self, cumulative_tokens: usize) -> bool {
        self.config.enabled && cumulative_tokens >= self.config.cycle_threshold
    }
@@ -542,10 +552,10 @@ impl SeamManager {
        for msg in messages {
            if msg.role == "assistant" {
                for block in &msg.content {
-                    if let ContentBlock::Text { text, .. } = block {
-                        if text.contains("<archived_context") {
-                            texts.push(text.clone());
-                        }
+                    if let ContentBlock::Text { text, .. } = block
+                        && text.contains("<archived_context")
+                    {
+                        texts.push(text.clone());
                    }
                }
            }
@@ -602,7 +612,7 @@ mod tests {
    fn cycle_threshold_check() {
        let config = SeamConfig::default();
        assert!(768_000 >= config.cycle_threshold);
-        assert!(!(700_000 >= config.cycle_threshold));
+        assert!(700_000 < config.cycle_threshold);
    }

    #[test]
@@ -263,7 +263,7 @@ impl ToolSpec for EditFileTool {
    }

    fn description(&self) -> &'static str {
-        "Replace text in a file using search/replace."
+        "Replace text in a file using search/replace. Required: 'path' (file to edit), 'search' (exact text to find), 'replace' (text to substitute)."
    }

    fn input_schema(&self) -> Value {
@@ -603,6 +603,39 @@ mod tests {
        assert!(err.to_string().contains("not found"));
    }

+    /// #157 — When the model uses `replacement` instead of `replace`,
+    /// the error should name the provided fields so the model can
+    /// self-correct without a second round-trip.
+    #[tokio::test]
+    async fn test_edit_file_wrong_param_name_shows_provided_fields() {
+        let tmp = tempdir().expect("tempdir");
+        let ctx = ToolContext::new(tmp.path().to_path_buf());
+
+        let test_file = tmp.path().join("test.txt");
+        fs::write(&test_file, "hello world").expect("write");
+
+        let tool = EditFileTool;
+        // Model uses `replacement` instead of `replace`.
+        let input = json!({"path": "test.txt", "search": "hello", "replacement": "hi"});
+        let result = tool.execute(input, &ctx).await;
+
+        assert!(result.is_err());
+        let err = result.unwrap_err().to_string();
+
+        // The error must name the missing field ...
+        assert!(
+            err.contains("missing required field 'replace'"),
+            "error must name the missing field: {err}"
+        );
+
+        // ... and list the provided ones. "provided:" is a substring of
+        // "Input provided:", so this one check accepts both wordings.
+        assert!(
+            err.contains("provided:"),
+            "error must list the fields the model did supply: {err}"
+        );
+    }
+
    #[tokio::test]
    async fn test_list_dir_tool() {
        let tmp = tempdir().expect("tempdir");
@@ -58,6 +58,15 @@ pub enum MailboxMessage {
    Failed { agent_id: String, error: String },
    /// Cancellation propagated to this agent.
    Cancelled { agent_id: String },
+    /// Incremental token usage from a sub-agent's API call.
+    /// Published after each turn so the parent's cost counter updates live.
+    TokenUsage {
+        agent_id: String,
+        /// Prompt tokens consumed (input, including cached).
+        prompt_tokens: u32,
+        /// Completion tokens consumed (output).
+        completion_tokens: u32,
+    },
 }

 impl MailboxMessage {
@@ -72,7 +81,8 @@ impl MailboxMessage {
            | Self::ToolCallCompleted { agent_id, .. }
            | Self::Completed { agent_id, .. }
            | Self::Failed { agent_id, .. }
-            | Self::Cancelled { agent_id } => agent_id,
+            | Self::Cancelled { agent_id }
+            | Self::TokenUsage { agent_id, .. } => agent_id,
            Self::ChildSpawned { child_id, .. } => child_id,
        }
    }
@@ -90,6 +100,18 @@ impl MailboxMessage {
            status: status.into(),
        }
    }
+
+    pub(crate) fn token_usage(
+        agent_id: impl Into<String>,
+        prompt_tokens: u32,
+        completion_tokens: u32,
+    ) -> Self {
+        Self::TokenUsage {
+            agent_id: agent_id.into(),
+            prompt_tokens,
+            completion_tokens,
+        }
+    }
 }

 /// One delivery: a sequence number plus the message. The sequence is
@@ -434,6 +456,14 @@ mod tests {
                },
                "a8",
            ),
+            (
+                MailboxMessage::TokenUsage {
+                    agent_id: "a9".into(),
+                    prompt_tokens: 100,
+                    completion_tokens: 50,
+                },
+                "a9",
+            ),
        ];
        for (msg, expected) in cases {
            assert_eq!(msg.agent_id(), expected, "extract failed for {msg:?}");
@@ -2725,6 +2725,16 @@ async fn run_subagent(
        };

        let mut tool_uses = Vec::new();
+
+        // Report token usage so the parent's cost counter updates live.
+        if let Some(mb) = runtime.mailbox.as_ref() {
+            let _ = mb.send(MailboxMessage::token_usage(
+                &agent_id,
+                response.usage.input_tokens,
+                response.usage.output_tokens,
+            ));
+        }
+
        for block in &response.content {
            match block {
                ContentBlock::Text { text, .. } if !text.trim().is_empty() => {
@@ -520,6 +520,8 @@ pub struct App {
    pub tool_log: Vec<String>,
    /// Session cost tracking
    pub session_cost: f64,
+    /// Running cost from active sub-agents (updated live via mailbox).
+    pub subagent_cost: f64,
    /// Active skill to apply to next user message
    pub active_skill: Option<String>,
    /// Tool call cells by tool id (for cells already finalized in `history`).
@@ -923,6 +925,7 @@ impl App {
            todos: new_shared_todo_list(),
            tool_log: Vec::new(),
            session_cost: 0.0,
+            subagent_cost: 0.0,
            active_skill: None,
            tool_cells: HashMap::new(),
            tool_details_by_cell: HashMap::new(),
@@ -97,6 +97,25 @@ pub enum HistoryCell {
        streaming: bool,
        duration_secs: Option<f32>,
    },
+    /// An `<archived_context>` seam block produced by the Flash seam manager
+    /// (issue #159). Rendered dimmed/italic with a level + range label so
+    /// the user can see at a glance where context seams exist.
+    ArchivedContext {
+        /// Seam level (1, 2, 3, or 0 for cycle-level).
+        level: u8,
+        /// Message range covered (e.g. "msg 0-128").
+        range: String,
+        /// Token estimate string (e.g. "~2500").
+        tokens: String,
+        /// Density label (e.g. "~2,500 tokens").
+        density: String,
+        /// Model that produced the summary.
+        model: String,
+        /// RFC 3339 timestamp.
+        timestamp: String,
+        /// The summary text content.
+        summary: String,
+    },
    Tool(ToolCell),
    /// Live in-transcript card for sub-agent activity (issue #128). Owns
    /// either a single `DelegateCard` or a multi-worker `FanoutCard`; the
@@ -188,6 +207,9 @@ impl HistoryCell {
            } => render_thinking(content, width, *streaming, *duration_secs, false, false),
            HistoryCell::Tool(cell) => cell.lines_with_motion(width, false),
            HistoryCell::SubAgent(cell) => cell.lines(width),
+            HistoryCell::ArchivedContext { .. } => {
+                render_archived_context(self, width, false)
+            }
        }
    }

@@ -249,6 +271,9 @@ impl HistoryCell {
            ),
            HistoryCell::System { .. } | HistoryCell::Error { .. } => self.lines(width),
            HistoryCell::SubAgent(cell) => cell.lines(width),
+            HistoryCell::ArchivedContext { .. } => {
+                render_archived_context(self, width, options.low_motion)
+            }
        }
    }

@@ -293,6 +318,9 @@ impl HistoryCell {
            ),
            HistoryCell::Tool(cell) => cell.transcript_lines(width),
            HistoryCell::SubAgent(cell) => cell.lines(width),
+            HistoryCell::ArchivedContext { .. } => {
+                render_archived_context(self, width, true)
+            }
        }
    }

@@ -317,6 +345,172 @@ impl HistoryCell {
    }
 }

+/// Parse an `<archived_context>` block from an assistant Text block.
+///
+/// Returns `Some(HistoryCell::ArchivedContext)` when the trimmed text starts
+/// with `<archived_context` and ends with `</archived_context>`, or `None`
+/// for regular assistant content and for malformed blocks whose opening tag
+/// is never terminated by `>` ahead of the closing tag.
+///
+/// Expected shape (attribute values may contain spaces):
+///
+/// ```text
+/// <archived_context level="1" range="msg 0-128" tokens="~2500">
+/// summary text
+/// </archived_context>
+/// ```
+///
+/// A missing attribute yields an empty string; a missing or unparsable
+/// `level` yields `0`. The summary is the trimmed text between the tags.
+fn parse_archived_context(text: &str) -> Option<HistoryCell> {
+    const OPEN: &str = "<archived_context";
+    const CLOSE: &str = "</archived_context>";
+
+    /// Return the value of `name="value"` inside the opening `tag`.
+    ///
+    /// Searches for the space-prefixed ` name="` needle instead of splitting
+    /// the tag on spaces, so values containing spaces stay intact and a
+    /// longer attribute name that merely ends in `name` is not matched.
+    fn attr<'a>(tag: &'a str, name: &str) -> Option<&'a str> {
+        let needle = format!(" {name}=\"");
+        let start = tag.find(&needle)? + needle.len();
+        let rest = &tag[start..];
+        rest.find('"').map(|end| &rest[..end])
+    }
+
+    let text = text.trim();
+    if !text.starts_with(OPEN) || !text.ends_with(CLOSE) {
+        return None;
+    }
+
+    // The opening tag runs up to (not including) the first `>`.
+    let tag_end = text.find('>')?;
+    let tag = &text[..tag_end];
+
+    let level = attr(tag, "level")
+        .and_then(|v| v.parse::<u8>().ok())
+        .unwrap_or(0);
+
+    // Every other attribute is kept as an owned string, empty when absent.
+    let owned = |name: &str| attr(tag, name).unwrap_or_default().to_string();
+    let range = owned("range");
+    let tokens = owned("tokens");
+    let density = owned("density");
+    let model = owned("model");
+    let timestamp = owned("timestamp");
+
+    // `get` (rather than slicing) returns `None` instead of panicking when
+    // the first `>` belongs to the closing tag, which would otherwise put the
+    // start of the summary range after its end.
+    let close_tag = text.rfind(CLOSE)?;
+    let summary = text.get(tag_end + 1..close_tag)?.trim().to_string();
+
+    Some(HistoryCell::ArchivedContext {
+        level,
+        range,
+        tokens,
+        density,
+        model,
+        timestamp,
+        summary,
+    })
+}
+
+/// Render an `<archived_context>` block with dimmed/italic styling.
+///
+/// Output layout, top to bottom:
+///
+/// 1. A bold, dimmed header: `Context L{level}` followed by the range, the
+///    token estimate and the density label. Empty fields are skipped, and
+///    the density is skipped when it repeats the token string.
+/// 2. An optional muted line of the form `via {model} · {timestamp}`; either
+///    half is omitted when empty, and the whole line when both are.
+/// 3. The summary rendered as markdown (or `(no summary)` when empty), with
+///    `▏ ` in front of the first line and a two-space indent on the rest.
+/// 4. A trailing blank line.
+///
+/// Returns an empty vector when `cell` is not an `ArchivedContext`.
+/// `_low_motion` is currently unused.
+fn render_archived_context(cell: &HistoryCell, width: u16, _low_motion: bool) -> Vec<Line<'static>> {
+    let HistoryCell::ArchivedContext {
+        level,
+        range,
+        tokens,
+        density,
+        model,
+        timestamp,
+        summary,
+    } = cell
+    else {
+        return Vec::new();
+    };
+
+    // Header and body styles both build on the dimmed text colour.
+    let dim = Style::default().fg(palette::TEXT_DIM);
+    let label_style = dim.add_modifier(Modifier::BOLD);
+    let body_style = dim.italic();
+    let muted = Style::default().fg(palette::TEXT_MUTED);
+
+    // Keep at least one column for the body after the 4-column allowance.
+    let content_width = width.saturating_sub(4).max(1);
+
+    let mut lines = Vec::new();
+
+    // Header. Only the non-empty parts are joined, so the two-space
+    // separator neither doubles up nor trails when `range` is missing.
+    // The density is dropped when it merely repeats the token string.
+    let label = format!("Context L{level}");
+    let mut header_parts = vec![label.as_str()];
+    if !range.is_empty() {
+        header_parts.push(range.as_str());
+    }
+    if !tokens.is_empty() {
+        header_parts.push(tokens.as_str());
+    }
+    if !density.is_empty() && density != tokens {
+        header_parts.push(density.as_str());
+    }
+    lines.push(Line::from(Span::styled(header_parts.join("  "), label_style)));
+
+    // Provenance line: model and timestamp, joined by a middle dot. The
+    // line is emitted only when at least one of the two is present.
+    let mut provenance = Vec::new();
+    if !model.is_empty() {
+        provenance.push(format!("via {model}"));
+    }
+    if !timestamp.is_empty() {
+        provenance.push(timestamp.clone());
+    }
+    if !provenance.is_empty() {
+        lines.push(Line::from(Span::styled(provenance.join(" · "), muted)));
+    }
+
+    // Body: markdown-rendered summary behind a two-column gutter.
+    let body = if summary.is_empty() {
+        "(no summary)"
+    } else {
+        summary.as_str()
+    };
+    let rendered = crate::tui::markdown_render::render_markdown(body, content_width, body_style);
+    for (idx, line) in rendered.into_iter().enumerate() {
+        // Only the first body line carries the bar; later lines are indented
+        // by the same two columns so the text stays aligned.
+        let gutter = if idx == 0 {
+            Span::styled("▏ ", dim)
+        } else {
+            Span::raw("  ")
+        };
+        let mut spans = vec![gutter];
+        spans.extend(line.spans);
+        lines.push(Line::from(spans));
+    }
+
+    // Trailing blank line after the block.
+    lines.push(Line::from(""));
+
+    lines
+}
+
 /// Convert a message into history cells for rendering.
 #[must_use]
 pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {
@@ -324,7 +518,15 @@ pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {

    for block in &msg.content {
        match block {
-            ContentBlock::Text { text, .. } => match msg.role.as_str() {
+            ContentBlock::Text { text, .. } => {
+                // Check if this is an `<archived_context>` block.
+                if msg.role == "assistant"
+                    && let Some(archived) = parse_archived_context(text)
+                {
+                    cells.push(archived);
+                    continue;
+                }
+                match msg.role.as_str() {
                "user" => {
                    if let Some(HistoryCell::User { content }) = cells.last_mut() {
                        if !content.is_empty() {
@@ -363,6 +565,7 @@ pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {
                    }
                }
                _ => {}
+            }
            },
            ContentBlock::Thinking { thinking } => {
                if let Some(HistoryCell::Thinking { content, .. }) = cells.last_mut() {
@@ -90,6 +90,8 @@ impl ProviderPickerView {
            ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
            ApiProvider::Openrouter => "OPENROUTER_API_KEY",
            ApiProvider::Novita => "NOVITA_API_KEY",
+            ApiProvider::Fireworks => "FIREWORKS_API_KEY",
+            ApiProvider::Sglang => "SGLANG_API_KEY",
        }
    }

@@ -339,7 +341,7 @@ mod tests {
    }

    #[test]
-    fn picker_lists_all_four_providers() {
+    fn picker_lists_all_six_providers() {
        let config = Config::default();
        let picker = ProviderPickerView::new(ApiProvider::Deepseek, &config);
        let names: Vec<_> = picker
@@ -349,7 +351,7 @@ mod tests {
            .collect();
        assert_eq!(
            names,
-            vec!["DeepSeek", "NVIDIA NIM", "OpenRouter", "Novita AI"]
+            vec!["DeepSeek", "NVIDIA NIM", "OpenRouter", "Novita AI", "Fireworks AI", "SGLang"]
        );
    }

@@ -184,6 +184,7 @@ impl TranscriptViewCache {
                            | HistoryCell::Error { .. }
                            | HistoryCell::Tool(_)
                            | HistoryCell::SubAgent(_)
+                            | HistoryCell::ArchivedContext { .. }
                    ),
                    is_tool_groupable,
                });
@@ -3408,6 +3408,8 @@ fn render(f: &mut Frame, app: &mut App) {
            crate::config::ApiProvider::NvidiaNim => Some("NIM"),
            crate::config::ApiProvider::Openrouter => Some("OR"),
            crate::config::ApiProvider::Novita => Some("Novita"),
+            crate::config::ApiProvider::Fireworks => Some("Fireworks"),
+            crate::config::ApiProvider::Sglang => Some("SGLang"),
        };
        let header_data = HeaderData::new(
            app.mode,
@@ -3965,6 +3967,8 @@ async fn apply_provider_picker_api_key(
            ApiProvider::NvidiaNim => &mut providers.nvidia_nim,
            ApiProvider::Openrouter => &mut providers.openrouter,
            ApiProvider::Novita => &mut providers.novita,
+            ApiProvider::Fireworks => &mut providers.fireworks,
+            ApiProvider::Sglang => &mut providers.sglang,
        };
        entry.api_key = Some(api_key);
    }
@@ -4277,7 +4281,7 @@ fn render_footer(f: &mut Frame, area: Rect, app: &mut App) {
        // `working...` pulse stays even in low-motion mode so the user still
        // sees that something is happening.
        if !app.low_motion {
-            let strip_frame = now_ms / 150;
+            let strip_frame = now_ms;
            props.working_strip_frame = Some(strip_frame);
        }
    } else if props.state_label == "ready"
@@ -4482,9 +4486,9 @@ fn render_footer_from(
    } else {
        Vec::new()
    };
-    let cost = if has(S::Cost) && app.session_cost > 0.001 {
+    let cost = if has(S::Cost) && app.session_cost + app.subagent_cost > 0.001 {
        vec![Span::styled(
-            format!("${:.2}", app.session_cost),
+            format!("${:.2}", app.session_cost + app.subagent_cost),
            Style::default().fg(palette::TEXT_MUTED),
        )]
    } else {
@@ -4576,9 +4580,9 @@ fn footer_auxiliary_spans(app: &App, max_width: usize) -> Vec<Span<'static>> {
    let agents_spans = crate::tui::widgets::footer_agents_chip(running_agent_count(app));
    let replay_spans = footer_reasoning_replay_spans(app);
    let cache_spans = footer_cache_spans(app);
-    let cost_spans = if app.session_cost > 0.001 {
+    let cost_spans = if app.session_cost + app.subagent_cost > 0.001 {
        vec![Span::styled(
-            format!("${:.2}", app.session_cost),
+            format!("${:.2}", app.session_cost + app.subagent_cost),
            Style::default().fg(palette::TEXT_MUTED),
        )]
    } else {
@@ -4633,7 +4637,11 @@ fn footer_cache_spans(app: &App) -> Vec<Span<'static>> {
    let Some(hit_tokens) = app.last_prompt_cache_hit_tokens else {
        return Vec::new();
    };
-    let miss_tokens = app.last_prompt_cache_miss_tokens.unwrap_or(0);
+    let miss_tokens = app.last_prompt_cache_miss_tokens.unwrap_or_else(|| {
+        app.last_prompt_tokens
+            .unwrap_or(0)
+            .saturating_sub(hit_tokens)
+    });
    let total = hit_tokens.saturating_add(miss_tokens);
    if total == 0 {
        return Vec::new();
@@ -5329,6 +5337,7 @@ fn open_tool_details_pager(app: &mut App) -> bool {
        HistoryCell::Thinking { .. } => "Reasoning".to_string(),
        HistoryCell::Tool(_) => "Message".to_string(),
        HistoryCell::SubAgent(_) => "Sub-agent".to_string(),
+        HistoryCell::ArchivedContext { .. } => "Archived Context".to_string(),
    };
    let width = app
        .last_transcript_area
@@ -5492,6 +5501,21 @@ fn handle_subagent_mailbox(app: &mut App, _seq: u64, message: &MailboxMessage) {
        DelegateCard, FanoutCard, apply_to_delegate, apply_to_fanout,
    };

+    // Accumulate sub-agent token costs for the real-time footer counter (#166).
+    if let MailboxMessage::TokenUsage {
+        prompt_tokens,
+        completion_tokens,
+        ..
+    } = message
+    {
+        if let Some(cost) =
+            crate::pricing::calculate_turn_cost(&app.model, *prompt_tokens, *completion_tokens)
+        {
+            app.subagent_cost += cost;
+        }
+        return; // No card visual change needed; the footer handles display.
+    }
+
    // Resolve (or allocate) the target cell for this envelope. ChildSpawned
    // is special — it always belongs to the active fanout card if one
    // exists; otherwise it seeds a new one.
@@ -387,6 +387,12 @@ pub fn apply_to_delegate(card: &mut DelegateCard, msg: &MailboxMessage) -> bool
            // to a sibling fanout card, not this one.
            return false;
        }
+        MailboxMessage::TokenUsage { .. } => {
+            // Cost accumulation happens in handle_subagent_mailbox (ui.rs)
+            // before this apply function is called; TokenUsage never reaches
+            // this arm in practice.
+            return false;
+        }
    }
    true
 }
@@ -421,6 +427,12 @@ pub fn apply_to_fanout(card: &mut FanoutCard, msg: &MailboxMessage) -> bool {
            card.upsert_worker(child_id, AgentLifecycle::Pending);
            true
        }
+        MailboxMessage::TokenUsage { .. } => {
+            // Cost accumulation happens in handle_subagent_mailbox (ui.rs)
+            // before this apply function is called; TokenUsage never reaches
+            // this arm in practice.
+            true
+        }
    }
 }

@@ -59,15 +59,16 @@ pub struct FooterProps {
 }

 /// One frame of the footer's water-spout animation. `col` is the cell index
-/// inside the strip, `width` the strip's total width, `frame` the discrete
-/// 150 ms tick counter. Returns the glyph that should appear in that cell on
+/// inside the strip, `width` the strip's total width, `frame` the raw
+/// millisecond counter. Returns the glyph that should appear in that cell on
 /// that frame.
 ///
 /// Visual: two crests sweep across a calm water surface (`─`). The opener
-/// `⌒` rises, then a soft `‿` trails behind. Crest A advances every 4 ticks
-/// (~600 ms), crest B every 6 ticks (~900 ms) — independent speeds give the
-/// criss-cross fountain feel. Every 17 ticks (~2.5 s) the phase of crest B
-/// jitters by one column so the pattern never settles into a strict beat.
+/// `⌒` rises, then a soft `‿` trails behind. Crest A advances one column
+/// every ~300 ms (2 × 150 ms), crest B every ~450 ms (3 × 150 ms) —
+/// independent speeds give the criss-cross fountain feel. Positions are
+/// derived from `frame / 150.0` and rounded to whole columns; every ~2.5 s
+/// (17 ticks) crest B's phase is nudged so the two never lock into a beat.
 ///
 /// All math is pure given (col, width, frame) so unit tests can pin frames.
 #[must_use]
@@ -76,17 +77,22 @@ pub fn footer_working_strip_glyph_at(col: usize, width: usize, frame: u64) -> ch
        return ' ';
    }

+    // Elapsed time expressed in 150 ms ticks. Kept fractional here; the crest
+    // positions below are rounded to whole columns, so motion is stepwise.
+    let frame_f = frame as f64 / 150.0;
+
    // Crest is two glyphs wide: the leading `⌒` followed by a trailing `‿`.
    const CREST_SPAN: i64 = 2;
    // Cycle wide enough that each crest enters and exits cleanly.
    let cycle = (width as i64).max(CREST_SPAN) + CREST_SPAN * 2;
-    let frame_i = frame as i64;
-    // Crest A advances one column every 4 ticks; B every 6.
-    let pos_a = frame_i.div_euclid(4).rem_euclid(cycle) - CREST_SPAN;
-    // Phase jitter: every 17 ticks, nudge B by one column so the two crests
-    // never lock into a fixed offset.
-    let jitter = frame_i.div_euclid(17).rem_euclid(3);
-    let pos_b = (frame_i.div_euclid(6) + jitter + (cycle / 3) + 5).rem_euclid(cycle) - CREST_SPAN;
+    // Crest A advances one column every ~300 ms (2 × 150 ms ticks).
+    let pos_a = (frame_f / 2.0).round() as i64 % cycle - CREST_SPAN;
+    // Phase jitter: every ~2.5 s (17 ticks), nudge B by one column so the
+    // two crests never lock into a fixed offset.
+    let jitter = (frame_f / 17.0).round() as i64 % 3;
+    // Crest B advances one column every ~450 ms (3 × 150 ms ticks).
+    let pos_b =
+        ((frame_f / 3.0).round() as i64 + jitter + (cycle / 3) + 5).rem_euclid(cycle) - CREST_SPAN;

    crest_glyph_for(col as i64, pos_a)
        .or_else(|| crest_glyph_for(col as i64, pos_b))
@@ -687,16 +693,16 @@ mod tests {

    #[test]
    fn working_strip_glyph_is_deterministic_per_frame() {
-        // Same (col, width, frame) → same glyph. Stepping by one full
-        // crest-A tick (4 ticks ≈ 600 ms) is the minimum guaranteed
-        // animation step.
-        let a = super::footer_working_strip_string(40, 1);
-        let b = super::footer_working_strip_string(40, 1);
+        // Same (col, width, frame) → same glyph. Frames are now raw
+        // milliseconds; 150 ms apart represents one tick.
+        let a = super::footer_working_strip_string(40, 150);
+        let b = super::footer_working_strip_string(40, 150);
        assert_eq!(a, b, "deterministic given the same frame");
-        let c = super::footer_working_strip_string(40, 5);
+        // 750 ms → 5 ticks, crest A advances every 2 ticks → ≥2 steps.
+        let c = super::footer_working_strip_string(40, 750);
        assert_ne!(
            a, c,
-            "advancing one full crest-A step must change the strip",
+            "advancing 4 ticks must change the strip",
        );
    }

@@ -713,7 +719,7 @@ mod tests {
        FooterWidget::new(props.clone()).render(area, &mut buf);
        let idle: String = (0..area.width).map(|x| buf[(x, 0)].symbol()).collect();

-        props.working_strip_frame = Some(13);
+        props.working_strip_frame = Some(600);
        let mut buf2 = ratatui::buffer::Buffer::empty(area);
        FooterWidget::new(props).render(area, &mut buf2);
        let active: String = (0..area.width).map(|x| buf2[(x, 0)].symbol()).collect();
@@ -732,12 +738,11 @@ mod tests {

    #[test]
    fn working_strip_advances_position_within_full_crest_step() {
-        // Crest A advances one column every 4 ticks; B every 6. Stepping by
-        // 12 ticks guarantees both have moved at least one column,
-        // independent of the jitter cadence (17).
+        // Crest A advances every 2 ticks (300 ms), B every 3 (450 ms).
+        // 900 ms (6 ticks) guarantees crest A has advanced at least 3 columns.
        let width = 60;
        let f0 = super::footer_working_strip_string(width, 0);
-        let f12 = super::footer_working_strip_string(width, 12);
+        let f900 = super::footer_working_strip_string(width, 900);
        // Collect the columns that hold a crest opener `⌒` in each frame.
        let openers = |s: &str| -> Vec<usize> {
            s.chars()
@@ -747,20 +752,20 @@ mod tests {
        };
        assert_ne!(
            openers(&f0),
-            openers(&f12),
-            "crest opener columns must shift across a 12-tick window",
+            openers(&f900),
+            "crest opener columns must shift across a 900ms window",
        );
    }

    #[test]
    fn working_strip_renders_paired_crest_glyphs() {
        // The `⌒‿` pair is the visual centrepiece — a soft rise followed by
-        // a gentle dip. Sweep enough ticks that a crest is guaranteed to
-        // land fully inside a 60-cell strip at some point.
+        // a gentle dip. Sweep enough time (in ms) that a crest is guaranteed
+        // to land fully inside a 60-cell strip at some point.
        let width = 60;
        let mut saw_pair = false;
-        for frame in 0..120 {
-            let s = super::footer_working_strip_string(width, frame);
+        for frame_ms in (0..24_000).step_by(150) {
+            let s = super::footer_working_strip_string(width, frame_ms);
            if s.contains("\u{2312}\u{203F}") {
                saw_pair = true;
                break;
@@ -768,7 +773,7 @@ mod tests {
        }
        assert!(
            saw_pair,
-            "expected `⌒‿` pair somewhere in the first 120 ticks",
+            "expected `⌒‿` pair somewhere in the first 24s of animation",
        );
    }