chore(release): finalize v0.8.13 stabilization

2026-05-05 13:06:09 -05:00
parent 6b0a01d054
commit c4cbd7c19f
34 changed files with 1860 additions and 230 deletions
@@ -5,6 +5,49 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [0.8.13] - 2026-05-05
+
+A stabilization release for DeepSeek V4 runtime and TUI reliability. The
+v0.8.13 milestone was narrowed to direct runtime/TUI fixes; prompt hygiene,
+trajectory logging, Anthropic-wire support, and larger UI cleanup were moved
+out of this release.
+
+### Added
+- **No-LLM tool-result prune before compaction** (#710) — old verbose tool
+  results are mechanically summarized before the paid summary pass. Duplicate
+  reads keep the freshest full body and replace older copies with one-line
+  summaries; if that gets the session back under the compaction threshold, the
+  LLM summary call is skipped entirely.
+- **Repeated-tool anti-loop guard** (#714) — the engine now tracks
+  `(tool_name, args)` pairs per user turn. On the third identical call it
+  inserts a synthetic corrective tool result instead of running the same tool
+  again unchanged; per-tool failures warn at three and halt at eight.
+- **V4 cache-hit telemetry fallback** (#721) — usage parsing now recognizes
+  `usage.prompt_tokens_details.cached_tokens`, so the existing footer cache-hit
+  chip works with DeepSeek V4's automatic prefix-cache telemetry as well as the
+  older explicit hit/miss fields.
+
+### Fixed
+- **Invalid tool-call JSON repair** (#712) — malformed streamed tool arguments
+  now pass through a deterministic repair ladder before dispatch.
+- **Hallucinated tool-name recovery** (#713) — common non-canonical tool names
+  are resolved through the registry before the engine reports a missing tool.
+- **Tool-schema sanitation** (#715) — schemas are normalized before API
+  emission so provider-strict JSON Schema handling does not reject valid tools.
+- **Case-sensitive model IDs** (#717, #729) — valid configured model IDs keep
+  caller-provided case while compact DeepSeek aliases still canonicalize.
+- **Stale `working...` state after failed dispatch** (#738) — if the UI fails
+  to send a message to the engine before a turn starts, the composer loading
+  state is cleared instead of trapping later input in pending state.
+- **Prompt-free doctor key checks** — `deepseek doctor` no longer reads the OS
+  keyring, avoiding macOS Keychain prompts during diagnostics.
+- **macOS Terminal color compatibility** — `xterm-256color` sessions now
+  receive 256-color palette indexes instead of truecolor SGR, preventing
+  Apple Terminal from misrendering whale blues as green/cyan blocks.
+- **Chat client repair after Responses cleanup** — restored the chat client
+  body and regression coverage after removing the dead experimental Responses
+  fallback path.
+
 ## [0.8.11] - 2026-05-04

 ### Changed
@@ -1080,7 +1080,7 @@ dependencies = [

 [[package]]
 name = "deepseek-agent"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "deepseek-config",
 "serde",
@@ -1088,7 +1088,7 @@ dependencies = [

 [[package]]
 name = "deepseek-app-server"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "axum",
@@ -1110,7 +1110,7 @@ dependencies = [

 [[package]]
 name = "deepseek-config"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "deepseek-secrets",
@@ -1122,7 +1122,7 @@ dependencies = [

 [[package]]
 name = "deepseek-core"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1140,7 +1140,7 @@ dependencies = [

 [[package]]
 name = "deepseek-execpolicy"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "deepseek-protocol",
@@ -1149,7 +1149,7 @@ dependencies = [

 [[package]]
 name = "deepseek-hooks"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -1163,7 +1163,7 @@ dependencies = [

 [[package]]
 name = "deepseek-mcp"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "serde",
@@ -1172,7 +1172,7 @@ dependencies = [

 [[package]]
 name = "deepseek-protocol"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "serde",
 "serde_json",
@@ -1180,7 +1180,7 @@ dependencies = [

 [[package]]
 name = "deepseek-secrets"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "dirs",
 "keyring",
@@ -1193,7 +1193,7 @@ dependencies = [

 [[package]]
 name = "deepseek-state"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1205,7 +1205,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tools"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -1218,7 +1218,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "arboard",
@@ -1277,7 +1277,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-cli"
-version = "0.8.12"
+version = "0.8.13"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1301,7 +1301,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-core"
-version = "0.8.12"
+version = "0.8.13"

 [[package]]
 name = "deranged"
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
 resolver = "2"

 [workspace.package]
-version = "0.8.12"
+version = "0.8.13"
 edition = "2024"
 # Rust 1.88 stabilized `let_chains` in `if`/`while` conditions, which the
 # codebase relies on extensively. Cargo enforces this so users on older
@@ -174,24 +174,18 @@ SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model d

 ---

-## What's New In v0.8.12
+## What's New In v0.8.13

-A feature release with 20 community PRs on top of the v0.8.11 cache-maxing foundation. [Full changelog](CHANGELOG.md).
+A stabilization release focused on DeepSeek V4 runtime reliability, tool-call recovery, and TUI truthfulness. [Full changelog](CHANGELOG.md).

- **Reasoning-effort auto mode** — `reasoning_effort = "auto"` picks the right tier from the prompt: debug/error → Max, search/lookup → Low, default → High
- **Bash arity dictionary** — `auto_allow = ["git status"]` matches `git status -s` but not `git push`. Knows git, cargo, npm, docker, kubectl, and more
- **Vim modal editing** — normal/insert mode in the composer with standard Vim keybindings
- **Skill registry sync** — `/skills sync` fetches and installs/updates the community registry
- **FIM edit tool** — surgical code edits via DeepSeek's `/beta` fill-in-the-middle endpoint
- **Large-tool-output routing** — outsized tool results get truncated previews with spillover, protecting parent context
- **Pluggable sandbox backends** — `exec_shell` can route to Alibaba OpenSandbox or other remote backends
- **Layered permission rulesets** — builtin/agent/user priority layers for execpolicy deny/allow rules
- **Cache-aware resident sub-agents** — file content prepended for V4 prefix-cache locality; global lease table
- **Unified slash-command namespace** — user commands with `$1`/`$2`/`$ARGUMENTS` templates
- **Color::Reset migration** — all hardcoded backgrounds replaced with `Color::Reset` for light-terminal support
- **New docs**: SECURITY.md (#648), CODE_OF_CONDUCT.md (#686), zh-Hans locale activation (#652)
-
-*28 community PRs by [@merchloubna70-dot](https://github.com/merchloubna70-dot). First-time contributor [@zichen0116](https://github.com/zichen0116) (#686).*
+- **No-LLM compaction prune** — old verbose tool results are mechanically summarized before any paid summary call; duplicate reads keep only the freshest full body
+- **Repeated-tool anti-loop guard** — the third identical `(tool, args)` call in a turn becomes a corrective tool result instead of another stuck retry
+- **V4 cache-hit footer telemetry** — the status line now understands `usage.prompt_tokens_details.cached_tokens`
+- **Tool-call recovery** — invalid JSON arguments, hallucinated tool names, and strict schema issues are repaired or sanitized before dispatch
+- **Case-sensitive model IDs** — provider-specific model names keep caller-provided case while compact DeepSeek aliases still normalize
+- **Stale busy-state fix** — failed dispatch before turn start clears `working...` so input does not get stuck in pending state
+- **Prompt-free doctor key checks** — diagnostics no longer read the OS keyring
+- **macOS Terminal color compatibility** — `xterm-256color` sessions no longer render whale blues as green/cyan blocks

 ---

@@ -168,24 +168,18 @@ SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model d

 ---

-## v0.8.12 新功能
+## v0.8.13 新功能

-功能发布：在 v0.8.11 缓存优化基础上合并了 20 个社区 PR。[完整更新日志](CHANGELOG.md)。
+稳定性发布：聚焦 DeepSeek V4 运行时可靠性、工具调用恢复和 TUI 状态准确性。[完整更新日志](CHANGELOG.md)。

- **推理强度自动模式** —— `reasoning_effort = "auto"` 根据提示词自动选择档位：debug/error → Max，search/lookup → Low，默认 → High
- **Bash 参数匹配字典** —— `auto_allow = ["git status"]` 匹配 `git status -s` 但不匹配 `git push`。支持 git、cargo、npm、docker、kubectl 等
- **Vim 模态编辑** —— 在输入框中支持 Vim 普通/插入模式切换
- **技能注册表同步** —— `/skills sync` 拉取并安装/更新社区技能注册表
- **FIM 编辑工具** —— 通过 DeepSeek `/beta` 的 fill-in-the-middle 端点进行精确代码编辑
- **大工具输出路由** —— 超大工具结果被截断预览，保护父上下文窗口
- **可插拔沙箱后端** —— `exec_shell` 可路由到 Alibaba OpenSandbox 或其他远程后端
- **分层权限规则** —— builtin/agent/user 三层优先级，deny 永远优先
- **缓存感知常驻子智能体** —— 文件内容预置于系统提示中以利用 V4 前缀缓存；全局租约表
- **统一斜杠命令命名空间** —— 用户命令支持 `$1`/`$2`/`$ARGUMENTS` 模板
- **Color::Reset 迁移** —— 所有硬编码背景替换为 `Color::Reset`，适配浅色终端
- **新文档**：SECURITY.md (#648)、CODE_OF_CONDUCT.md (#686)、zh-Hans 语言激活 (#652)
-
-**28 个社区 PR 由 [@merchloubna70-dot](https://github.com/merchloubna70-dot) 贡献。首次贡献者 [@zichen0116](https://github.com/zichen0116) (#686)。**
+- **无需 LLM 的压缩预剪枝** —— 付费摘要前先机械压缩旧的大型工具结果；重复读取只保留最新完整内容
+- **重复工具调用防循环** —— 同一轮内第三次完全相同的 `(tool, args)` 会变成纠正性工具结果，而不是继续卡住重试
+- **V4 缓存命中率状态栏** —— 状态栏现在识别 `usage.prompt_tokens_details.cached_tokens`
+- **工具调用恢复** —— 无效 JSON 参数、幻觉工具名和严格 schema 问题会在分发前修复或清理
+- **区分大小写的模型 ID** —— 第三方 provider 的模型名保留用户输入大小写，同时继续规范化紧凑 DeepSeek 别名
+- **忙碌状态修复** —— 如果 turn 开始前分发失败，会清除 `working...`，避免后续输入一直进入 pending
+- **不会弹出 Keychain 的 doctor 密钥检查** —— 诊断流程不再读取 OS keyring
+- **macOS Terminal 颜色兼容** —— `xterm-256color` 会使用 256 色索引，避免鲸蓝主题被渲染成绿色/青色块

 ---

@@ -7,5 +7,5 @@ repository.workspace = true
 description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"

 [dependencies]
-deepseek-config = { path = "../config", version = "0.8.12" }
+deepseek-config = { path = "../config", version = "0.8.13" }
 serde.workspace = true
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
 anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.12" }
-deepseek-config = { path = "../config", version = "0.8.12" }
-deepseek-core = { path = "../core", version = "0.8.12" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.12" }
-deepseek-hooks = { path = "../hooks", version = "0.8.12" }
-deepseek-mcp = { path = "../mcp", version = "0.8.12" }
-deepseek-protocol = { path = "../protocol", version = "0.8.12" }
-deepseek-state = { path = "../state", version = "0.8.12" }
-deepseek-tools = { path = "../tools", version = "0.8.12" }
+deepseek-agent = { path = "../agent", version = "0.8.13" }
+deepseek-config = { path = "../config", version = "0.8.13" }
+deepseek-core = { path = "../core", version = "0.8.13" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.13" }
+deepseek-hooks = { path = "../hooks", version = "0.8.13" }
+deepseek-mcp = { path = "../mcp", version = "0.8.13" }
+deepseek-protocol = { path = "../protocol", version = "0.8.13" }
+deepseek-state = { path = "../state", version = "0.8.13" }
+deepseek-tools = { path = "../tools", version = "0.8.13" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -14,13 +14,13 @@ path = "src/main.rs"
 anyhow.workspace = true
 clap.workspace = true
 clap_complete.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.12" }
-deepseek-app-server = { path = "../app-server", version = "0.8.12" }
-deepseek-config = { path = "../config", version = "0.8.12" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.12" }
-deepseek-mcp = { path = "../mcp", version = "0.8.12" }
-deepseek-secrets = { path = "../secrets", version = "0.8.12" }
-deepseek-state = { path = "../state", version = "0.8.12" }
+deepseek-agent = { path = "../agent", version = "0.8.13" }
+deepseek-app-server = { path = "../app-server", version = "0.8.13" }
+deepseek-config = { path = "../config", version = "0.8.13" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.13" }
+deepseek-mcp = { path = "../mcp", version = "0.8.13" }
+deepseek-secrets = { path = "../secrets", version = "0.8.13" }
+deepseek-state = { path = "../state", version = "0.8.13" }
 chrono.workspace = true
 dirs.workspace = true
 serde.workspace = true
@@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite

 [dependencies]
 anyhow.workspace = true
-deepseek-secrets = { path = "../secrets", version = "0.8.12" }
+deepseek-secrets = { path = "../secrets", version = "0.8.13" }
 dirs.workspace = true
 serde.workspace = true
 toml.workspace = true
@@ -9,13 +9,13 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.12" }
-deepseek-config = { path = "../config", version = "0.8.12" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.12" }
-deepseek-hooks = { path = "../hooks", version = "0.8.12" }
-deepseek-mcp = { path = "../mcp", version = "0.8.12" }
-deepseek-protocol = { path = "../protocol", version = "0.8.12" }
-deepseek-state = { path = "../state", version = "0.8.12" }
-deepseek-tools = { path = "../tools", version = "0.8.12" }
+deepseek-agent = { path = "../agent", version = "0.8.13" }
+deepseek-config = { path = "../config", version = "0.8.13" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.13" }
+deepseek-hooks = { path = "../hooks", version = "0.8.13" }
+deepseek-mcp = { path = "../mcp", version = "0.8.13" }
+deepseek-protocol = { path = "../protocol", version = "0.8.13" }
+deepseek-state = { path = "../state", version = "0.8.13" }
+deepseek-tools = { path = "../tools", version = "0.8.13" }
 serde_json.workspace = true
 uuid.workspace = true
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace

 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.12" }
+deepseek-protocol = { path = "../protocol", version = "0.8.13" }
 serde.workspace = true
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
 anyhow.workspace = true
 async-trait.workspace = true
 chrono.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.12" }
+deepseek-protocol = { path = "../protocol", version = "0.8.13" }
 reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -88,10 +88,18 @@ impl DefaultKeyringStore {
    /// Probe the OS keyring without writing anything. Returns `Ok(())` if
    /// a backend is reachable, otherwise an error describing why not.
    pub fn probe(&self) -> Result<(), SecretsError> {
-        // `Entry::new` is enough to surface "no backend / no storage" on
-        // headless Linux; no actual read happens until `.get_password()`.
+        // `Entry::new` is enough to validate the native macOS/Windows
+        // backend path. Avoid a dummy read there because it can trigger
+        // a second user-visible Keychain/Credential Manager access before
+        // the real provider key lookup.
        let entry = keyring::Entry::new(&self.service, "__probe__")
            .map_err(|err| SecretsError::Keyring(err.to_string()))?;
+        #[cfg(any(target_os = "macos", target_os = "windows"))]
+        {
+            let _ = entry;
+            Ok(())
+        }
+        #[cfg(not(any(target_os = "macos", target_os = "windows")))]
        match entry.get_password() {
            Ok(_) | Err(keyring::Error::NoEntry) => Ok(()),
            Err(keyring::Error::PlatformFailure(err)) => {
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
 [dependencies]
 anyhow.workspace = true
 async-trait.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.12" }
+deepseek-protocol = { path = "../protocol", version = "0.8.13" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -21,8 +21,8 @@ path = "src/main.rs"
 [dependencies]
 anyhow = "1.0.100"
 arboard = "3.4"
-deepseek-secrets = { path = "../secrets", version = "0.8.12" }
-deepseek-tools = { path = "../tools", version = "0.8.12" }
+deepseek-secrets = { path = "../secrets", version = "0.8.13" }
+deepseek-tools = { path = "../tools", version = "0.8.13" }
 schemaui = { version = "0.12.0", default-features = false, optional = true }
 async-stream = "0.3.6"
 async-trait = "0.1"
@@ -1,10 +1,8 @@
 //! HTTP client for DeepSeek's OpenAI-compatible Chat Completions API.
 //!
-//! DeepSeek documents `/chat/completions` as the primary endpoint. A legacy
-//! Responses probe remains available behind `DEEPSEEK_EXPERIMENTAL_RESPONSES_API`
-//! for local compatibility experiments, but normal traffic uses chat completions.
+//! DeepSeek documents `/chat/completions` as the primary endpoint, and this
+//! client now routes all normal traffic through that surface.

-use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
 use std::sync::{Arc, Mutex as StdMutex, OnceLock};
 use std::time::{Duration, Instant};

@@ -16,8 +14,7 @@ use tokio::sync::Mutex as AsyncMutex;

 use crate::config::{ApiProvider, Config, RetryPolicy};
 use crate::llm_client::{
-    LlmClient, LlmError, RetryConfig as LlmRetryConfig, StreamEventBox, extract_retry_after,
-    with_retry,
+    LlmClient, LlmError, RetryConfig as LlmRetryConfig, extract_retry_after, with_retry,
 };
 use crate::logging;
 use crate::models::{MessageRequest, MessageResponse, ServerToolUsage, SystemPrompt, Usage};
@@ -130,15 +127,10 @@ pub struct DeepSeekClient {
    pub(super) api_provider: ApiProvider,
    retry: RetryPolicy,
    default_model: String,
-    use_chat_completions: AtomicBool,
-    /// Counter of chat-completions requests since last experimental Responses API probe.
-    /// After RESPONSES_RECOVERY_INTERVAL requests, we retry the Responses API when
    connection_health: Arc<AsyncMutex<ConnectionHealth>>,
    rate_limiter: Arc<AsyncMutex<TokenBucket>>,
 }

-/// After this many chat-completions requests, retry the experimental Responses
-/// API to see if it has recovered.
 const CONNECTION_FAILURE_THRESHOLD: u32 = 2;
 const RECOVERY_PROBE_COOLDOWN: Duration = Duration::from_secs(15);

@@ -302,8 +294,817 @@ impl Clone for DeepSeekClient {
            api_provider: self.api_provider,
            retry: self.retry.clone(),
            default_model: self.default_model.clone(),
-            use_chat_completions: AtomicBool::new(
-                self.use_chat_completions.load(Ordering::Relaxed),
+            connection_health: self.connection_health.clone(),
+            rate_limiter: self.rate_limiter.clone(),
+        }
+    }
+}
+
+// === Helpers ===
+
+/// Upper bound on how much of an error response body is buffered (64 KB).
+pub(super) const ERROR_BODY_MAX_BYTES: usize = 64 * 1024;
+
+/// Collect an error response body as text, keeping at most `max_bytes` bytes.
+///
+/// Streaming stops at the end of the body, at the first chunk that fails to
+/// arrive, or once the byte budget is spent. Invalid UTF-8 is replaced lossily.
+pub(super) async fn bounded_error_text(response: reqwest::Response, max_bytes: usize) -> String {
+    use futures_util::StreamExt;
+    let mut collected = Vec::with_capacity(max_bytes.min(8192));
+    let mut body = response.bytes_stream();
+    loop {
+        // A finished stream and a failed chunk both end collection.
+        let Some(Ok(piece)) = body.next().await else {
+            break;
+        };
+        let budget = max_bytes.saturating_sub(collected.len());
+        if budget == 0 {
+            break;
+        }
+        let take = piece.len().min(budget);
+        collected.extend_from_slice(&piece[..take]);
+    }
+    String::from_utf8_lossy(&collected).into_owned()
+}
+
+/// Returns true when `base_url` is a plain-HTTP URL whose host is exactly a
+/// loopback endpoint: `localhost`, an IPv4 loopback address, or a bracketed
+/// IPv6 loopback address, optionally followed by a numeric port.
+///
+/// A bare prefix test such as `starts_with("http://localhost")` is not
+/// enough: it also matches `http://localhost.evil.example` and
+/// `http://localhost@evil.example`, which resolve to remote hosts.
+fn is_loopback_http_url(base_url: &str) -> bool {
+    let Some(rest) = base_url.strip_prefix("http://") else {
+        return false;
+    };
+    // The authority ends at the first path, query, or fragment delimiter.
+    let authority = rest.split(['/', '?', '#']).next().unwrap_or(rest);
+    // Userinfo lets the text before `@` masquerade as the host; never treat
+    // such a URL as loopback.
+    if authority.contains('@') {
+        return false;
+    }
+    // Split `host[:port]`, keeping the brackets of an IPv6 literal with the host.
+    let (host, tail) = if authority.starts_with('[') {
+        match authority.find(']') {
+            Some(close) => authority.split_at(close + 1),
+            None => return false,
+        }
+    } else {
+        match authority.find(':') {
+            Some(colon) => authority.split_at(colon),
+            None => (authority, ""),
+        }
+    };
+    let port_is_valid = match tail.strip_prefix(':') {
+        Some(port) => port.bytes().all(|b| b.is_ascii_digit()),
+        None => tail.is_empty(),
+    };
+    if !port_is_valid {
+        return false;
+    }
+    if host == "localhost" {
+        return true;
+    }
+    if let Some(v6) = host.strip_prefix('[').and_then(|h| h.strip_suffix(']')) {
+        return v6
+            .parse::<std::net::Ipv6Addr>()
+            .is_ok_and(|ip| ip.is_loopback());
+    }
+    host.parse::<std::net::Ipv4Addr>()
+        .is_ok_and(|ip| ip.is_loopback())
+}
+
+/// Reject base URLs that would send credentials over an untrusted plain-text
+/// connection.
+///
+/// Accepted without further checks: any `https://` URL, and `http://` URLs
+/// whose host is a loopback endpoint (see `is_loopback_http_url`). Any other
+/// `http://` URL is accepted only when the `ALLOW_INSECURE_HTTP_ENV`
+/// environment variable is `1` or `true` (case-insensitive), in which case a
+/// warning is logged.
+///
+/// # Errors
+/// Returns an error for non-loopback `http://` URLs without the override and
+/// for URLs using any other scheme.
+fn validate_base_url_security(base_url: &str) -> Result<()> {
+    if base_url.starts_with("https://") || is_loopback_http_url(base_url) {
+        return Ok(());
+    }
+
+    if base_url.starts_with("http://")
+        && std::env::var(ALLOW_INSECURE_HTTP_ENV)
+            .ok()
+            .as_deref()
+            .is_some_and(|v| v == "1" || v.eq_ignore_ascii_case("true"))
+    {
+        logging::warn(format!(
+            "Using insecure HTTP base URL because {} is set",
+            ALLOW_INSECURE_HTTP_ENV
+        ));
+        return Ok(());
+    }
+
+    if base_url.starts_with("http://") {
+        anyhow::bail!(
+            "Refusing insecure base URL '{}'. Use HTTPS or set {}=1 to override for trusted environments.",
+            base_url,
+            ALLOW_INSECURE_HTTP_ENV
+        );
+    }
+
+    anyhow::bail!(
+        "Refusing base URL '{}': only HTTPS (or explicitly allowed HTTP) URLs are supported.",
+        base_url,
+    )
+}
+
+/// Normalize a base URL so it ends in an API version segment.
+///
+/// Trailing slashes are dropped. A URL already ending in `/v1` or `/beta` is
+/// returned as-is; otherwise `/v1` is appended.
+pub(super) fn versioned_base_url(base_url: &str) -> String {
+    let root = base_url.trim_end_matches('/');
+    let already_versioned = ["/v1", "/beta"]
+        .iter()
+        .any(|suffix| root.ends_with(suffix));
+    match already_versioned {
+        true => root.to_owned(),
+        false => format!("{root}/v1"),
+    }
+}
+
+/// Join `path` onto the versioned form of `base_url` with exactly one `/`
+/// between them, whatever slashes either side already carries.
+pub(super) fn api_url(base_url: &str, path: &str) -> String {
+    let root = versioned_base_url(base_url);
+    let prefix = root.trim_end_matches('/');
+    let suffix = path.trim_start_matches('/');
+    format!("{prefix}/{suffix}")
+}
+
+// === DeepSeekClient ===
+
+/// Reports whether `DEEPSEEK_FORCE_HTTP1` holds a truthy value.
+///
+/// Truthy means `1`, `true`, `yes`, or `on` after trimming whitespace and
+/// ignoring ASCII case. `build_http_client` uses this to opt out of HTTP/2
+/// when DeepSeek's edge mishandles long-lived H2 streams (#103). An unset,
+/// unreadable, or otherwise-valued variable leaves HTTP/2 enabled.
+fn force_http1_from_env() -> bool {
+    let Ok(raw) = std::env::var("DEEPSEEK_FORCE_HTTP1") else {
+        return false;
+    };
+    let normalized = raw.trim().to_ascii_lowercase();
+    ["1", "true", "yes", "on"].contains(&normalized.as_str())
+}
+
+/// Load the certificate file at `cert_path` (the `SSL_CERT_FILE` value) and
+/// register its contents as additional root certificates (#418).
+///
+/// The bytes are first parsed as a PEM bundle, which also covers single-cert
+/// PEM files; if that fails they are parsed as one DER certificate. Every
+/// failure is logged as a warning and the builder is returned without the
+/// extra roots, so a bad env var degrades gracefully.
+fn add_extra_root_certs(
+    mut builder: reqwest::ClientBuilder,
+    cert_path: &str,
+) -> reqwest::ClientBuilder {
+    let bytes = match std::fs::read(cert_path) {
+        Ok(contents) => contents,
+        Err(err) => {
+            logging::warn(format!(
+                "SSL_CERT_FILE={cert_path} could not be read: {err}"
+            ));
+            return builder;
+        }
+    };
+
+    match reqwest::Certificate::from_pem_bundle(&bytes) {
+        Ok(bundle) => {
+            let added = bundle.len();
+            builder = bundle
+                .into_iter()
+                .fold(builder, |acc, cert| acc.add_root_certificate(cert));
+            logging::info(format!(
+                "SSL_CERT_FILE={cert_path} loaded ({added} cert(s))"
+            ));
+        }
+        // Not a PEM bundle: fall back to treating the file as a single DER cert.
+        Err(_) => match reqwest::Certificate::from_der(&bytes) {
+            Ok(cert) => {
+                builder = builder.add_root_certificate(cert);
+                logging::info(format!("SSL_CERT_FILE={cert_path} loaded (1 DER cert)"));
+            }
+            Err(err) => {
+                logging::warn(format!(
+                    "SSL_CERT_FILE={cert_path} could not be parsed as PEM bundle or DER: {err}"
+                ));
+            }
+        },
+    }
+    builder
+}
+
+impl DeepSeekClient {
+    /// Create a DeepSeek client from CLI configuration.
+    ///
+    /// Reads the API key, base URL, provider, retry policy, and default model
+    /// from `config`, logs the provider, base URL, and retry settings, and
+    /// builds the shared HTTP client. Connection health starts from its
+    /// default state and the rate limiter is configured from the environment.
+    ///
+    /// # Errors
+    /// Fails when the API key lookup fails, when the base URL is rejected by
+    /// `validate_base_url_security`, or when the HTTP client cannot be built.
+    pub fn new(config: &Config) -> Result<Self> {
+        let api_key = config.deepseek_api_key()?;
+        let base_url = config.deepseek_base_url();
+        let api_provider = config.api_provider();
+        validate_base_url_security(&base_url)?;
+        let retry = config.retry_policy();
+        let default_model = config.default_model();
+
+        logging::info(format!("API provider: {}", api_provider.as_str()));
+        logging::info(format!("API base URL: {base_url}"));
+        logging::info(format!(
+            "Retry policy: enabled={}, max_retries={}, initial_delay={}s, max_delay={}s",
+            retry.enabled, retry.max_retries, retry.initial_delay, retry.max_delay
+        ));
+
+        let http_client = Self::build_http_client(&api_key)?;
+
+        Ok(Self {
+            http_client,
+            api_key,
+            base_url,
+            api_provider,
+            retry,
+            default_model,
+            connection_health: Arc::new(AsyncMutex::new(ConnectionHealth::default())),
+            rate_limiter: Arc::new(AsyncMutex::new(TokenBucket::from_env())),
+        })
+    }
+
+    /// Build the `reqwest` client used for every request.
+    ///
+    /// Default headers carry `Content-Type: application/json` and, when
+    /// `api_key` is not blank, `Authorization: Bearer <api_key>`. The client
+    /// uses a 30 s connect timeout, 30 s TCP keepalive, HTTP/2 keepalive pings
+    /// every 15 s with a 20 s timeout, and requires TLS 1.2 or newer.
+    ///
+    /// # Errors
+    /// Fails when the API key cannot be encoded as a header value or when
+    /// `reqwest` cannot construct the client.
+    fn build_http_client(api_key: &str) -> Result<reqwest::Client> {
+        let mut headers = HeaderMap::new();
+        headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
+        // A blank key sends no Authorization header at all.
+        if !api_key.trim().is_empty() {
+            headers.insert(
+                AUTHORIZATION,
+                HeaderValue::from_str(&format!("Bearer {api_key}"))?,
+            );
+        }
+        let mut builder = reqwest::Client::builder()
+            .default_headers(headers)
+            .connect_timeout(Duration::from_secs(30))
+            .tcp_keepalive(Some(Duration::from_secs(30)))
+            .http2_keep_alive_interval(Some(Duration::from_secs(15)))
+            .http2_keep_alive_timeout(Duration::from_secs(20))
+            .min_tls_version(reqwest::tls::Version::TLS_1_2);
+        // Opt-out of HTTP/2 controlled by DEEPSEEK_FORCE_HTTP1.
+        if force_http1_from_env() {
+            logging::info("DEEPSEEK_FORCE_HTTP1=1 — pinning HTTP client to HTTP/1.1");
+            builder = builder.http1_only();
+        }
+        // A non-empty SSL_CERT_FILE contributes extra trusted root certificates.
+        if let Ok(cert_path) = std::env::var("SSL_CERT_FILE")
+            && !cert_path.is_empty()
+        {
+            builder = add_extra_root_certs(builder, &cert_path);
+        }
+        builder.build().map_err(Into::into)
+    }
+
+    /// List available models from the provider.
+    ///
+    /// Issues a GET to the `models` endpoint through `send_with_retry` and
+    /// hands the response body to `parse_models_response`.
+    ///
+    /// # Errors
+    /// Fails when the request fails, when the final status is not a success
+    /// (the message includes a size-capped copy of the body), or when the
+    /// body cannot be parsed.
+    pub async fn list_models(&self) -> Result<Vec<AvailableModel>> {
+        let url = api_url(&self.base_url, "models");
+        let response = self.send_with_retry(|| self.http_client.get(&url)).await?;
+
+        let status = response.status();
+        if !status.is_success() {
+            let error_text = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
+            anyhow::bail!("Failed to list models: HTTP {status}: {error_text}");
+        }
+        // NOTE(review): a failure while reading the body is replaced by an
+        // empty string here, so the parser sees "" instead of the read error.
+        let response_text = response.text().await.unwrap_or_default();
+
+        parse_models_response(&response_text)
+    }
+
+    /// Wait until the shared rate limiter allows one more request.
+    ///
+    /// The limiter lock is released before sleeping, so other tasks are not
+    /// blocked on the mutex while this one waits.
+    async fn wait_for_rate_limit(&self) {
+        let maybe_delay = {
+            let mut limiter = self.rate_limiter.lock().await;
+            limiter.delay_until_available(1.0)
+        };
+        if let Some(delay) = maybe_delay {
+            tokio::time::sleep(delay).await;
+        }
+    }
+
+    /// Record a successful request in the shared connection-health state and
+    /// log "Connection recovered" when `apply_request_success` returns true.
+    async fn mark_request_success(&self) {
+        let mut health = self.connection_health.lock().await;
+        if apply_request_success(&mut health, Instant::now()) {
+            logging::info("Connection recovered");
+        }
+    }
+
+    /// Record a failed request in the shared connection-health state and log
+    /// a warning with the current consecutive-failure count and `reason`.
+    async fn mark_request_failure(&self, reason: &str) {
+        let mut health = self.connection_health.lock().await;
+        apply_request_failure(&mut health, Instant::now());
+        logging::warn(format!(
+            "Connection degraded (failures={}): {}",
+            health.consecutive_failures, reason
+        ));
+    }
+
+    /// Send one recovery probe when `mark_recovery_probe_if_due` says it is due.
+    ///
+    /// The probe is a single GET to the `models` endpoint, sent directly
+    /// (no retry wrapper, no rate-limit wait). Its outcome is recorded through
+    /// `mark_request_success` / `mark_request_failure`.
+    async fn maybe_probe_recovery(&self) {
+        // Decide under the lock, then release it before doing network I/O.
+        let should_probe = {
+            let mut health = self.connection_health.lock().await;
+            mark_recovery_probe_if_due(&mut health, Instant::now())
+        };
+        if !should_probe {
+            return;
+        }
+        let health_url = api_url(&self.base_url, "models");
+        let probe = self.http_client.get(health_url).send().await;
+        match probe {
+            Ok(resp) if resp.status().is_success() => {
+                self.mark_request_success().await;
+                logging::info("Recovery probe succeeded");
+            }
+            Ok(resp) => {
+                self.mark_request_failure(&format!("probe status={}", resp.status()))
+                    .await;
+            }
+            Err(err) => {
+                self.mark_request_failure(&format!("probe error={err}"))
+                    .await;
+            }
+        }
+    }
+
+    /// Send a request produced by `build`, retrying via `with_retry` under
+    /// this client's retry policy.
+    ///
+    /// `build` is called each time the attempt closure runs, so it must
+    /// produce a fresh `RequestBuilder` every time. Each attempt waits on the
+    /// rate limiter before sending. Responses with status 429 or 5xx are
+    /// turned into retryable `LlmError`s carrying any `Retry-After` value and
+    /// a size-capped body. Every other response, including non-retryable
+    /// error statuses, is returned as `Ok` for the caller to inspect.
+    ///
+    /// Retry progress is published through `crate::retry_status`, and the
+    /// final outcome updates connection health. After a final failure a
+    /// recovery probe may be sent.
+    ///
+    /// # Errors
+    /// Returns the text of the last error once `with_retry` gives up.
+    pub(super) async fn send_with_retry<F>(&self, mut build: F) -> Result<reqwest::Response>
+    where
+        F: FnMut() -> reqwest::RequestBuilder,
+    {
+        let retry_cfg: LlmRetryConfig = self.retry.clone().into();
+        let request_result = with_retry(
+            &retry_cfg,
+            || {
+                let request = build();
+                async move {
+                    self.wait_for_rate_limit().await;
+                    let response = request
+                        .send()
+                        .await
+                        .map_err(|err| LlmError::from_reqwest(&err))?;
+                    let status = response.status();
+                    if status.is_success() {
+                        return Ok(response);
+                    }
+                    // Only 429 and 5xx are retried; other statuses go back to
+                    // the caller as a normal response.
+                    let retryable = status.as_u16() == 429 || status.is_server_error();
+                    if !retryable {
+                        return Ok(response);
+                    }
+                    // Read the header before the body read consumes `response`.
+                    let retry_after = extract_retry_after(response.headers());
+                    let body = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
+                    Err(LlmError::from_http_response_with_retry_after(
+                        status.as_u16(),
+                        &body,
+                        retry_after,
+                    ))
+                }
+            },
+            // Retry callback: log the reason and publish it to the retry banner.
+            Some(Box::new(|err, attempt, delay| {
+                let (reason_label, human_reason) = retry_reason_label_and_human(err);
+                logging::warn(format!(
+                    "HTTP retry reason={} attempt={} delay={:.2}s",
+                    reason_label,
+                    attempt + 1,
+                    delay.as_secs_f64(),
+                ));
+                crate::retry_status::start(attempt + 1, delay, human_reason);
+            })),
+        )
+        .await;
+
+        match request_result {
+            Ok(response) => {
+                crate::retry_status::succeeded();
+                self.mark_request_success().await;
+                Ok(response)
+            }
+            Err(err) => {
+                let last = err.last_error.to_string();
+                // Report a failed retry sequence only if more than one attempt
+                // was made; otherwise just clear the retry status.
+                if err.attempts > 1 {
+                    crate::retry_status::failed(last.clone());
+                } else {
+                    crate::retry_status::clear();
+                }
+                self.mark_request_failure(&last).await;
+                self.maybe_probe_recovery().await;
+                Err(anyhow::anyhow!(last))
+            }
+        }
+    }
+}
+
+/// Classify an `LlmError` for retry reporting.
+///
+/// Returns a pair: a fixed categorical label for structured logs and
+/// metrics, and a short human-readable reason for the retry banner. Both
+/// come out of a single match so the error is classified only once.
+fn retry_reason_label_and_human(err: &LlmError) -> (&'static str, String) {
+    match err {
+        LlmError::RateLimited { retry_after, .. } => {
+            // Mention the server-provided wait only when one was supplied.
+            let human = match retry_after {
+                Some(after) => format!("rate limited (Retry-After {}s)", after.as_secs()),
+                None => String::from("rate limited"),
+            };
+            ("rate_limited", human)
+        }
+        LlmError::ServerError { status, .. } => {
+            let human = format!("upstream {status}");
+            ("server_error", human)
+        }
+        LlmError::NetworkError(_) => ("network_error", String::from("network error")),
+        LlmError::Timeout(_) => ("timeout", String::from("timeout")),
+        _ => ("other", String::from("other")),
+    }
+}
+
+impl LlmClient for DeepSeekClient {
+    /// Name of the configured API provider.
+    fn provider_name(&self) -> &'static str {
+        self.api_provider.as_str()
+    }
+
+    /// The client's default model identifier.
+    fn model(&self) -> &str {
+        &self.default_model
+    }
+
+    /// Probe the provider's `models` endpoint.
+    ///
+    /// Returns `Ok(true)` for a successful HTTP status and `Ok(false)` for any
+    /// other status or a transport error; the outcome is also recorded via
+    /// `mark_request_success` / `mark_request_failure`. No `Err` is produced
+    /// by this implementation.
+    async fn health_check(&self) -> Result<bool> {
+        let health_url = api_url(&self.base_url, "models");
+        self.wait_for_rate_limit().await;
+
+        // Both failure shapes funnel into one failure record below.
+        let failure = match self.http_client.get(health_url).send().await {
+            Ok(resp) if resp.status().is_success() => {
+                self.mark_request_success().await;
+                return Ok(true);
+            }
+            Ok(resp) => format!("health status={}", resp.status()),
+            Err(err) => format!("health error={err}"),
+        };
+        self.mark_request_failure(&failure).await;
+        Ok(false)
+    }
+
+    /// Send a non-streaming request through the chat-completions path.
+    async fn create_message(&self, request: MessageRequest) -> Result<MessageResponse> {
+        let response = self.create_message_chat(&request).await;
+        response
+    }
+
+    /// Send a streaming request through the chat-completions stream handler.
+    async fn create_message_stream(
+        &self,
+        request: MessageRequest,
+    ) -> Result<crate::llm_client::StreamEventBox> {
+        let stream = self.handle_chat_completion_stream(request).await;
+        stream
+    }
+}
+
+/// Deserialized shape of the model-list payload: a top-level `data` array of
+/// model entries. Consumed by `parse_models_response`.
+#[derive(Debug, Deserialize)]
+struct ModelsListResponse {
+    data: Vec<ModelListItem>,
+}
+
+/// One entry of the model-list payload.
+///
+/// Only `id` is required; `owned_by` and `created` fall back to `None` when
+/// the field is absent from the payload (`#[serde(default)]`).
+#[derive(Debug, Deserialize)]
+struct ModelListItem {
+    id: String,
+    #[serde(default)]
+    owned_by: Option<String>,
+    #[serde(default)]
+    created: Option<u64>,
+}
+
+/// Parse a model-list JSON payload into `AvailableModel`s.
+///
+/// The result is sorted by `id` and contains at most one entry per `id`.
+///
+/// # Errors
+/// Fails with context "Failed to parse model list JSON" when `payload` does
+/// not deserialize into the expected shape.
+pub(super) fn parse_models_response(payload: &str) -> Result<Vec<AvailableModel>> {
+    let listing: ModelsListResponse =
+        serde_json::from_str(payload).context("Failed to parse model list JSON")?;
+
+    let mut models: Vec<AvailableModel> = Vec::with_capacity(listing.data.len());
+    for entry in listing.data {
+        models.push(AvailableModel {
+            id: entry.id,
+            owned_by: entry.owned_by,
+            created: entry.created,
+        });
+    }
+
+    // Stable sort first so equal ids are adjacent, then drop the repeats.
+    models.sort_by(|left, right| left.id.cmp(&right.id));
+    models.dedup_by(|candidate, kept| candidate.id == kept.id);
+    Ok(models)
+}
+
+/// Flatten an optional system prompt into a single instruction string.
+///
+/// A `Text` prompt is returned as-is. A `Blocks` prompt has its block texts
+/// joined with a `---` separator line; if the joined text is blank the result
+/// is `None`. A missing prompt yields `None`.
+pub(super) fn system_to_instructions(system: Option<SystemPrompt>) -> Option<String> {
+    let blocks = match system? {
+        SystemPrompt::Text(text) => return Some(text),
+        SystemPrompt::Blocks(blocks) => blocks,
+    };
+
+    let texts: Vec<_> = blocks.into_iter().map(|block| block.text).collect();
+    let joined = texts.join("\n\n---\n\n");
+    if joined.trim().is_empty() {
+        return None;
+    }
+    Some(joined)
+}
+
+/// Write the provider-specific reasoning controls into a request `body`.
+///
+/// `effort` is trimmed and lowercased, then bucketed:
+/// - `off` / `disabled` / `none` / `false` turn thinking off;
+/// - `low` / `minimal` / `medium` / `mid` / `high` / empty all request `"high"`;
+/// - `xhigh` / `max` / `highest` request `"max"`.
+///
+/// `None` and unrecognized values leave `body` untouched. NVIDIA NIM receives
+/// the settings inside `chat_template_kwargs`; every other provider receives
+/// top-level `reasoning_effort` / `thinking` fields.
+pub(super) fn apply_reasoning_effort(
+    body: &mut Value,
+    effort: Option<&str>,
+    provider: ApiProvider,
+) {
+    let Some(effort) = effort else {
+        return;
+    };
+
+    // `None` means "thinking disabled"; `Some(level)` is the effort to send.
+    let level: Option<&'static str> = match effort.trim().to_ascii_lowercase().as_str() {
+        "off" | "disabled" | "none" | "false" => None,
+        "low" | "minimal" | "medium" | "mid" | "high" | "" => Some("high"),
+        "xhigh" | "max" | "highest" => Some("max"),
+        _ => return,
+    };
+
+    // Kept exhaustive so a newly added provider must pick a body shape.
+    let uses_template_kwargs = match provider {
+        ApiProvider::NvidiaNim => true,
+        ApiProvider::Deepseek
+        | ApiProvider::DeepseekCN
+        | ApiProvider::Openrouter
+        | ApiProvider::Novita
+        | ApiProvider::Fireworks
+        | ApiProvider::Sglang => false,
+    };
+
+    match (uses_template_kwargs, level) {
+        (true, None) => {
+            body["chat_template_kwargs"] = json!({
+                "thinking": false,
+            });
+        }
+        (true, Some(level)) => {
+            body["chat_template_kwargs"] = json!({
+                "thinking": true,
+                "reasoning_effort": level,
+            });
+        }
+        (false, None) => {
+            body["thinking"] = json!({ "type": "disabled" });
+        }
+        (false, Some(level)) => {
+            body["reasoning_effort"] = json!(level);
+            body["thinking"] = json!({ "type": "enabled" });
+        }
+    }
+}
+
+/// Extract token accounting from a provider `usage` object.
+///
+/// Accepts both naming schemes (`input_tokens` / `output_tokens` and
+/// `prompt_tokens` / `completion_tokens`). Cache telemetry prefers the
+/// explicit `prompt_cache_hit_tokens` / `prompt_cache_miss_tokens` fields and
+/// falls back to `prompt_tokens_details.cached_tokens`, deriving the miss
+/// count as `input_tokens - cached_tokens` (saturating at zero). Missing or
+/// non-integer input/output counts read as 0; optional counters stay `None`.
+///
+/// Counts are narrowed from `u64` to `u32` by clamping to `u32::MAX`, so an
+/// oversized value can never wrap around to a small, misleading number.
+pub(super) fn parse_usage(usage: Option<&Value>) -> Usage {
+    /// Narrow a JSON counter to `u32`, saturating instead of wrapping.
+    fn clamp_u32(value: u64) -> u32 {
+        // The `min` guarantees the cast below is lossless.
+        value.min(u64::from(u32::MAX)) as u32
+    }
+
+    let input_tokens = usage
+        .and_then(|u| u.get("input_tokens").or_else(|| u.get("prompt_tokens")))
+        .and_then(Value::as_u64)
+        .unwrap_or(0);
+    let output_tokens = usage
+        .and_then(|u| {
+            u.get("output_tokens")
+                .or_else(|| u.get("completion_tokens"))
+        })
+        .and_then(Value::as_u64)
+        .unwrap_or(0);
+    // Fallback source for cache hits when the explicit hit/miss fields are absent.
+    let cached_tokens = usage
+        .and_then(|u| u.get("prompt_tokens_details"))
+        .and_then(|details| details.get("cached_tokens"))
+        .and_then(Value::as_u64);
+    let prompt_cache_hit_tokens = usage
+        .and_then(|u| u.get("prompt_cache_hit_tokens"))
+        .and_then(Value::as_u64)
+        .or(cached_tokens)
+        .map(clamp_u32);
+    let prompt_cache_miss_tokens = usage
+        .and_then(|u| u.get("prompt_cache_miss_tokens"))
+        .and_then(Value::as_u64)
+        // Derived in u64 so the subtraction happens before any narrowing.
+        .or_else(|| cached_tokens.map(|cached| input_tokens.saturating_sub(cached)))
+        .map(clamp_u32);
+    let reasoning_tokens = usage
+        .and_then(|u| u.get("completion_tokens_details"))
+        .and_then(|details| details.get("reasoning_tokens"))
+        .and_then(Value::as_u64)
+        .map(clamp_u32);
+
+    let server_tool_use = usage.and_then(|u| u.get("server_tool_use")).map(|server| {
+        let code_execution_requests = server
+            .get("code_execution_requests")
+            .and_then(Value::as_u64)
+            .map(clamp_u32);
+        let tool_search_requests = server
+            .get("tool_search_requests")
+            .and_then(Value::as_u64)
+            .map(clamp_u32);
+        ServerToolUsage {
+            code_execution_requests,
+            tool_search_requests,
+        }
+    });
+
+    Usage {
+        input_tokens: clamp_u32(input_tokens),
+        output_tokens: clamp_u32(output_tokens),
+        prompt_cache_hit_tokens,
+        prompt_cache_miss_tokens,
+        reasoning_tokens,
+        reasoning_replay_tokens: None,
+        server_tool_use,
+    }
+}
+
+impl DeepSeekClient {
+    /// Call the DeepSeek `/beta/completions` FIM endpoint.
+    ///
+    /// Posts `model`, `prompt`, `suffix` and `max_tokens` through
+    /// `send_with_retry` and returns the text found at `choices[0].text`.
+    ///
+    /// # Errors
+    /// Fails when the request itself fails, when the endpoint answers with a
+    /// non-success status, when the response body cannot be read, when the
+    /// body is not valid JSON, or when `choices[0].text` is missing.
+    pub async fn fim_completion(
+        &self,
+        model: &str,
+        prompt: &str,
+        suffix: &str,
+        max_tokens: u32,
+    ) -> anyhow::Result<String> {
+        let url = api_url(&self.base_url, "beta/completions");
+        let body = json!({
+            "model": model,
+            "prompt": prompt,
+            "suffix": suffix,
+            "max_tokens": max_tokens,
+        });
+        let response = self
+            .send_with_retry(|| self.http_client.post(&url).json(&body))
+            .await?;
+        let status = response.status();
+        if !status.is_success() {
+            let error_text = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
+            anyhow::bail!("FIM API error: HTTP {status}: {error_text}");
+        }
+        // Surface a failed body read as itself instead of letting an empty
+        // string fall through to a misleading JSON parse error.
+        let response_text = response
+            .text()
+            .await
+            .context("Failed to read FIM API response body")?;
+        let value: serde_json::Value =
+            serde_json::from_str(&response_text).context("Failed to parse FIM API response")?;
+        let text = value
+            .pointer("/choices/0/text")
+            .and_then(serde_json::Value::as_str)
+            .ok_or_else(|| anyhow::anyhow!("FIM response missing choices[0].text"))?;
+        Ok(text.to_string())
+    }
+}
+
+mod chat;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::client::chat::{
+        build_chat_messages, build_chat_messages_for_request, count_reasoning_replay_chars,
+        parse_chat_message, parse_sse_chunk, sanitize_thinking_mode_messages, tool_to_chat,
+    };
+    use crate::models::{
+        ContentBlock, ContentBlockStart, Delta, Message, MessageRequest, StreamEvent, Tool,
+    };
+    use serde_json::json;
+
+    /// A dotted tool name encodes the `.` as `-x00002E-` and decodes back to
+    /// the original string.
+    #[test]
+    fn tool_name_roundtrip_dot() {
+        let original = "multi_tool_use.parallel";
+        let encoded = to_api_tool_name(original);
+        assert_eq!(encoded, "multi_tool_use-x00002E-parallel");
+        let decoded = from_api_tool_name(&encoded);
+        assert_eq!(decoded, original);
+    }
+
+    /// An escape preceded by a literal `.` instead of `-` is still decoded:
+    /// the literal dot is kept and the escape becomes a second dot.
+    #[test]
+    fn tool_name_decode_mangled_dot_prefix() {
+        let mangled = "multi_tool_use.x00002E-parallel";
+        let decoded = from_api_tool_name(mangled);
+        assert_eq!(decoded, "multi_tool_use..parallel");
+    }
+
+    /// A bare `x00002E` escape with no surrounding dashes still decodes to `.`.
+    #[test]
+    fn tool_name_decode_bare_hex_no_trailing_dash() {
+        let mangled = "foo_x00002Ebar";
+        let decoded = from_api_tool_name(mangled);
+        assert_eq!(decoded, "foo_.bar");
+    }
+
+    /// A bare escape-looking run whose code point is alphanumeric (`0x41` is
+    /// `A`) is left untouched by decoding.
+    #[test]
+    fn tool_name_bare_hex_preserves_alnum() {
+        let input = "foox000041bar";
+        let decoded = from_api_tool_name(input);
+        assert_eq!(decoded, input);
+    }
+
+    /// A bare escape-looking run whose code point is an underscore (`0x5F`)
+    /// is left untouched by decoding.
+    #[test]
+    fn tool_name_bare_hex_preserves_underscore() {
+        let input = "foox00005Fbar";
+        let decoded = from_api_tool_name(input);
+        assert_eq!(decoded, input);
+    }
+
+    /// A tool name containing `:` survives an encode/decode round trip.
+    #[test]
+    fn tool_name_roundtrip_colon() {
+        let original = "mcp__server:tool_name";
+        let encoded = to_api_tool_name(original);
+        let decoded = from_api_tool_name(&encoded);
+        assert_eq!(decoded, original);
+    }
+
+    /// `api_url` adds `/v1` to a bare host, does not duplicate an existing
+    /// `/v1`, and leaves a `/beta` base path as-is.
+    #[test]
+    fn api_url_handles_default_v1_and_beta_base_urls() {
+        assert_eq!(
+            api_url("https://api.deepseek.com", "chat/completions"),
+            "https://api.deepseek.com/v1/chat/completions"
+        );
+        assert_eq!(
+            api_url("https://api.deepseek.com/v1", "chat/completions"),
+            "https://api.deepseek.com/v1/chat/completions"
+        );
+        assert_eq!(
+            api_url("https://api.deepseek.com/beta", "chat/completions"),
+            "https://api.deepseek.com/beta/chat/completions"
+        );
+    }
+
+    /// An assistant message with a `Thinking` block and a `Text` block is
+    /// emitted with the text as `content` and the thinking as
+    /// `reasoning_content`.
+    #[test]
+    fn chat_messages_keep_reasoning_content_on_all_assistant_messages() {
+        let message = Message {
+            role: "assistant".to_string(),
+            content: vec![
+                ContentBlock::Thinking {
+                    thinking: "plan".to_string(),
+                },
+                ContentBlock::Text {
+                    text: "done".to_string(),
+                    cache_control: None,
+                },
+            ],
+        };
+        let out = build_chat_messages(None, &[message], "deepseek-v4-pro");
+        let assistant = out
+            .iter()
+            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
+            .expect("assistant message");
+        assert_eq!(
+            assistant.get("content").and_then(Value::as_str),
+            Some("done")
+        );
+        assert_eq!(
+            assistant.get("reasoning_content").and_then(Value::as_str),
+            Some("plan"),
+            "thinking-mode models must keep reasoning_content on all assistant messages"
+        );
+    }
+
+    /// After a completed tool round and a fresh user turn, the earlier
+    /// tool-calling assistant message must still carry its
+    /// `reasoning_content` in the built chat messages.
+    #[test]
+    fn chat_messages_replay_prior_tool_round_reasoning_after_new_user_turn() {
+        // Conversation: question -> thinking + tool call -> tool result ->
+        // final answer -> new user question.
+        let messages = vec![
+            Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: "Need the date".to_string(),
+                    cache_control: None,
+                }],
+            },
+            Message {
+                role: "assistant".to_string(),
+                content: vec![
+                    ContentBlock::Thinking {
+                        thinking: "Need to call a tool".to_string(),
+                    },
+                    ContentBlock::ToolUse {
+                        id: "tool-1".to_string(),
+                        name: "get_date".to_string(),
+                        input: json!({}),
+                        caller: None,
+                    },
+                ],
+            },
+            Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::ToolResult {
+                    tool_use_id: "tool-1".to_string(),
+                    content: "2026-04-23".to_string(),
+                    is_error: None,
+                    content_blocks: None,
+                }],
+            },
+            Message {
+                role: "assistant".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: "It is 2026-04-23.".to_string(),
+                    cache_control: None,
+                }],
+            },
+            Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: "Thanks. Next question.".to_string(),
+                    cache_control: None,
+                }],
+            },
+        ];
+        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
+        // Locate the assistant message that issued the tool call.
+        let tool_assistant = out
+            .iter()
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need to call a tool"),
+            "thinking-mode tool rounds must replay reasoning_content on later requests"
+        );
+    }
+
+    #[test]
+    fn chat_messages_allow_tool_round_without_reasoning_when_thinking_disabled() {
+        let request = MessageRequest {
+            model: "deepseek-v4-pro".to_string(),
+            messages: vec![
+                Message {
+                    role: "assistant".to_string(),
+                    content: vec![ContentBlock::ToolUse {
+                        id: "call-no-thinking".to_string(),
+                        name: "read_file".to_string(),
+                        input: json!({"path": "Cargo.toml"}),
+                        caller: None,
+                    }],
+                },
+                Message {
+                    role: "user".to_string(),
+                    content: vec![ContentBlock::ToolResult {
+                        tool_use_id: "call-no-thinking".to_string(),
+                        content: "workspace manifest".to_string(),
+                        is_error: None,
+                        content_blocks: None,
+                    }],
+                },
+            ],
+            max_tokens: 1024,
+            system: None,
+            tools: None,
+            tool_choice: None,
+            metadata: None,
+            thinking: None,
+            reasoning_effort: Some("off".to_string()),
+            stream: None,
+            temperature: None,
+            top_p: None,
+        };
+
+        let out = build_chat_messages_for_request(&request);
+        assert!(
+            out.iter().any(
+                |value| value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
            ),
            "tool calls remain valid when thinking mode is disabled"
        );
@@ -876,41 +1677,6 @@ impl Clone for DeepSeekClient {

    #[test]
    fn parse_usage_reads_deepseek_cache_and_reasoning_tokens() {
-        fn parse_usage(usage: Option<&Value>) -> Usage {
-            let usage = usage.expect("usage");
-            let input_tokens = usage
-                .get("prompt_tokens")
-                .and_then(Value::as_u64)
-                .expect("prompt tokens") as u32;
-            let output_tokens = usage
-                .get("completion_tokens")
-                .and_then(Value::as_u64)
-                .expect("completion tokens") as u32;
-            let prompt_cache_hit_tokens = usage
-                .get("prompt_cache_hit_tokens")
-                .and_then(Value::as_u64)
-                .map(|v| v as u32);
-            let prompt_cache_miss_tokens = usage
-                .get("prompt_cache_miss_tokens")
-                .and_then(Value::as_u64)
-                .map(|v| v as u32);
-            let reasoning_tokens = usage
-                .get("completion_tokens_details")
-                .and_then(|d| d.get("reasoning_tokens"))
-                .and_then(Value::as_u64)
-                .map(|v| v as u32);
-
-            Usage {
-                input_tokens,
-                output_tokens,
-                prompt_cache_hit_tokens,
-                prompt_cache_miss_tokens,
-                reasoning_tokens,
-                reasoning_replay_tokens: None,
-                server_tool_use: None,
-            }
-        }
-
        let usage = parse_usage(Some(&json!({
            "prompt_tokens": 100,
            "completion_tokens": 20,
@@ -928,6 +1694,22 @@ impl Clone for DeepSeekClient {
        assert_eq!(usage.reasoning_tokens, Some(12));
    }

+    /// With only `prompt_tokens_details.cached_tokens` present, the cache-hit
+    /// count is taken from it and the miss count is derived as
+    /// `prompt_tokens - cached_tokens`.
+    #[test]
+    fn parse_usage_reads_v4_prompt_tokens_details_cached_tokens() {
+        let usage = parse_usage(Some(&json!({
+            "prompt_tokens": 4000,
+            "completion_tokens": 20,
+            "prompt_tokens_details": {
+                "cached_tokens": 3000
+            }
+        })));
+
+        assert_eq!(usage.input_tokens, 4000);
+        assert_eq!(usage.output_tokens, 20);
+        assert_eq!(usage.prompt_cache_hit_tokens, Some(3000));
+        assert_eq!(usage.prompt_cache_miss_tokens, Some(1000));
+    }
+
    #[test]
    fn sanitize_thinking_mode_counts_reasoning_replay_across_assistant_turns() {
        // Multi-turn body that mimics two prior tool-calling rounds: each
@@ -699,6 +699,134 @@ fn tail_chars(text: &str, max_chars: usize) -> String {
    text[start_idx..].to_string()
 }

+/// What is remembered about one `ToolUse` block so that a later tool result
+/// can be matched back to the call that produced it.
+#[derive(Debug, Clone)]
+struct ToolUseInfo {
+    /// Tool name as it appears on the `ToolUse` block.
+    name: String,
+    /// Identity of the call (`name` plus serialized arguments), as built by
+    /// `tool_use_key`; equal keys mean identical calls.
+    key: String,
+    /// Truncated serialized arguments, as built by `tool_args_preview`.
+    args_preview: String,
+}
+
+/// Build the identity key for a tool call: `<name>:<serialized arguments>`.
+///
+/// If `serde_json::to_string` fails, the arguments fall back to the value's
+/// own `to_string` rendering.
+fn tool_use_key(name: &str, input: &serde_json::Value) -> String {
+    let serialized = match serde_json::to_string(input) {
+        Ok(text) => text,
+        Err(_) => input.to_string(),
+    };
+    format!("{name}:{serialized}")
+}
+
+/// Render a tool call's arguments as serialized JSON, cut down with
+/// `truncate_chars(.., 120)` for use in one-line summaries.
+fn tool_args_preview(input: &serde_json::Value) -> String {
+    let serialized = match serde_json::to_string(input) {
+        Ok(text) => text,
+        Err(_) => input.to_string(),
+    };
+    let preview = truncate_chars(&serialized, 120);
+    preview.to_string()
+}
+
+/// Index every `ToolUse` block found in `messages` by its call id.
+///
+/// When the same id appears more than once, the last occurrence wins.
+fn collect_tool_uses(messages: &[Message]) -> HashMap<String, ToolUseInfo> {
+    messages
+        .iter()
+        .flat_map(|message| message.content.iter())
+        .filter_map(|block| match block {
+            ContentBlock::ToolUse {
+                id, name, input, ..
+            } => Some((
+                id.clone(),
+                ToolUseInfo {
+                    name: name.clone(),
+                    key: tool_use_key(name, input),
+                    args_preview: tool_args_preview(input),
+                },
+            )),
+            // Every other block kind carries no call metadata.
+            _ => None,
+        })
+        .collect()
+}
+
+/// One tool result located before the protected tail, recorded during the
+/// scan pass of `prune_tool_results` and acted on in its rewrite pass.
+struct ToolResultPruneCandidate {
+    /// Index of the containing message in the message slice.
+    message_idx: usize,
+    /// Index of the tool-result block within that message's content.
+    block_idx: usize,
+    /// Call identity key of the originating tool use.
+    key: String,
+    /// Name of the originating tool, used in the replacement summary.
+    tool_name: String,
+    /// Truncated arguments of the originating call, used in the summary.
+    args_preview: String,
+    /// Byte length of the result's `content` at scan time.
+    original_len: usize,
+}
+
+/// Mechanically prune old verbose tool results before paying for an LLM summary.
+///
+/// The most recent `protected_window` messages stay byte-for-byte intact. Older
+/// duplicate tool results keep the freshest full body and replace earlier
+/// copies with one-line summaries; non-duplicate old results are summarized only
+/// when they exceed the normal summary snippet size.
+///
+/// Tool results whose `tool_use_id` has no matching `ToolUse` block are left
+/// alone, as is any result whose summary would not be shorter than its body.
+/// A pruned result also has its `content_blocks` cleared.
+///
+/// Returns the total number of bytes removed from tool-result bodies.
+pub fn prune_tool_results(messages: &mut [Message], protected_window: usize) -> usize {
+    let cutoff = messages.len().saturating_sub(protected_window);
+    if cutoff == 0 {
+        return 0;
+    }
+
+    let tool_uses = collect_tool_uses(messages);
+    let mut candidates = Vec::new();
+    // Position `(message_idx, block_idx)` of the freshest result per call key.
+    // Tracking the block as well as the message keeps exactly one full body
+    // even when a single message carries several results for the same call.
+    let mut latest_by_key: HashMap<String, (usize, usize)> = HashMap::new();
+    let mut count_by_key: HashMap<String, usize> = HashMap::new();
+
+    // Scan pass: only messages before the protected tail are considered.
+    for (message_idx, message) in messages.iter().take(cutoff).enumerate() {
+        for (block_idx, block) in message.content.iter().enumerate() {
+            let ContentBlock::ToolResult {
+                tool_use_id,
+                content,
+                ..
+            } = block
+            else {
+                continue;
+            };
+            let Some(info) = tool_uses.get(tool_use_id) else {
+                continue;
+            };
+            // Iteration is in document order, so the last insert is the freshest.
+            latest_by_key.insert(info.key.clone(), (message_idx, block_idx));
+            *count_by_key.entry(info.key.clone()).or_insert(0) += 1;
+            candidates.push(ToolResultPruneCandidate {
+                message_idx,
+                block_idx,
+                key: info.key.clone(),
+                tool_name: info.name.clone(),
+                args_preview: info.args_preview.clone(),
+                original_len: content.len(),
+            });
+        }
+    }
+
+    // Rewrite pass.
+    let mut bytes_saved = 0usize;
+    for candidate in candidates {
+        let duplicate_count = count_by_key.get(&candidate.key).copied().unwrap_or(0);
+        let is_latest_duplicate = duplicate_count > 1
+            && latest_by_key.get(&candidate.key)
+                == Some(&(candidate.message_idx, candidate.block_idx));
+        if is_latest_duplicate {
+            continue;
+        }
+        // A unique result is only worth summarizing when it is verbose.
+        if duplicate_count <= 1 && candidate.original_len <= SUMMARY_TOOL_RESULT_SNIPPET_CHARS {
+            continue;
+        }
+
+        let summary = format!(
+            "[{}] tool result pruned ({} bytes; args: {})",
+            candidate.tool_name, candidate.original_len, candidate.args_preview
+        );
+        // Never replace a body with something that is not strictly shorter.
+        if summary.len() >= candidate.original_len {
+            continue;
+        }
+
+        if let ContentBlock::ToolResult {
+            content,
+            content_blocks,
+            ..
+        } = &mut messages[candidate.message_idx].content[candidate.block_idx]
+        {
+            bytes_saved = bytes_saved.saturating_add(content.len().saturating_sub(summary.len()));
+            *content = summary;
+            *content_blocks = None;
+        }
+    }
+
+    bytes_saved
+}
+
 /// Result of a compaction operation with metadata.
 #[derive(Debug)]
 pub struct CompactionResult {
@@ -747,6 +875,39 @@ pub async fn compact_messages_safe(
    const MAX_RETRIES: u32 = 3;
    const BASE_DELAY_MS: u64 = 1000;

+    let mut pruned_messages = messages.to_vec();
+    let pruned_bytes = prune_tool_results(&mut pruned_messages, KEEP_RECENT_MESSAGES);
+    let compaction_input: &[Message] = if pruned_bytes > 0 {
+        logging::info(format!(
+            "Local tool-result prune saved {pruned_bytes} bytes before LLM compaction"
+        ));
+        let was_over_threshold = should_compact(
+            messages,
+            config,
+            workspace,
+            external_pins,
+            external_working_set_paths,
+        );
+        let now_under_threshold = !should_compact(
+            &pruned_messages,
+            config,
+            workspace,
+            external_pins,
+            external_working_set_paths,
+        );
+        if was_over_threshold && now_under_threshold {
+            return Ok(CompactionResult {
+                messages: pruned_messages,
+                summary_prompt: None,
+                removed_messages: Vec::new(),
+                retries_used: 0,
+            });
+        }
+        &pruned_messages
+    } else {
+        messages
+    };
+
    let mut last_error: Option<anyhow::Error> = None;

    for attempt in 0..MAX_RETRIES {
@@ -758,7 +919,7 @@ pub async fn compact_messages_safe(

        match compact_messages(
            client,
-            messages,
+            compaction_input,
            config,
            workspace,
            external_pins,
@@ -1269,6 +1430,30 @@ mod tests {
        }
    }

+    /// Test helper: an assistant message holding a single `ToolUse` block
+    /// with the given id, tool name and arguments.
+    fn tool_use(id: &str, name: &str, input: serde_json::Value) -> Message {
+        Message {
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::ToolUse {
+                id: id.to_string(),
+                name: name.to_string(),
+                input,
+                caller: None,
+            }],
+        }
+    }
+
+    /// Test helper: a user message holding a single `ToolResult` block that
+    /// answers the tool call `id` with `content`.
+    fn tool_result(id: &str, content: &str) -> Message {
+        Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::ToolResult {
+                tool_use_id: id.to_string(),
+                content: content.to_string(),
+                is_error: None,
+                content_blocks: None,
+            }],
+        }
+    }
+
    #[test]
    fn truncate_chars_respects_unicode_boundaries() {
        let text = "abc😀é";
@@ -1279,6 +1464,73 @@ mod tests {
        assert_eq!(truncate_chars(text, 5), "abc😀é");
    }

+    /// A verbose, non-duplicate tool result outside the protected window is
+    /// replaced by a shorter summary naming the tool and its arguments.
+    #[test]
+    fn prune_tool_results_summarizes_old_verbose_outputs() {
+        // Just over the snippet limit, so the result counts as verbose.
+        let verbose = "x".repeat(SUMMARY_TOOL_RESULT_SNIPPET_CHARS + 80);
+        let mut messages = vec![
+            tool_use("call-1", "read_file", json!({"path": "Cargo.toml"})),
+            tool_result("call-1", &verbose),
+            msg("user", "recent question"),
+            msg("assistant", "recent answer"),
+        ];
+
+        let saved = prune_tool_results(&mut messages, 2);
+
+        assert!(saved > 0);
+        let ContentBlock::ToolResult { content, .. } = &messages[1].content[0] else {
+            panic!("expected tool result");
+        };
+        assert!(content.contains("[read_file] tool result pruned"));
+        assert!(content.contains("Cargo.toml"));
+        assert!(content.len() < verbose.len());
+    }
+
+    /// A verbose tool result that falls inside the protected window is left
+    /// untouched and nothing is reported as saved.
+    #[test]
+    fn prune_tool_results_preserves_protected_tail() {
+        let verbose = "x".repeat(SUMMARY_TOOL_RESULT_SNIPPET_CHARS + 80);
+        let mut messages = vec![
+            msg("user", "older context"),
+            tool_use("call-1", "read_file", json!({"path": "Cargo.toml"})),
+            tool_result("call-1", &verbose),
+        ];
+
+        // The last two messages (tool use + tool result) are protected.
+        let saved = prune_tool_results(&mut messages, 2);
+
+        assert_eq!(saved, 0);
+        let ContentBlock::ToolResult { content, .. } = &messages[2].content[0] else {
+            panic!("expected tool result");
+        };
+        assert_eq!(content, &verbose);
+    }
+
+    /// Two identical calls (same tool, same arguments): the older result is
+    /// summarized while the newer one keeps its full body.
+    #[test]
+    fn prune_tool_results_dedupes_identical_reads_but_keeps_latest_full_body() {
+        let first = "first ".repeat(80);
+        let second = "second ".repeat(80);
+        let mut messages = vec![
+            tool_use("call-1", "read_file", json!({"path": "Cargo.toml"})),
+            tool_result("call-1", &first),
+            tool_use("call-2", "read_file", json!({"path": "Cargo.toml"})),
+            tool_result("call-2", &second),
+            msg("user", "tail"),
+        ];
+
+        // Only the trailing user message is protected.
+        let saved = prune_tool_results(&mut messages, 1);
+
+        assert!(saved > 0);
+        let ContentBlock::ToolResult { content: older, .. } = &messages[1].content[0] else {
+            panic!("expected older tool result");
+        };
+        assert!(older.contains("tool result pruned"));
+        let ContentBlock::ToolResult {
+            content: latest, ..
+        } = &messages[3].content[0]
+        else {
+            panic!("expected latest tool result");
+        };
+        assert_eq!(latest, &second);
+    }
+
    #[test]
    fn is_transient_error_detects_network_issues() {
        let timeout_err = anyhow::anyhow!("Connection timeout");
@@ -146,8 +146,6 @@ pub enum RequestPayloadMode {
    ChatCompletions,
 }

-
-
 /// Resolve the provider capability for a given [`ApiProvider`] and resolved
 /// model string.
 ///
@@ -201,26 +199,25 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
    }
 }

-/// Canonicalize common model aliases to stable DeepSeek IDs.
+/// Canonicalize compact DeepSeek model aliases to stable IDs.
 ///
-/// v4-pro/v4-flash provide canonical forms; v-series snapshots pass through
-/// unchanged. Legacy aliases (deepseek-chat, etc.) are no longer folded —
-/// DeepSeek's own `/v1/models` endpoint is the source of truth.
+/// Already-valid model IDs pass through unchanged. Only the compact
+/// `v4pro`/`v4flash` spellings are rewritten to their hyphenated forms.
 #[must_use]
 pub fn canonical_model_name(model: &str) -> Option<&'static str> {
    match model.trim().to_ascii_lowercase().as_str() {
-        "deepseek-v4-pro" | "deepseek-v4pro" => Some("deepseek-v4-pro"),
-        "deepseek-v4-flash" | "deepseek-v4flash" => Some("deepseek-v4-flash"),
+        "deepseek-v4pro" => Some("deepseek-v4-pro"),
+        "deepseek-v4flash" => Some("deepseek-v4-flash"),
        _ => None,
    }
 }

 /// Normalize a configured/runtime model name.
 ///
-/// Trims whitespace and lowercases. v-series snapshots (deepseek-v4-flash-20260423)
-/// pass through unchanged so users can pin dated variants. Non-DeepSeek or
-/// malformed names return `None`; DeepSeek's `/v1/models` endpoint is the
-/// authority on valid model IDs.
+/// Trims whitespace, preserves caller-provided case for already-valid model
+/// IDs, and only canonicalizes compact aliases like `deepseek-v4pro`.
+/// Non-DeepSeek or malformed names return `None`; DeepSeek's `/v1/models`
+/// endpoint is the authority on valid model IDs.
 #[must_use]
 pub fn normalize_model_name(model: &str) -> Option<String> {
    let trimmed = model.trim();
@@ -236,10 +233,11 @@ pub fn normalize_model_name(model: &str) -> Option<String> {
        return None;
    }

-    if normalized.chars().all(|ch| {
-        ch.is_ascii_lowercase() || ch.is_ascii_digit() || matches!(ch, '-' | '_' | '.' | ':' | '/')
-    }) {
-        return Some(normalized);
+    if trimmed
+        .chars()
+        .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.' | ':' | '/'))
+    {
+        return Some(trimmed.to_string());
    }

    None
@@ -1869,7 +1867,8 @@ fn normalize_model_for_provider(provider: ApiProvider, model: &str) -> Option<St
 }

 fn model_for_provider(provider: ApiProvider, normalized: String) -> String {
-    match (provider, normalized.as_str()) {
+    let lowered = normalized.to_ascii_lowercase();
+    match (provider, lowered.as_str()) {
        (ApiProvider::NvidiaNim, "deepseek-v4-pro") => DEFAULT_NVIDIA_NIM_MODEL.to_string(),
        (ApiProvider::NvidiaNim, "deepseek-v4-flash") => DEFAULT_NVIDIA_NIM_FLASH_MODEL.to_string(),
        (ApiProvider::Openrouter, "deepseek-v4-pro") => DEFAULT_OPENROUTER_MODEL.to_string(),
@@ -3212,6 +3211,27 @@ api_key = "old-openrouter-key"
            normalize_model_name("deepseek-ai/deepseek-v4-pro").as_deref(),
            Some("deepseek-ai/deepseek-v4-pro")
        );
+        // preserve exact case for providers that require case-sensitive model IDs
+        assert_eq!(
+            normalize_model_name("DeepSeek-V4-Pro").as_deref(),
+            Some("DeepSeek-V4-Pro")
+        );
+        assert_eq!(
+            normalize_model_name("deepseek-ai/DeepSeek-V4-Pro").as_deref(),
+            Some("deepseek-ai/DeepSeek-V4-Pro")
+        );
+    }
+
+    #[test]
+    fn normalize_model_for_provider_keeps_provider_remaps_when_case_is_preserved() {
+        assert_eq!(
+            normalize_model_for_provider(ApiProvider::Deepseek, "DeepSeek-V4-Pro").as_deref(),
+            Some("DeepSeek-V4-Pro")
+        );
+        assert_eq!(
+            normalize_model_for_provider(ApiProvider::NvidiaNim, "DeepSeek-V4-Pro").as_deref(),
+            Some(DEFAULT_NVIDIA_NIM_MODEL)
+        );
    }

    #[test]
@@ -4023,5 +4043,4 @@ model = "deepseek-v4-pro"
        let deserialized: ProviderCapability = serde_json::from_value(json).unwrap();
        assert_eq!(cap, deserialized);
    }
-
 }
@@ -1878,6 +1878,7 @@ use context::{
    turn_response_headroom_tokens,
 };
 mod dispatch;
+mod loop_guard;
 mod lsp_hooks;
 mod streaming;
 mod tool_catalog;
@@ -1893,6 +1894,7 @@ use self::dispatch::{
    parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first,
    should_parallelize_tool_batch, should_stop_after_plan_tool,
 };
+use self::loop_guard::{AttemptDecision, LoopGuard, OutcomeDecision};
 #[cfg(test)]
 use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
 #[cfg(test)]
@@ -48,6 +48,7 @@ pub(super) struct ToolExecutionPlan {
    pub(super) supports_parallel: bool,
    pub(super) read_only: bool,
    pub(super) blocked_error: Option<ToolError>,
+    pub(super) guard_result: Option<ToolResult>,
 }

 #[derive(Debug, serde::Serialize)]
@@ -0,0 +1,222 @@
+//! Pure-data guardrails for repeated tool-call loops.
+
+use std::collections::HashMap;
+use std::collections::hash_map::DefaultHasher;
+use std::fmt::Write as _;
+use std::hash::{Hash, Hasher};
+
+use serde_json::Value;
+
+/// Attempt count at which an identical `(tool, args)` call is blocked instead of run.
+const IDENTICAL_CALL_BLOCK_THRESHOLD: u32 = 3;
+/// Consecutive-failure count at which a single warning is produced for a tool.
+const FAILURE_WARN_THRESHOLD: u32 = 3;
+/// Consecutive-failure count at or above which the guard returns a halt decision.
+const FAILURE_HALT_THRESHOLD: u32 = 8;
+
+/// Verdict returned by [`LoopGuard::record_attempt`] for a pending tool call.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(super) enum AttemptDecision {
+    /// The call has not yet reached the identical-call threshold.
+    Proceed,
+    /// The call reached the threshold; the payload is the explanatory message.
+    Block(String),
+}
+
+/// Verdict returned by [`LoopGuard::record_outcome`] after a tool call finished.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(super) enum OutcomeDecision {
+    /// Nothing to report for this outcome.
+    Continue,
+    /// The tool's failure streak just hit the warning threshold; payload is the message.
+    Warn(String),
+    /// The tool's failure streak reached the halt threshold; payload is the message.
+    Halt(String),
+}
+
+/// Counters backing the repeated-call and repeated-failure checks.
+#[derive(Debug, Default)]
+pub(super) struct LoopGuard {
+    /// Attempts seen per `(tool name, hash of canonicalized arguments)` pair.
+    call_counts: HashMap<(String, u64), u32>,
+    /// Current run of consecutive failures per tool name; reset to zero on success.
+    failure_counts: HashMap<String, u32>,
+}
+
+impl LoopGuard {
+    /// Registers one more attempt of `tool` with `args` and decides whether it may run.
+    ///
+    /// Arguments are identified by `hash_args`, so two calls count as identical when
+    /// their canonical JSON hashes match. The counter is bumped on every call,
+    /// including the ones that end up blocked, and the `count` shown in the message
+    /// includes the attempt being blocked.
+    pub(super) fn record_attempt(&mut self, tool: &str, args: &Value) -> AttemptDecision {
+        let slot = self
+            .call_counts
+            .entry((tool.to_owned(), hash_args(args)))
+            .or_default();
+        *slot = slot.saturating_add(1);
+        let count = *slot;
+
+        if count < IDENTICAL_CALL_BLOCK_THRESHOLD {
+            return AttemptDecision::Proceed;
+        }
+        AttemptDecision::Block(format!(
+            "Blocked: this exact call (`{tool}` with these arguments) has already run {count} times this turn. Stop retrying it unchanged. Either change the arguments or pick a different tool."
+        ))
+    }
+
+    /// Updates the consecutive-failure streak for `tool` and reports what to do next.
+    ///
+    /// A successful outcome (`ok == true`) resets the streak. A failure extends it:
+    /// the halt threshold is checked first, then the warning threshold (which fires
+    /// only when the streak equals it exactly); anything else is `Continue`.
+    pub(super) fn record_outcome(&mut self, tool: &str, ok: bool) -> OutcomeDecision {
+        let streak = self.failure_counts.entry(tool.to_owned()).or_default();
+        if ok {
+            *streak = 0;
+            return OutcomeDecision::Continue;
+        }
+
+        *streak = streak.saturating_add(1);
+        let failures = *streak;
+        match failures {
+            reached if reached >= FAILURE_HALT_THRESHOLD => OutcomeDecision::Halt(format!(
+                "Stop retrying `{tool}` - it has failed {failures} consecutive times. Choose a different approach."
+            )),
+            FAILURE_WARN_THRESHOLD => OutcomeDecision::Warn(format!(
+                "Tool `{tool}` has failed {failures} consecutive times this turn."
+            )),
+            _ => OutcomeDecision::Continue,
+        }
+    }
+}
+
+/// Hashes `args` through its canonical JSON text, so that objects differing only in
+/// key order produce the same value.
+fn hash_args(args: &Value) -> u64 {
+    let canonical = {
+        let mut text = String::new();
+        write_canonical_json(args, &mut text);
+        text
+    };
+
+    let mut state = DefaultHasher::new();
+    canonical.hash(&mut state);
+    state.finish()
+}
+
+/// Appends a compact JSON rendering of `value` to `out` with object keys emitted in
+/// sorted order, recursing into arrays and objects.
+fn write_canonical_json(value: &Value, out: &mut String) {
+    match value {
+        Value::Null => out.push_str("null"),
+        Value::Bool(true) => out.push_str("true"),
+        Value::Bool(false) => out.push_str("false"),
+        Value::Number(number) => {
+            // Writing into a `String` cannot fail, so the result is ignored.
+            let _ = write!(out, "{number}");
+        }
+        Value::String(text) => {
+            let quoted = serde_json::to_string(text).expect("serializing string cannot fail");
+            out.push_str(&quoted);
+        }
+        Value::Array(items) => {
+            out.push('[');
+            let mut separator = "";
+            for item in items {
+                out.push_str(separator);
+                write_canonical_json(item, out);
+                separator = ",";
+            }
+            out.push(']');
+        }
+        Value::Object(map) => {
+            // Keys within one object are unique, so an unstable sort yields the same order.
+            let mut fields: Vec<_> = map.iter().collect();
+            fields.sort_unstable_by(|lhs, rhs| lhs.0.cmp(rhs.0));
+
+            out.push('{');
+            let mut separator = "";
+            for (name, item) in fields {
+                out.push_str(separator);
+                let quoted = serde_json::to_string(name).expect("serializing key cannot fail");
+                out.push_str(&quoted);
+                out.push(':');
+                write_canonical_json(item, out);
+                separator = ",";
+            }
+            out.push('}');
+        }
+    }
+}
+
+// Unit tests for the loop guard's attempt and outcome counters.
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    // Two identical attempts proceed; the third one is blocked with a message
+    // naming the tool and the attempt count.
+    #[test]
+    fn third_identical_tool_call_is_blocked() {
+        let mut guard = LoopGuard::default();
+        let args = json!({"path": "src/main.rs"});
+
+        assert_eq!(
+            guard.record_attempt("read_file", &args),
+            AttemptDecision::Proceed
+        );
+        assert_eq!(
+            guard.record_attempt("read_file", &args),
+            AttemptDecision::Proceed
+        );
+
+        let AttemptDecision::Block(message) = guard.record_attempt("read_file", &args) else {
+            panic!("third identical call should be blocked");
+        };
+        assert!(message.contains("read_file"));
+        assert!(message.contains("already run 3 times"));
+    }
+
+    // Same tool and path but a different `offset` each time: every call has
+    // distinct arguments, so none of them is blocked.
+    #[test]
+    fn paginated_reads_are_not_false_positives() {
+        let mut guard = LoopGuard::default();
+
+        for offset in [0, 100, 200] {
+            assert_eq!(
+                guard.record_attempt(
+                    "read_file",
+                    &json!({"path": "src/main.rs", "offset": offset})
+                ),
+                AttemptDecision::Proceed
+            );
+        }
+    }
+
+    // Failures 1-2 continue, failure 3 warns, failures 4-7 continue, failure 8 halts.
+    #[test]
+    fn tool_failure_counter_warns_at_three_and_halts_at_eight() {
+        let mut guard = LoopGuard::default();
+
+        assert_eq!(
+            guard.record_outcome("grep_files", false),
+            OutcomeDecision::Continue
+        );
+        assert_eq!(
+            guard.record_outcome("grep_files", false),
+            OutcomeDecision::Continue
+        );
+        assert!(matches!(
+            guard.record_outcome("grep_files", false),
+            OutcomeDecision::Warn(message) if message.contains("failed 3 consecutive times")
+        ));
+
+        for _ in 4..8 {
+            assert_eq!(
+                guard.record_outcome("grep_files", false),
+                OutcomeDecision::Continue
+            );
+        }
+        assert!(matches!(
+            guard.record_outcome("grep_files", false),
+            OutcomeDecision::Halt(message) if message.contains("failed 8 consecutive times")
+        ));
+    }
+
+    // A success between failures resets the streak, so the following failure
+    // counts as the first one again and does not warn.
+    #[test]
+    fn successful_tool_call_resets_failure_counter() {
+        let mut guard = LoopGuard::default();
+
+        assert_eq!(
+            guard.record_outcome("grep_files", false),
+            OutcomeDecision::Continue
+        );
+        assert_eq!(
+            guard.record_outcome("grep_files", false),
+            OutcomeDecision::Continue
+        );
+        assert_eq!(
+            guard.record_outcome("grep_files", true),
+            OutcomeDecision::Continue
+        );
+        assert_eq!(
+            guard.record_outcome("grep_files", false),
+            OutcomeDecision::Continue
+        );
+    }
+
+    // The same object written with keys in a different order counts as the
+    // same call, so the third attempt is blocked.
+    #[test]
+    fn argument_hash_is_independent_of_object_key_order() {
+        let mut guard = LoopGuard::default();
+
+        assert_eq!(
+            guard.record_attempt("read_file", &json!({"path": "a", "offset": 0})),
+            AttemptDecision::Proceed
+        );
+        assert_eq!(
+            guard.record_attempt("read_file", &json!({"offset": 0, "path": "a"})),
+            AttemptDecision::Proceed
+        );
+        assert!(matches!(
+            guard.record_attempt("read_file", &json!({"path": "a", "offset": 0})),
+            AttemptDecision::Block(_)
+        ));
+    }
+}
@@ -37,6 +37,7 @@ fn make_plan(
        supports_parallel,
        read_only,
        blocked_error: None,
+        guard_result: None,
    }
 }

@@ -29,6 +29,7 @@ impl Engine {
            ensure_advanced_tooling(&mut tool_catalog);
        }
        let mut active_tool_names = initial_active_tools(&tool_catalog);
+        let mut loop_guard = LoopGuard::default();

        // Transparent stream-retry counter: when the chunked-transfer
        // connection dies mid-stream and we got nothing useful out of it
@@ -974,6 +975,7 @@ impl Engine {
                let mut supports_parallel = false;
                let mut read_only = false;
                let mut blocked_error: Option<ToolError> = None;
+                let mut guard_result: Option<ToolResult> = None;
                if maybe_activate_requested_deferred_tool(
                    &tool_name,
                    &tool_catalog,
@@ -996,8 +998,7 @@ impl Engine {
                {
                    crate::logging::info(format!(
                        "Resolved hallucinated tool name '{}' -> '{}'",
-                        tool_name,
-                        canonical
+                        tool_name, canonical
                    ));
                    tool_def = tool_catalog.iter().find(|d| d.name == canonical);
                    if tool_def.is_some() {
@@ -1067,6 +1068,17 @@ impl Engine {
                    read_only = true;
                }

+                if blocked_error.is_none()
+                    && let AttemptDecision::Block(message) =
+                        loop_guard.record_attempt(&tool_name, &tool_input)
+                {
+                    crate::logging::warn(message.clone());
+                    guard_result = Some(
+                        ToolResult::success(message)
+                            .with_metadata(json!({"loop_guard": "identical_tool_call"})),
+                    );
+                }
+
                plans.push(ToolExecutionPlan {
                    index,
                    id: tool_id,
@@ -1079,6 +1091,7 @@ impl Engine {
                    supports_parallel,
                    read_only,
                    blocked_error,
+                    guard_result,
                });
            }

@@ -1106,6 +1119,26 @@ impl Engine {
            if parallel_allowed {
                let mut tool_tasks = FuturesUnordered::new();
                for plan in plans {
+                    if let Some(result) = plan.guard_result.clone() {
+                        let result = Ok(result);
+                        let _ = self
+                            .tx_event
+                            .send(Event::ToolCallComplete {
+                                id: plan.id.clone(),
+                                name: plan.name.clone(),
+                                result: result.clone(),
+                            })
+                            .await;
+                        outcomes[plan.index] = Some(ToolExecOutcome {
+                            index: plan.index,
+                            id: plan.id,
+                            name: plan.name,
+                            input: plan.input,
+                            started_at: Instant::now(),
+                            result,
+                        });
+                        continue;
+                    }
                    if let Some(err) = plan.blocked_error.clone() {
                        outcomes[plan.index] = Some(ToolExecOutcome {
                            index: plan.index,
@@ -1183,6 +1216,27 @@ impl Engine {
                    let tool_input = plan.input.clone();
                    let tool_caller = plan.caller.clone();

+                    if let Some(result) = plan.guard_result.clone() {
+                        let result = Ok(result);
+                        let _ = self
+                            .tx_event
+                            .send(Event::ToolCallComplete {
+                                id: tool_id.clone(),
+                                name: tool_name.clone(),
+                                result: result.clone(),
+                            })
+                            .await;
+                        outcomes[plan.index] = Some(ToolExecOutcome {
+                            index: plan.index,
+                            id: tool_id,
+                            name: tool_name,
+                            input: tool_input,
+                            started_at: Instant::now(),
+                            result,
+                        });
+                        continue;
+                    }
+
                    if let Some(err) = plan.blocked_error.clone() {
                        let result = Err(err);
                        let _ = self
@@ -1472,6 +1526,7 @@ impl Engine {
            // denial that should not.
            let mut step_error_categories: Vec<ErrorCategory> = Vec::new();
            let mut stop_after_plan_tool = false;
+            let mut loop_guard_halt: Option<String> = None;

            for outcome in outcomes.into_iter().flatten() {
                let duration = outcome.started_at.elapsed();
@@ -1484,6 +1539,16 @@ impl Engine {

                match outcome.result {
                    Ok(output) => {
+                        match loop_guard.record_outcome(&outcome.name, output.success) {
+                            OutcomeDecision::Continue => {}
+                            OutcomeDecision::Warn(message) => {
+                                crate::logging::warn(message.clone());
+                                let _ = self.tx_event.send(Event::status(message)).await;
+                            }
+                            OutcomeDecision::Halt(message) => {
+                                loop_guard_halt.get_or_insert(message);
+                            }
+                        }
                        emit_tool_audit(json!({
                            "event": "tool.result",
                            "tool_id": outcome.id.clone(),
@@ -1526,6 +1591,16 @@ impl Engine {
                        .await;
                    }
                    Err(e) => {
+                        match loop_guard.record_outcome(&outcome.name, false) {
+                            OutcomeDecision::Continue => {}
+                            OutcomeDecision::Warn(message) => {
+                                crate::logging::warn(message.clone());
+                                let _ = self.tx_event.send(Event::status(message)).await;
+                            }
+                            OutcomeDecision::Halt(message) => {
+                                loop_guard_halt.get_or_insert(message);
+                            }
+                        }
                        let envelope: ErrorEnvelope = e.clone().into();
                        emit_tool_audit(json!({
                            "event": "tool.result",
@@ -1567,6 +1642,12 @@ impl Engine {
                break;
            }

+            if let Some(message) = loop_guard_halt {
+                crate::logging::warn(message.clone());
+                let _ = self.tx_event.send(Event::status(message)).await;
+                break;
+            }
+
            if self
                .run_capacity_post_tool_checkpoint(
                    turn,
@@ -1619,16 +1700,6 @@ impl Engine {
                continue;
            }

-            if consecutive_tool_error_steps >= 3 {
-                let _ = self
-                    .tx_event
-                    .send(Event::status(
-                        "Stopping after repeated tool failures. Try a narrower scope or adjust approvals.",
-                    ))
-                    .await;
-                break;
-            }
-
            turn.next_step();
        }

@@ -1,6 +1,6 @@
 // Used by the deferred context-limit handoff feature (#667). The implementation
 // path is staged but not yet wired from the engine; suppress dead-code warnings
-// rather than delete the table, since v0.8.13 will consume it.
+// rather than delete the table until the follow-up feature consumes it.
 #[allow(dead_code)]
 pub const THRESHOLDS: [(f32, &str); 3] = [
    (
@@ -1574,45 +1574,6 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
    }
    println!("  · credential sources: env, ~/.deepseek/config.toml");

-    // #593: surface keyring/config disagreement explicitly. The runtime
-    // resolution order is `keyring → env → config-file`, so a stale
-    // keyring entry from a prior install can shadow the value the user
-    // sees in `~/.deepseek/config.toml`. We only check the DeepSeek
-    // slot — other providers don't write to the keyring today, and
-    // probing entries that aren't there triggers macOS keychain
-    // prompts for nothing.
-    let secrets = deepseek_secrets::Secrets::auto_detect();
-    let keyring_key = secrets.get("deepseek").ok().flatten();
-    let config_key = config
-        .api_key
-        .as_ref()
-        .filter(|v| !v.trim().is_empty() && v.as_str() != "__KEYRING__")
-        .map(|s| s.to_string());
-    match (keyring_key.as_deref(), config_key.as_deref()) {
-        (Some(k), Some(c)) if k.trim() != c.trim() => {
-            println!();
-            println!(
-                "  {} `deepseek`: OS keyring and config.toml hold different values.",
-                "⚠".truecolor(red_r, red_g, red_b)
-            );
-            println!(
-                "    Resolution order is keyring → env → config-file, so the keyring value wins."
-            );
-            println!("    Reconcile by overwriting both with the current key:");
-            println!("        deepseek auth set --provider deepseek");
-            println!(
-                "    (Or paste the key into the in-TUI onboarding screen — it now writes both layers.)"
-            );
-        }
-        (Some(_), None) => {
-            println!(
-                "  {} `deepseek`: key is in OS keyring only (config.toml has no copy).",
-                "·".dimmed()
-            );
-        }
-        _ => {}
-    }
-
    let has_api_key = if config.deepseek_api_key().is_ok() {
        println!(
            "  {} active provider key resolved",
@@ -191,7 +191,8 @@ impl ColorDepth {
 #[must_use]
 pub fn adapt_color(color: Color, depth: ColorDepth) -> Color {
    match (color, depth) {
-        (_, ColorDepth::TrueColor | ColorDepth::Ansi256) => color,
+        (_, ColorDepth::TrueColor) => color,
+        (Color::Rgb(r, g, b), ColorDepth::Ansi256) => Color::Indexed(rgb_to_ansi256(r, g, b)),
        (Color::Rgb(r, g, b), ColorDepth::Ansi16) => nearest_ansi16(r, g, b),
        _ => color,
    }
@@ -203,9 +204,11 @@ pub fn adapt_color(color: Color, depth: ColorDepth) -> Color {
 #[allow(dead_code)]
 #[must_use]
 pub fn adapt_bg(color: Color, depth: ColorDepth) -> Color {
-    match depth {
-        ColorDepth::TrueColor | ColorDepth::Ansi256 => color,
-        ColorDepth::Ansi16 => Color::Reset,
+    match (color, depth) {
+        (_, ColorDepth::TrueColor) => color,
+        (Color::Rgb(r, g, b), ColorDepth::Ansi256) => Color::Indexed(rgb_to_ansi256(r, g, b)),
+        (_, ColorDepth::Ansi256) => color,
+        (_, ColorDepth::Ansi16) => Color::Reset,
    }
 }

@@ -236,7 +239,10 @@ pub fn blend(fg: Color, bg: Color, alpha: f32) -> Color {
 pub fn reasoning_surface_tint(depth: ColorDepth) -> Option<Color> {
    match depth {
        ColorDepth::Ansi16 => None,
-        _ => Some(blend(SURFACE_REASONING, DEEPSEEK_INK, 0.12)),
+        _ => Some(adapt_bg(
+            blend(SURFACE_REASONING, DEEPSEEK_INK, 0.12),
+            depth,
+        )),
    }
 }

@@ -327,12 +333,59 @@ fn nearest_ansi16(r: u8, g: u8, b: u8) -> Color {
    }
 }

+/// Pick the xterm 256-color palette index closest to an RGB triple. Only the
+/// fixed 6x6x6 color cube (16..=231) and the grayscale ramp (232..=255) are
+/// candidates; the user-configurable indices 0..=15 are never returned.
+#[allow(dead_code)]
+fn rgb_to_ansi256(r: u8, g: u8, b: u8) -> u8 {
+    const CUBE_LEVELS: [u8; 6] = [0, 95, 135, 175, 215, 255];
+
+    // Returns `(cube coordinate, channel value)` of the cube level nearest to
+    // `channel`; the strict `<` keeps the lower level when two are equally close.
+    fn snap_to_cube(channel: u8) -> (u8, u8) {
+        let mut best = (0u8, CUBE_LEVELS[0]);
+        for (slot, &level) in (0u8..).zip(CUBE_LEVELS.iter()).skip(1) {
+            if channel.abs_diff(level) < channel.abs_diff(best.1) {
+                best = (slot, level);
+            }
+        }
+        best
+    }
+
+    // Squared Euclidean distance between two RGB triples.
+    fn distance_squared(lhs: [u8; 3], rhs: [u8; 3]) -> u32 {
+        lhs.iter()
+            .zip(rhs.iter())
+            .map(|(x, y)| u32::from(x.abs_diff(*y)).pow(2))
+            .sum()
+    }
+
+    let target = [r, g, b];
+
+    // Candidate 1: nearest point in the color cube.
+    let (red_slot, red_level) = snap_to_cube(r);
+    let (green_slot, green_level) = snap_to_cube(g);
+    let (blue_slot, blue_level) = snap_to_cube(b);
+    let cube_color = [red_level, green_level, blue_level];
+    let cube_index = 16 + 36 * red_slot + 6 * green_slot + blue_slot;
+
+    // Candidate 2: grayscale ramp entry (levels 8, 18, ..., 238) chosen from the
+    // mean of the three channels.
+    let mean = ((u16::from(r) + u16::from(g) + u16::from(b)) / 3) as u8;
+    let gray_step: u8 = match mean {
+        0..=8 => 0,
+        238..=u8::MAX => 23,
+        _ => ((mean - 8 + 5) / 10).min(23),
+    };
+    let gray_level = 8 + 10 * gray_step;
+
+    // The cube wins unless the gray entry is strictly closer.
+    if distance_squared(target, [gray_level; 3]) < distance_squared(target, cube_color) {
+        232 + gray_step
+    } else {
+        cube_index
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::{
        ACCENT_REASONING_LIVE, ColorDepth, DEEPSEEK_INK, DEEPSEEK_RED, DEEPSEEK_SKY,
        SURFACE_REASONING, adapt_bg, adapt_color, blend, nearest_ansi16, pulse_brightness,
-        reasoning_surface_tint,
+        reasoning_surface_tint, rgb_to_ansi256,
    };
    use ratatui::style::Color;

@@ -340,7 +393,23 @@ mod tests {
    fn adapt_color_passes_through_truecolor() {
        let c = Color::Rgb(53, 120, 229);
        assert_eq!(adapt_color(c, ColorDepth::TrueColor), c);
-        assert_eq!(adapt_color(c, ColorDepth::Ansi256), c);
+    }
+
+    #[test]
+    fn adapt_color_maps_rgb_to_indexed_on_ansi256() {
+        let c = Color::Rgb(53, 120, 229);
+        assert!(matches!(
+            adapt_color(c, ColorDepth::Ansi256),
+            Color::Indexed(_)
+        ));
+    }
+
+    #[test]
+    fn adapt_bg_maps_rgb_to_indexed_on_ansi256() {
+        assert!(matches!(
+            adapt_bg(SURFACE_REASONING, ColorDepth::Ansi256),
+            Color::Indexed(_)
+        ));
    }

    #[test]
@@ -370,6 +439,10 @@ mod tests {
    fn reasoning_tint_is_none_on_ansi16() {
        assert!(reasoning_surface_tint(ColorDepth::Ansi16).is_none());
        assert!(reasoning_surface_tint(ColorDepth::TrueColor).is_some());
+        assert!(matches!(
+            reasoning_surface_tint(ColorDepth::Ansi256),
+            Some(Color::Indexed(_))
+        ));
    }

    #[test]
@@ -424,6 +497,12 @@ mod tests {
        assert_eq!(nearest_ansi16(11, 21, 38), Color::Black);
    }

+    // Indices below 16 are the user-configurable base colors, which the mapper
+    // is documented to avoid.
+    #[test]
+    fn rgb_to_ansi256_uses_stable_extended_palette() {
+        assert!(rgb_to_ansi256(53, 120, 229) >= 16);
+        assert!(rgb_to_ansi256(11, 21, 38) >= 16);
+    }
+
    #[test]
    fn color_depth_detect_is_safe_without_env() {
        // Don't try to pin the result — env may be anything in CI. Just
@@ -36,7 +36,7 @@ use crate::localization::normalize_configured_locale;
 /// ```
 //
 // NOTE: the loader is defined but not yet called from startup — wiring is
-// deferred to v0.8.13 (#657). The `#[allow(dead_code)]` suppresses the CI
+// deferred to a later settings pass (#657). The `#[allow(dead_code)]` suppresses the CI
 // `-D warnings` failure until the call site lands.
 #[allow(dead_code)]
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -63,7 +63,7 @@ impl Default for TuiPrefs {
 }

 /// Per-action keybinding overrides stored inside [`TuiPrefs`].
-#[allow(dead_code)] // see TuiPrefs note above; deferred to v0.8.13 (#657).
+#[allow(dead_code)] // see TuiPrefs note above; deferred to a later settings pass (#657).
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
 #[serde(default)]
 pub struct KeybindPrefs {
@@ -84,7 +84,7 @@ pub struct KeybindPrefs {
    pub toggle_sidebar: Option<String>,
 }

-#[allow(dead_code)] // see TuiPrefs note above; deferred to v0.8.13 (#657).
+#[allow(dead_code)] // see TuiPrefs note above; deferred to a later settings pass (#657).
 impl TuiPrefs {
    /// Return the canonical path of the TUI preferences file:
    /// `~/.deepseek/tui.toml`.
@@ -50,8 +50,7 @@ fn collapse_nullable_unions(schema: &mut Value) {
            Some(arr) => arr.clone(),
            None => continue,
        };
-        let (nulls, nons): (Vec<_>, Vec<_>) =
-            members.into_iter().partition(|m| is_null_type(m));
+        let (nulls, nons): (Vec<_>, Vec<_>) = members.into_iter().partition(is_null_type);
        if nulls.len() == 1 && nons.len() == 1 {
            obj.remove(key);
            if let Value::Object(non_obj) = nons.into_iter().next().unwrap() {
@@ -85,10 +84,7 @@ fn inject_properties_on_bare_objects(schema: &mut Value) {
    if obj.contains_key("properties") || obj.contains_key("additionalProperties") {
        return;
    }
-    obj.insert(
-        "properties".into(),
-        Value::Object(Map::new()),
-    );
+    obj.insert("properties".into(), Value::Object(Map::new()));
 }

 /// Remove entries from `required` that aren't keys in `properties`.
@@ -0,0 +1,169 @@
+//! Terminal color compatibility shim.
+//!
+//! Ratatui's crossterm backend emits truecolor SGR for every `Color::Rgb`
+//! cell. That is correct for truecolor terminals, but macOS Terminal.app often
+//! advertises only `xterm-256color`; sending `38;2` / `48;2` there can render
+//! as stray green/cyan backgrounds. This backend adapts every cell to the
+//! detected color depth before handing it to crossterm.
+
+use std::io::{self, Write};
+
+use ratatui::{
+    backend::{Backend, ClearType, CrosstermBackend, WindowSize},
+    buffer::Cell,
+    layout::{Position, Size},
+};
+
+use crate::palette::{self, ColorDepth};
+
+/// Ratatui backend wrapper that adapts cell colors to `depth` before
+/// delegating to the wrapped [`CrosstermBackend`].
+#[derive(Debug)]
+pub(crate) struct ColorCompatBackend<W: Write> {
+    /// Wrapped crossterm backend that performs the actual terminal output.
+    inner: CrosstermBackend<W>,
+    /// Color depth every drawn cell is adapted to.
+    depth: ColorDepth,
+}
+
+impl<W: Write> ColorCompatBackend<W> {
+    /// Wraps `writer` in a crossterm backend whose drawn cells are adapted to `depth`.
+    pub(crate) fn new(writer: W, depth: ColorDepth) -> Self {
+        let inner = CrosstermBackend::new(writer);
+        Self { inner, depth }
+    }
+}
+
+/// Byte-level writes pass straight through to the wrapped backend; no color
+/// adaptation happens on this path.
+impl<W: Write> Write for ColorCompatBackend<W> {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        self.inner.write(buf)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        // Fully qualified because the wrapped backend also has `Backend::flush`.
+        Write::flush(&mut self.inner)
+    }
+}
+
+/// Every method except `draw` is forwarded unchanged to the wrapped backend.
+impl<W: Write> Backend for ColorCompatBackend<W> {
+    /// Clones each incoming cell, adapts its colors to `self.depth`, and hands
+    /// the adapted cells to the wrapped backend in the original order.
+    fn draw<'a, I>(&mut self, content: I) -> io::Result<()>
+    where
+        I: Iterator<Item = (u16, u16, &'a Cell)>,
+    {
+        let depth = self.depth;
+        let mut adapted = Vec::new();
+        for (x, y, source) in content {
+            let mut cell = source.clone();
+            adapt_cell_colors(&mut cell, depth);
+            adapted.push((x, y, cell));
+        }
+
+        // The inner backend wants borrowed cells, so lend out the owned copies.
+        let borrowed = adapted.iter().map(|(x, y, cell)| (*x, *y, cell));
+        self.inner.draw(borrowed)
+    }
+
+    fn append_lines(&mut self, n: u16) -> io::Result<()> {
+        self.inner.append_lines(n)
+    }
+
+    fn hide_cursor(&mut self) -> io::Result<()> {
+        self.inner.hide_cursor()
+    }
+
+    fn show_cursor(&mut self) -> io::Result<()> {
+        self.inner.show_cursor()
+    }
+
+    fn get_cursor_position(&mut self) -> io::Result<Position> {
+        self.inner.get_cursor_position()
+    }
+
+    fn set_cursor_position<P: Into<Position>>(&mut self, position: P) -> io::Result<()> {
+        self.inner.set_cursor_position(position)
+    }
+
+    fn clear(&mut self) -> io::Result<()> {
+        self.inner.clear()
+    }
+
+    fn clear_region(&mut self, clear_type: ClearType) -> io::Result<()> {
+        self.inner.clear_region(clear_type)
+    }
+
+    fn size(&self) -> io::Result<Size> {
+        self.inner.size()
+    }
+
+    fn window_size(&mut self) -> io::Result<WindowSize> {
+        self.inner.window_size()
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        // Fully qualified to select the backend flush rather than `Write::flush`.
+        Backend::flush(&mut self.inner)
+    }
+}
+
+/// Rewrites the foreground and background of `cell` for the given color depth,
+/// using `palette::adapt_color` for the foreground and `palette::adapt_bg` for
+/// the background.
+fn adapt_cell_colors(cell: &mut Cell, depth: ColorDepth) {
+    let foreground = palette::adapt_color(cell.fg, depth);
+    let background = palette::adapt_bg(cell.bg, depth);
+    cell.set_fg(foreground).set_bg(background);
+}
+
+// Unit tests for the color-adapting backend wrapper.
+#[cfg(test)]
+mod tests {
+    use std::{cell::RefCell, io::Write, rc::Rc};
+
+    use ratatui::backend::Backend;
+    use ratatui::{buffer::Cell, style::Color};
+
+    use super::*;
+
+    // In-memory writer whose buffer is shared through `Rc<RefCell<_>>`, so a
+    // test can keep a handle and inspect the bytes after the backend takes
+    // ownership of the writer.
+    #[derive(Clone, Default)]
+    struct SharedWriter(Rc<RefCell<Vec<u8>>>);
+
+    impl Write for SharedWriter {
+        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+            self.0.borrow_mut().extend_from_slice(buf);
+            Ok(buf.len())
+        }
+
+        fn flush(&mut self) -> io::Result<()> {
+            Ok(())
+        }
+    }
+
+    // RGB foreground and background both become indexed colors at 256-color depth.
+    #[test]
+    fn adapts_rgb_cells_to_indexed_on_ansi256() {
+        let mut cell = Cell::default();
+        cell.set_fg(Color::Rgb(53, 120, 229));
+        cell.set_bg(Color::Rgb(11, 21, 38));
+
+        adapt_cell_colors(&mut cell, ColorDepth::Ansi256);
+
+        assert!(matches!(cell.fg, Color::Indexed(_)));
+        assert!(matches!(cell.bg, Color::Indexed(_)));
+    }
+
+    // At truecolor depth the RGB values must come back untouched.
+    #[test]
+    fn leaves_truecolor_cells_unchanged() {
+        let mut cell = Cell::default();
+        cell.set_fg(Color::Rgb(53, 120, 229));
+        cell.set_bg(Color::Rgb(11, 21, 38));
+
+        adapt_cell_colors(&mut cell, ColorDepth::TrueColor);
+
+        assert_eq!(cell.fg, Color::Rgb(53, 120, 229));
+        assert_eq!(cell.bg, Color::Rgb(11, 21, 38));
+    }
+
+    // End-to-end check on the emitted bytes: drawing an RGB cell through a
+    // 256-color backend must not produce `38;2;` / `48;2;` truecolor sequences.
+    #[test]
+    fn ansi256_backend_output_does_not_emit_truecolor_sgr() {
+        let writer = SharedWriter::default();
+        let capture = writer.0.clone();
+        let mut backend = ColorCompatBackend::new(writer, ColorDepth::Ansi256);
+        let mut cell = Cell::default();
+        cell.set_symbol("x")
+            .set_fg(Color::Rgb(53, 120, 229))
+            .set_bg(Color::Rgb(11, 21, 38));
+
+        backend.draw(std::iter::once((0, 0, &cell))).unwrap();
+
+        let output = String::from_utf8_lossy(&capture.borrow()).to_string();
+        assert!(!output.contains("38;2;"), "{output:?}");
+        assert!(!output.contains("48;2;"), "{output:?}");
+    }
+}
@@ -22,9 +22,11 @@ use crossterm::{
    execute,
    terminal::{EnterAlternateScreen, LeaveAlternateScreen, disable_raw_mode, enable_raw_mode},
 };
-use ratatui::{Terminal, backend::CrosstermBackend};
+use ratatui::Terminal;
 use tempfile::Builder;

+use super::color_compat::ColorCompatBackend;
+
 /// Outcome of a single external-editor invocation.
 #[derive(Debug, PartialEq, Eq)]
 pub enum EditorOutcome {
@@ -120,8 +122,8 @@ pub fn run_editor_raw(seed: &str) -> io::Result<EditorOutcome> {
 ///
 /// On any error (raw-mode toggle, IO, editor spawn failure), the function
 /// still attempts to fully restore the terminal before returning.
-pub fn spawn_editor_for_input(
-    terminal: &mut Terminal<CrosstermBackend<Stdout>>,
+pub(crate) fn spawn_editor_for_input(
+    terminal: &mut Terminal<ColorCompatBackend<Stdout>>,
    use_alt_screen: bool,
    use_mouse_capture: bool,
    use_bracketed_paste: bool,
@@ -7,6 +7,7 @@ pub mod app;
 pub mod approval;
 pub mod backtrack;
 pub mod clipboard;
+mod color_compat;
 pub mod command_palette;
 pub mod context_inspector;
 pub mod context_menu;
@@ -18,7 +18,6 @@ use crossterm::{
 };
 use ratatui::{
    Frame, Terminal,
-    backend::CrosstermBackend,
    layout::{Constraint, Direction, Layout, Rect},
    prelude::Widget,
    style::{Color, Style},
@@ -52,6 +51,7 @@ use crate::task_manager::{
 };
 use crate::tools::spec::RuntimeToolServices;
 use crate::tools::subagent::SubAgentStatus;
+use crate::tui::color_compat::ColorCompatBackend;
 use crate::tui::command_palette::{
    CommandPaletteView, build_entries as build_command_palette_entries,
 };
@@ -132,6 +132,8 @@ const WORKSPACE_CONTEXT_REFRESH_SECS: u64 = 15;
 const SIDEBAR_VISIBLE_MIN_WIDTH: u16 = 100;
 const DEFAULT_TERMINAL_PROBE_TIMEOUT_MS: u64 = 500;

+/// Terminal type used by the event loop and its helpers in this module: a
+/// ratatui `Terminal` whose backend is `ColorCompatBackend` over `Stdout`.
+type AppTerminal = Terminal<ColorCompatBackend<Stdout>>;
+
 /// Run the interactive TUI event loop.
 ///
 /// # Examples
@@ -230,7 +232,9 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> {
            "PushKeyboardEnhancementFlags ignored (terminal lacks support)"
        );
    }
-    let backend = CrosstermBackend::new(stdout);
+    let color_depth = palette::ColorDepth::detect();
+    tracing::debug!(?color_depth, "terminal color depth detected");
+    let backend = ColorCompatBackend::new(stdout, color_depth);
    let mut terminal = Terminal::new(backend)?;
    terminal.clear()?;
    let event_broker = EventBroker::new();
@@ -576,7 +580,7 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag

 #[allow(clippy::too_many_lines)]
 async fn run_event_loop(
-    terminal: &mut Terminal<CrosstermBackend<Stdout>>,
+    terminal: &mut AppTerminal,
    app: &mut App,
    config: &mut Config,
    mut engine_handle: EngineHandle,
@@ -3289,7 +3293,7 @@ async fn dispatch_user_message(
        app.model.clone()
    };

-    engine_handle
+    if let Err(err) = engine_handle
        .send(Op::SendMessage {
            content,
            mode: app.mode,
@@ -3300,7 +3304,12 @@ async fn dispatch_user_message(
            trust_mode: app.trust_mode,
            auto_approve: app.mode == AppMode::Yolo,
        })
-        .await?;
+        .await
+    {
+        app.is_loading = false;
+        app.last_send_at = None;
+        return Err(err);
+    }

    Ok(())
 }
@@ -3815,7 +3824,7 @@ fn workspace_path_to_picker_string(path: &Path) -> Option<String> {
 }

 async fn apply_command_result(
-    terminal: &mut Terminal<CrosstermBackend<Stdout>>,
+    terminal: &mut AppTerminal,
    app: &mut App,
    engine_handle: &mut EngineHandle,
    task_manager: &SharedTaskManager,
@@ -4305,7 +4314,7 @@ fn handle_shell_job_action(app: &mut App, action: crate::tui::app::ShellJobActio
 }

 async fn execute_command_input(
-    terminal: &mut Terminal<CrosstermBackend<Stdout>>,
+    terminal: &mut AppTerminal,
    app: &mut App,
    engine_handle: &mut EngineHandle,
    task_manager: &SharedTaskManager,
@@ -4864,7 +4873,7 @@ fn toggle_live_transcript_overlay(app: &mut App) {
 }

 async fn handle_view_events(
-    terminal: &mut Terminal<CrosstermBackend<Stdout>>,
+    terminal: &mut AppTerminal,
    app: &mut App,
    config: &mut Config,
    task_manager: &SharedTaskManager,
@@ -5552,7 +5561,7 @@ fn run_git_query(workspace: &Path, args: &[&str]) -> std::io::Result<String> {
 }

 fn pause_terminal(
-    terminal: &mut Terminal<CrosstermBackend<Stdout>>,
+    terminal: &mut AppTerminal,
    use_alt_screen: bool,
    use_mouse_capture: bool,
    use_bracketed_paste: bool,
@@ -5576,7 +5585,7 @@ fn pause_terminal(
 }

 fn resume_terminal(
-    terminal: &mut Terminal<CrosstermBackend<Stdout>>,
+    terminal: &mut AppTerminal,
    use_alt_screen: bool,
    use_mouse_capture: bool,
    use_bracketed_paste: bool,
@@ -695,6 +695,30 @@ async fn model_change_update_syncs_engine_model_before_compaction() {
    }
 }

+/// When the engine's op receiver has been dropped, `dispatch_user_message`
+/// must return an error and leave the app idle: `is_loading` false and
+/// `last_send_at` unset.
+#[tokio::test]
+async fn dispatch_user_message_failed_send_clears_loading_state() {
+    let mut app = create_test_app();
+    let mock = mock_engine_handle();
+    // Drop the receiving side up front so the send inside the dispatch fails.
+    drop(mock.rx_op);
+
+    let message = QueuedMessage::new(String::from("hello"), None);
+    let outcome = dispatch_user_message(&mut app, &mock.handle, message).await;
+
+    assert!(
+        outcome.is_err(),
+        "dispatch should fail when engine channel is closed"
+    );
+    assert!(
+        !app.is_loading,
+        "failed dispatch must not leave the composer in a permanent busy state"
+    );
+    assert!(app.last_send_at.is_none());
+}
+
 fn init_git_repo() -> TempDir {
    let dir = tempfile::tempdir().expect("tempdir");

@@ -1,7 +1,7 @@
 {
  "name": "deepseek-tui",
-  "version": "0.8.12",
-  "deepseekBinaryVersion": "0.8.12",
+  "version": "0.8.13",
+  "deepseekBinaryVersion": "0.8.13",
  "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
  "author": "Hmbown",
  "license": "MIT",