chore(release): finalize v0.8.13 stabilization

This commit is contained in:
Hunter Bown
2026-05-05 13:06:09 -05:00
parent 6b0a01d054
commit c4cbd7c19f
34 changed files with 1860 additions and 230 deletions
+43
View File
@@ -5,6 +5,49 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.8.13] - 2026-05-05
A stabilization release for DeepSeek V4 runtime and TUI reliability. The
v0.8.13 milestone was narrowed to direct runtime/TUI fixes; prompt hygiene,
trajectory logging, Anthropic-wire support, and larger UI cleanup were moved
out of this release.
### Added
- **No-LLM tool-result prune before compaction** (#710) — old verbose tool
results are mechanically summarized before the paid summary pass. Duplicate
reads keep the freshest full body and replace older copies with one-line
summaries; if that gets the session back under the compaction threshold, the
LLM summary call is skipped entirely.
- **Repeated-tool anti-loop guard** (#714) — the engine now tracks
`(tool_name, args)` pairs per user turn. On the third identical call it
inserts a synthetic corrective tool result instead of running the same tool
again unchanged; per-tool failures warn at three and halt at eight.
- **V4 cache-hit telemetry fallback** (#721) — usage parsing now recognizes
`usage.prompt_tokens_details.cached_tokens`, so the existing footer cache-hit
chip works with DeepSeek V4's automatic prefix-cache telemetry as well as the
older explicit hit/miss fields.
### Fixed
- **Invalid tool-call JSON repair** (#712) — malformed streamed tool arguments
now pass through a deterministic repair ladder before dispatch.
- **Hallucinated tool-name recovery** (#713) — common non-canonical tool names
are resolved through the registry before the engine reports a missing tool.
- **Tool-schema sanitation** (#715) — schemas are normalized before API
emission so provider-strict JSON Schema handling does not reject valid tools.
- **Case-sensitive model IDs** (#717, #729) — valid configured model IDs keep
caller-provided case while compact DeepSeek aliases still canonicalize.
- **Stale `working...` state after failed dispatch** (#738) — if the UI fails
to send a message to the engine before a turn starts, the composer loading
state is cleared instead of trapping later input in pending state.
- **Prompt-free doctor key checks** — `deepseek doctor` no longer reads the OS
keyring, avoiding macOS Keychain prompts during diagnostics.
- **macOS Terminal color compatibility** — `xterm-256color` sessions now
receive 256-color palette indexes instead of truecolor SGR, preventing
Apple Terminal from misrendering whale blues as green/cyan blocks.
- **Chat client repair after Responses cleanup** — restored the chat client
body and regression coverage after removing the dead experimental Responses
fallback path.
## [0.8.11] - 2026-05-04
### Changed
Generated
+14 -14
View File
@@ -1080,7 +1080,7 @@ dependencies = [
[[package]]
name = "deepseek-agent"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"deepseek-config",
"serde",
@@ -1088,7 +1088,7 @@ dependencies = [
[[package]]
name = "deepseek-app-server"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"axum",
@@ -1110,7 +1110,7 @@ dependencies = [
[[package]]
name = "deepseek-config"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"deepseek-secrets",
@@ -1122,7 +1122,7 @@ dependencies = [
[[package]]
name = "deepseek-core"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"chrono",
@@ -1140,7 +1140,7 @@ dependencies = [
[[package]]
name = "deepseek-execpolicy"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -1149,7 +1149,7 @@ dependencies = [
[[package]]
name = "deepseek-hooks"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"async-trait",
@@ -1163,7 +1163,7 @@ dependencies = [
[[package]]
name = "deepseek-mcp"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"serde",
@@ -1172,7 +1172,7 @@ dependencies = [
[[package]]
name = "deepseek-protocol"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"serde",
"serde_json",
@@ -1180,7 +1180,7 @@ dependencies = [
[[package]]
name = "deepseek-secrets"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"dirs",
"keyring",
@@ -1193,7 +1193,7 @@ dependencies = [
[[package]]
name = "deepseek-state"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"chrono",
@@ -1205,7 +1205,7 @@ dependencies = [
[[package]]
name = "deepseek-tools"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"async-trait",
@@ -1218,7 +1218,7 @@ dependencies = [
[[package]]
name = "deepseek-tui"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"arboard",
@@ -1277,7 +1277,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-cli"
version = "0.8.12"
version = "0.8.13"
dependencies = [
"anyhow",
"chrono",
@@ -1301,7 +1301,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-core"
version = "0.8.12"
version = "0.8.13"
[[package]]
name = "deranged"
+1 -1
View File
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
resolver = "2"
[workspace.package]
version = "0.8.12"
version = "0.8.13"
edition = "2024"
# Rust 1.88 stabilized `let_chains` in `if`/`while` conditions, which the
# codebase relies on extensively. Cargo enforces this so users on older
+10 -16
View File
@@ -174,24 +174,18 @@ SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model d
---
## What's New In v0.8.12
## What's New In v0.8.13
A feature release with 20 community PRs on top of the v0.8.11 cache-maxing foundation. [Full changelog](CHANGELOG.md).
A stabilization release focused on DeepSeek V4 runtime reliability, tool-call recovery, and TUI truthfulness. [Full changelog](CHANGELOG.md).
- **Reasoning-effort auto mode** — `reasoning_effort = "auto"` picks the right tier from the prompt: debug/error → Max, search/lookup → Low, default → High
- **Bash arity dictionary** — `auto_allow = ["git status"]` matches `git status -s` but not `git push`. Knows git, cargo, npm, docker, kubectl, and more
- **Vim modal editing** — normal/insert mode in the composer with standard Vim keybindings
- **Skill registry sync** — `/skills sync` fetches and installs/updates the community registry
- **FIM edit tool** — surgical code edits via DeepSeek's `/beta` fill-in-the-middle endpoint
- **Large-tool-output routing** — outsized tool results get truncated previews with spillover, protecting parent context
- **Pluggable sandbox backends** — `exec_shell` can route to Alibaba OpenSandbox or other remote backends
- **Layered permission rulesets** — builtin/agent/user priority layers for execpolicy deny/allow rules
- **Cache-aware resident sub-agents** — file content prepended for V4 prefix-cache locality; global lease table
- **Unified slash-command namespace** — user commands with `$1`/`$2`/`$ARGUMENTS` templates
- **Color::Reset migration** — all hardcoded backgrounds replaced with `Color::Reset` for light-terminal support
- **New docs**: SECURITY.md (#648), CODE_OF_CONDUCT.md (#686), zh-Hans locale activation (#652)
*28 community PRs by [@merchloubna70-dot](https://github.com/merchloubna70-dot). First-time contributor [@zichen0116](https://github.com/zichen0116) (#686).*
- **No-LLM compaction prune** — old verbose tool results are mechanically summarized before any paid summary call; duplicate reads keep only the freshest full body
- **Repeated-tool anti-loop guard** — the third identical `(tool, args)` call in a turn becomes a corrective tool result instead of another stuck retry
- **V4 cache-hit footer telemetry** — the status line now understands `usage.prompt_tokens_details.cached_tokens`
- **Tool-call recovery** — invalid JSON arguments, hallucinated tool names, and strict schema issues are repaired or sanitized before dispatch
- **Case-sensitive model IDs** — provider-specific model names keep caller-provided case while compact DeepSeek aliases still normalize
- **Stale busy-state fix** — failed dispatch before turn start clears `working...` so input does not get stuck in pending state
- **Prompt-free doctor key checks** — diagnostics no longer read the OS keyring
- **macOS Terminal color compatibility** — `xterm-256color` sessions no longer render whale blues as green/cyan blocks
---
+10 -16
View File
@@ -168,24 +168,18 @@ SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model d
---
## v0.8.12 新功能
## v0.8.13 新功能
功能发布:在 v0.8.11 缓存优化基础上合并了 20 个社区 PR。[完整更新日志](CHANGELOG.md)。
稳定性发布:聚焦 DeepSeek V4 运行时可靠性、工具调用恢复和 TUI 状态准确性。[完整更新日志](CHANGELOG.md)。
- **推理强度自动模式** —— `reasoning_effort = "auto"` 根据提示词自动选择档位:debug/error → Max,search/lookup → Low,默认 → High
- **Bash 参数匹配字典** —— `auto_allow = ["git status"]` 匹配 `git status -s` 但不匹配 `git push`。支持 git、cargo、npm、docker、kubectl 等
- **Vim 模态编辑** —— 在输入框中支持 Vim 普通/插入模式切换
- **技能注册表同步** —— `/skills sync` 拉取并安装/更新社区技能注册表
- **FIM 编辑工具** —— 通过 DeepSeek `/beta` 的 fill-in-the-middle 端点进行精确代码编辑
- **大工具输出路由** —— 超大工具结果被截断预览,保护父上下文窗口
- **可插拔沙箱后端** —— `exec_shell` 可路由到 Alibaba OpenSandbox 或其他远程后端
- **分层权限规则** —— builtin/agent/user 三层优先级,deny 永远优先
- **缓存感知常驻子智能体** —— 文件内容预置于系统提示中以利用 V4 前缀缓存;全局租约表
- **统一斜杠命令命名空间** —— 用户命令支持 `$1`/`$2`/`$ARGUMENTS` 模板
- **Color::Reset 迁移** —— 所有硬编码背景替换为 `Color::Reset`,适配浅色终端
- **新文档**:SECURITY.md (#648)、CODE_OF_CONDUCT.md (#686)、zh-Hans 语言激活 (#652)
**28 个社区 PR 由 [@merchloubna70-dot](https://github.com/merchloubna70-dot) 贡献。首次贡献者 [@zichen0116](https://github.com/zichen0116) (#686)。**
- **无需 LLM 的压缩预剪枝** —— 付费摘要前先机械压缩旧的大型工具结果;重复读取只保留最新完整内容
- **重复工具调用防循环** —— 同一轮内第三次完全相同的 `(tool, args)` 会变成纠正性工具结果,而不是继续卡住重试
- **V4 缓存命中率状态栏** —— 状态栏现在识别 `usage.prompt_tokens_details.cached_tokens`
- **工具调用恢复** —— 无效 JSON 参数、幻觉工具名和严格 schema 问题会在分发前修复或清理
- **区分大小写的模型 ID** —— 第三方 provider 的模型名保留用户输入大小写,同时继续规范化紧凑 DeepSeek 别名
- **忙碌状态修复** —— 如果 turn 开始前分发失败,会清除 `working...`,避免后续输入一直进入 pending
- **不会弹出 Keychain 的 doctor 密钥检查** —— 诊断流程不再读取 OS keyring
- **macOS Terminal 颜色兼容** —— `xterm-256color` 会使用 256 色索引,避免鲸蓝主题被渲染成绿色/青色块
---
+1 -1
View File
@@ -7,5 +7,5 @@ repository.workspace = true
description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
[dependencies]
deepseek-config = { path = "../config", version = "0.8.12" }
deepseek-config = { path = "../config", version = "0.8.13" }
serde.workspace = true
+9 -9
View File
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
anyhow.workspace = true
axum.workspace = true
clap.workspace = true
deepseek-agent = { path = "../agent", version = "0.8.12" }
deepseek-config = { path = "../config", version = "0.8.12" }
deepseek-core = { path = "../core", version = "0.8.12" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.8.12" }
deepseek-hooks = { path = "../hooks", version = "0.8.12" }
deepseek-mcp = { path = "../mcp", version = "0.8.12" }
deepseek-protocol = { path = "../protocol", version = "0.8.12" }
deepseek-state = { path = "../state", version = "0.8.12" }
deepseek-tools = { path = "../tools", version = "0.8.12" }
deepseek-agent = { path = "../agent", version = "0.8.13" }
deepseek-config = { path = "../config", version = "0.8.13" }
deepseek-core = { path = "../core", version = "0.8.13" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.8.13" }
deepseek-hooks = { path = "../hooks", version = "0.8.13" }
deepseek-mcp = { path = "../mcp", version = "0.8.13" }
deepseek-protocol = { path = "../protocol", version = "0.8.13" }
deepseek-state = { path = "../state", version = "0.8.13" }
deepseek-tools = { path = "../tools", version = "0.8.13" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
+7 -7
View File
@@ -14,13 +14,13 @@ path = "src/main.rs"
anyhow.workspace = true
clap.workspace = true
clap_complete.workspace = true
deepseek-agent = { path = "../agent", version = "0.8.12" }
deepseek-app-server = { path = "../app-server", version = "0.8.12" }
deepseek-config = { path = "../config", version = "0.8.12" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.8.12" }
deepseek-mcp = { path = "../mcp", version = "0.8.12" }
deepseek-secrets = { path = "../secrets", version = "0.8.12" }
deepseek-state = { path = "../state", version = "0.8.12" }
deepseek-agent = { path = "../agent", version = "0.8.13" }
deepseek-app-server = { path = "../app-server", version = "0.8.13" }
deepseek-config = { path = "../config", version = "0.8.13" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.8.13" }
deepseek-mcp = { path = "../mcp", version = "0.8.13" }
deepseek-secrets = { path = "../secrets", version = "0.8.13" }
deepseek-state = { path = "../state", version = "0.8.13" }
chrono.workspace = true
dirs.workspace = true
serde.workspace = true
+1 -1
View File
@@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite
[dependencies]
anyhow.workspace = true
deepseek-secrets = { path = "../secrets", version = "0.8.12" }
deepseek-secrets = { path = "../secrets", version = "0.8.13" }
dirs.workspace = true
serde.workspace = true
toml.workspace = true
+8 -8
View File
@@ -9,13 +9,13 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
[dependencies]
anyhow.workspace = true
chrono.workspace = true
deepseek-agent = { path = "../agent", version = "0.8.12" }
deepseek-config = { path = "../config", version = "0.8.12" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.8.12" }
deepseek-hooks = { path = "../hooks", version = "0.8.12" }
deepseek-mcp = { path = "../mcp", version = "0.8.12" }
deepseek-protocol = { path = "../protocol", version = "0.8.12" }
deepseek-state = { path = "../state", version = "0.8.12" }
deepseek-tools = { path = "../tools", version = "0.8.12" }
deepseek-agent = { path = "../agent", version = "0.8.13" }
deepseek-config = { path = "../config", version = "0.8.13" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.8.13" }
deepseek-hooks = { path = "../hooks", version = "0.8.13" }
deepseek-mcp = { path = "../mcp", version = "0.8.13" }
deepseek-protocol = { path = "../protocol", version = "0.8.13" }
deepseek-state = { path = "../state", version = "0.8.13" }
deepseek-tools = { path = "../tools", version = "0.8.13" }
serde_json.workspace = true
uuid.workspace = true
+1 -1
View File
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
[dependencies]
anyhow.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.8.12" }
deepseek-protocol = { path = "../protocol", version = "0.8.13" }
serde.workspace = true
+1 -1
View File
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
anyhow.workspace = true
async-trait.workspace = true
chrono.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.8.12" }
deepseek-protocol = { path = "../protocol", version = "0.8.13" }
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
+10 -2
View File
@@ -88,10 +88,18 @@ impl DefaultKeyringStore {
/// Probe the OS keyring without writing anything. Returns `Ok(())` if
/// a backend is reachable, otherwise an error describing why not.
pub fn probe(&self) -> Result<(), SecretsError> {
// `Entry::new` is enough to surface "no backend / no storage" on
// headless Linux; no actual read happens until `.get_password()`.
// `Entry::new` is enough to validate the native macOS/Windows
// backend path. Avoid a dummy read there because it can trigger
// a second user-visible Keychain/Credential Manager access before
// the real provider key lookup.
let entry = keyring::Entry::new(&self.service, "__probe__")
.map_err(|err| SecretsError::Keyring(err.to_string()))?;
#[cfg(any(target_os = "macos", target_os = "windows"))]
{
let _ = entry;
Ok(())
}
#[cfg(not(any(target_os = "macos", target_os = "windows")))]
match entry.get_password() {
Ok(_) | Err(keyring::Error::NoEntry) => Ok(()),
Err(keyring::Error::PlatformFailure(err)) => {
+1 -1
View File
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.8.12" }
deepseek-protocol = { path = "../protocol", version = "0.8.13" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
+2 -2
View File
@@ -21,8 +21,8 @@ path = "src/main.rs"
[dependencies]
anyhow = "1.0.100"
arboard = "3.4"
deepseek-secrets = { path = "../secrets", version = "0.8.12" }
deepseek-tools = { path = "../tools", version = "0.8.12" }
deepseek-secrets = { path = "../secrets", version = "0.8.13" }
deepseek-tools = { path = "../tools", version = "0.8.13" }
schemaui = { version = "0.12.0", default-features = false, optional = true }
async-stream = "0.3.6"
async-trait = "0.1"
+830 -48
View File
@@ -1,10 +1,8 @@
//! HTTP client for DeepSeek's OpenAI-compatible Chat Completions API.
//!
//! DeepSeek documents `/chat/completions` as the primary endpoint. A legacy
//! Responses probe remains available behind `DEEPSEEK_EXPERIMENTAL_RESPONSES_API`
//! for local compatibility experiments, but normal traffic uses chat completions.
//! DeepSeek documents `/chat/completions` as the primary endpoint, and this
//! client now routes all normal traffic through that surface.
use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
use std::sync::{Arc, Mutex as StdMutex, OnceLock};
use std::time::{Duration, Instant};
@@ -16,8 +14,7 @@ use tokio::sync::Mutex as AsyncMutex;
use crate::config::{ApiProvider, Config, RetryPolicy};
use crate::llm_client::{
LlmClient, LlmError, RetryConfig as LlmRetryConfig, StreamEventBox, extract_retry_after,
with_retry,
LlmClient, LlmError, RetryConfig as LlmRetryConfig, extract_retry_after, with_retry,
};
use crate::logging;
use crate::models::{MessageRequest, MessageResponse, ServerToolUsage, SystemPrompt, Usage};
@@ -130,15 +127,10 @@ pub struct DeepSeekClient {
pub(super) api_provider: ApiProvider,
retry: RetryPolicy,
default_model: String,
use_chat_completions: AtomicBool,
/// Counter of chat-completions requests since last experimental Responses API probe.
/// After RESPONSES_RECOVERY_INTERVAL requests, we retry the Responses API when
connection_health: Arc<AsyncMutex<ConnectionHealth>>,
rate_limiter: Arc<AsyncMutex<TokenBucket>>,
}
/// After this many chat-completions requests, retry the experimental Responses
/// API to see if it has recovered.
const CONNECTION_FAILURE_THRESHOLD: u32 = 2;
const RECOVERY_PROBE_COOLDOWN: Duration = Duration::from_secs(15);
@@ -302,8 +294,817 @@ impl Clone for DeepSeekClient {
api_provider: self.api_provider,
retry: self.retry.clone(),
default_model: self.default_model.clone(),
use_chat_completions: AtomicBool::new(
self.use_chat_completions.load(Ordering::Relaxed),
connection_health: self.connection_health.clone(),
rate_limiter: self.rate_limiter.clone(),
}
}
}
// === Helpers ===
/// Maximum bytes to read from an error response body (64 KB).
pub(super) const ERROR_BODY_MAX_BYTES: usize = 64 * 1024;
/// Collect at most `max_bytes` of an error response body and return it as
/// text, so a misbehaving server cannot force an unbounded allocation.
///
/// Reading stops at the first stream error or once the byte budget is used
/// up. Invalid UTF-8 is replaced lossily rather than rejected.
pub(super) async fn bounded_error_text(response: reqwest::Response, max_bytes: usize) -> String {
    use futures_util::StreamExt;

    let mut body = response.bytes_stream();
    // Start small: most error bodies are short, so never pre-reserve more
    // than 8 KiB even when the cap is larger.
    let mut collected = Vec::with_capacity(max_bytes.min(8192));
    loop {
        let piece = match body.next().await {
            Some(Ok(piece)) => piece,
            // A transport error or end-of-stream both end the read.
            Some(Err(_)) | None => break,
        };
        let room = max_bytes.saturating_sub(collected.len());
        if room == 0 {
            break;
        }
        let take = piece.len().min(room);
        collected.extend_from_slice(&piece[..take]);
    }
    String::from_utf8_lossy(&collected).into_owned()
}
/// Returns `true` when `base_url` is a plain-HTTP URL whose *host* is exactly
/// one of the loopback names `localhost`, `127.0.0.1`, or `[::1]`.
///
/// The host is taken from the URL authority (the text between `http://` and
/// the first `/`, `?`, or `#`) with any `:port` removed. URLs carrying
/// userinfo (`user@host`) are never treated as loopback, because the real
/// host is whatever follows the `@`.
fn is_loopback_http_url(base_url: &str) -> bool {
    let Some(rest) = base_url.strip_prefix("http://") else {
        return false;
    };
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or("");
    if authority.contains('@') {
        return false;
    }
    // Bracketed IPv6 literal: only `[::1]`, optionally followed by `:port`.
    if let Some(bracketed) = authority.strip_prefix('[') {
        return match bracketed.split_once(']') {
            Some((addr, port)) => addr == "::1" && (port.is_empty() || port.starts_with(':')),
            None => false,
        };
    }
    let host = authority.split(':').next().unwrap_or("");
    matches!(host, "localhost" | "127.0.0.1")
}

/// Reject base URLs that would send the API key over an insecure channel.
///
/// Accepted without further checks:
/// - any `https://` URL;
/// - `http://` URLs whose host is exactly `localhost`, `127.0.0.1`, or `[::1]`.
///
/// Any other `http://` URL is accepted only when the environment variable
/// named by `ALLOW_INSECURE_HTTP_ENV` is `1` or `true` (case-insensitive), in
/// which case a warning is logged. Everything else is an error.
///
/// The loopback exemption compares the parsed host, not a string prefix, so
/// look-alike hosts such as `http://localhost.evil.example` or
/// `http://127.0.0.1@evil.example` are not treated as local.
///
/// # Errors
/// Returns an error describing the refused URL when none of the rules above
/// allow it.
fn validate_base_url_security(base_url: &str) -> Result<()> {
    if base_url.starts_with("https://") || is_loopback_http_url(base_url) {
        return Ok(());
    }

    if base_url.starts_with("http://") {
        let override_enabled = std::env::var(ALLOW_INSECURE_HTTP_ENV)
            .ok()
            .as_deref()
            .is_some_and(|v| v == "1" || v.eq_ignore_ascii_case("true"));
        if override_enabled {
            logging::warn(format!(
                "Using insecure HTTP base URL because {} is set",
                ALLOW_INSECURE_HTTP_ENV
            ));
            return Ok(());
        }
        anyhow::bail!(
            "Refusing insecure base URL '{}'. Use HTTPS or set {}=1 to override for trusted environments.",
            base_url,
            ALLOW_INSECURE_HTTP_ENV
        );
    }

    anyhow::bail!(
        "Refusing base URL '{}': only HTTPS (or explicitly allowed HTTP) URLs are supported.",
        base_url,
    )
}
/// Normalize a provider base URL so it ends in an API version segment.
///
/// Trailing slashes are removed first. A URL that already ends in `/v1` or
/// `/beta` is returned as-is (minus the slashes); anything else gets `/v1`
/// appended.
pub(super) fn versioned_base_url(base_url: &str) -> String {
    let root = base_url.trim_end_matches('/');
    let already_versioned = ["/v1", "/beta"]
        .iter()
        .any(|suffix| root.ends_with(*suffix));
    if already_versioned {
        root.to_owned()
    } else {
        format!("{root}/v1")
    }
}
/// Join `path` onto the versioned form of `base_url` with exactly one `/`
/// between them, regardless of slashes on either side of the seam.
pub(super) fn api_url(base_url: &str, path: &str) -> String {
    let base = versioned_base_url(base_url);
    let prefix = base.trim_end_matches('/');
    let suffix = path.trim_start_matches('/');
    format!("{prefix}/{suffix}")
}
// === DeepSeekClient ===
/// Reports whether `DEEPSEEK_FORCE_HTTP1` holds a truthy value.
///
/// Truthy means `1`, `true`, `yes`, or `on` after trimming whitespace and
/// ignoring ASCII case. An unset, unreadable, or any other value (`0`,
/// `false`, ...) yields `false`, leaving HTTP/2 enabled. `build_http_client`
/// uses this to opt out of HTTP/2 when DeepSeek's edge mishandles long-lived
/// H2 streams (#103).
fn force_http1_from_env() -> bool {
    let Ok(raw) = std::env::var("DEEPSEEK_FORCE_HTTP1") else {
        return false;
    };
    let flag = raw.trim().to_ascii_lowercase();
    matches!(flag.as_str(), "1" | "true" | "yes" | "on")
}
/// Read `SSL_CERT_FILE` and add its contents as extra root
/// certificates on the reqwest builder (#418).
///
/// The file is first parsed as a PEM bundle (which also covers single-cert
/// PEM files). If that yields no certificates, the bytes are tried as a
/// single DER certificate. All failures log a warning and return the builder
/// unchanged so a malformed env var degrades gracefully.
fn add_extra_root_certs(
    mut builder: reqwest::ClientBuilder,
    cert_path: &str,
) -> reqwest::ClientBuilder {
    let bytes = match std::fs::read(cert_path) {
        Ok(b) => b,
        Err(err) => {
            logging::warn(format!(
                "SSL_CERT_FILE={cert_path} could not be read: {err}"
            ));
            return builder;
        }
    };

    // Only accept the PEM path when it produced at least one certificate.
    // An input with no PEM blocks (for example raw DER) can parse as an
    // empty bundle instead of an error; treating that as success would make
    // the DER fallback below unreachable and log a misleading "0 cert(s)".
    match reqwest::Certificate::from_pem_bundle(&bytes) {
        Ok(certs) if !certs.is_empty() => {
            let added = certs.len();
            for cert in certs {
                builder = builder.add_root_certificate(cert);
            }
            logging::info(format!(
                "SSL_CERT_FILE={cert_path} loaded ({added} cert(s))"
            ));
            return builder;
        }
        _ => {}
    }

    match reqwest::Certificate::from_der(&bytes) {
        Ok(cert) => {
            builder = builder.add_root_certificate(cert);
            logging::info(format!("SSL_CERT_FILE={cert_path} loaded (1 DER cert)"));
        }
        Err(err) => {
            logging::warn(format!(
                "SSL_CERT_FILE={cert_path} could not be parsed as PEM bundle or DER: {err}"
            ));
        }
    }
    builder
}
impl DeepSeekClient {
/// Build a DeepSeek client from CLI configuration.
///
/// Resolves the API key, base URL, provider, retry policy, and default
/// model from `config`, refuses insecure base URLs, logs the effective
/// settings, and constructs the shared HTTP client.
///
/// # Errors
/// Fails when the API key cannot be resolved, the base URL is rejected by
/// `validate_base_url_security`, or the HTTP client cannot be built.
pub fn new(config: &Config) -> Result<Self> {
    let api_key = config.deepseek_api_key()?;
    let base_url = config.deepseek_base_url();
    let api_provider = config.api_provider();
    validate_base_url_security(&base_url)?;

    let retry = config.retry_policy();
    let default_model = config.default_model();

    let provider_line = format!("API provider: {}", api_provider.as_str());
    logging::info(provider_line);
    logging::info(format!("API base URL: {base_url}"));
    let retry_line = format!(
        "Retry policy: enabled={}, max_retries={}, initial_delay={}s, max_delay={}s",
        retry.enabled, retry.max_retries, retry.initial_delay, retry.max_delay
    );
    logging::info(retry_line);

    let http_client = Self::build_http_client(&api_key)?;
    // Health tracking and rate limiting are shared (via `Arc`) by clones.
    let connection_health = Arc::new(AsyncMutex::new(ConnectionHealth::default()));
    let rate_limiter = Arc::new(AsyncMutex::new(TokenBucket::from_env()));

    Ok(Self {
        http_client,
        api_key,
        base_url,
        api_provider,
        retry,
        default_model,
        connection_health,
        rate_limiter,
    })
}
/// Build the `reqwest` client used for every request.
///
/// Default headers carry `Content-Type: application/json` and, when
/// `api_key` is non-blank, a `Bearer` authorization header. The client uses
/// a 30s connect timeout, TCP and HTTP/2 keep-alives, and requires TLS 1.2
/// or newer. `DEEPSEEK_FORCE_HTTP1` pins the client to HTTP/1.1, and a
/// non-empty `SSL_CERT_FILE` contributes extra root certificates.
///
/// # Errors
/// Fails when the API key cannot be encoded as a header value or the
/// underlying client builder fails.
fn build_http_client(api_key: &str) -> Result<reqwest::Client> {
    let mut headers = HeaderMap::new();
    headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
    if !api_key.trim().is_empty() {
        let mut bearer = HeaderValue::from_str(&format!("Bearer {api_key}"))?;
        // Mark the credential as sensitive so it is redacted from `Debug`
        // output of the header map.
        bearer.set_sensitive(true);
        headers.insert(AUTHORIZATION, bearer);
    }

    let mut builder = reqwest::Client::builder()
        .default_headers(headers)
        .connect_timeout(Duration::from_secs(30))
        .tcp_keepalive(Some(Duration::from_secs(30)))
        .http2_keep_alive_interval(Some(Duration::from_secs(15)))
        .http2_keep_alive_timeout(Duration::from_secs(20))
        .min_tls_version(reqwest::tls::Version::TLS_1_2);

    if force_http1_from_env() {
        logging::info("DEEPSEEK_FORCE_HTTP1=1 — pinning HTTP client to HTTP/1.1");
        builder = builder.http1_only();
    }

    if let Ok(cert_path) = std::env::var("SSL_CERT_FILE")
        && !cert_path.is_empty()
    {
        builder = add_extra_root_certs(builder, &cert_path);
    }

    builder.build().map_err(Into::into)
}
/// List available models from the provider.
///
/// Sends `GET {base}/models` through the retrying sender and parses the
/// body with `parse_models_response`.
///
/// # Errors
/// Fails when the request cannot be sent, the provider answers with a
/// non-success status (the bounded error body is included in the message),
/// the response body cannot be read, or the body is not a valid model list.
pub async fn list_models(&self) -> Result<Vec<AvailableModel>> {
    let url = api_url(&self.base_url, "models");
    let response = self.send_with_retry(|| self.http_client.get(&url)).await?;
    let status = response.status();
    if !status.is_success() {
        let error_text = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
        anyhow::bail!("Failed to list models: HTTP {status}: {error_text}");
    }

    // Surface body-read failures directly instead of substituting an empty
    // string, which would be reported as a misleading JSON parse error.
    let response_text = response
        .text()
        .await
        .context("Failed to read model list response body")?;
    parse_models_response(&response_text)
}
/// Take one token from the shared rate limiter, sleeping first when the
/// limiter reports that a delay is needed.
async fn wait_for_rate_limit(&self) {
    // The lock guard is a temporary of this statement, so it is released
    // before any sleep below.
    let pause = self.rate_limiter.lock().await.delay_until_available(1.0);
    match pause {
        Some(pause) => tokio::time::sleep(pause).await,
        None => {}
    }
}
/// Record a successful request in the shared connection-health state and
/// log a recovery message when `apply_request_success` reports one.
async fn mark_request_success(&self) {
    let mut state = self.connection_health.lock().await;
    let recovered = apply_request_success(&mut state, Instant::now());
    if recovered {
        logging::info("Connection recovered");
    }
}
/// Record a failed request in the shared connection-health state and log
/// the running failure count together with `reason`.
async fn mark_request_failure(&self, reason: &str) {
    let mut state = self.connection_health.lock().await;
    apply_request_failure(&mut state, Instant::now());
    let message = format!(
        "Connection degraded (failures={}): {}",
        state.consecutive_failures, reason
    );
    logging::warn(message);
}
/// Issue a lightweight `GET {base}/models` recovery probe when
/// `mark_recovery_probe_if_due` says one is due, and fold the outcome back
/// into the connection-health state.
async fn maybe_probe_recovery(&self) {
    let probe_due = {
        // Scope the guard so the lock is not held across the probe request.
        let mut state = self.connection_health.lock().await;
        mark_recovery_probe_if_due(&mut state, Instant::now())
    };
    if !probe_due {
        return;
    }

    let url = api_url(&self.base_url, "models");
    match self.http_client.get(url).send().await {
        Ok(resp) if resp.status().is_success() => {
            self.mark_request_success().await;
            logging::info("Recovery probe succeeded");
        }
        Ok(resp) => {
            let detail = format!("probe status={}", resp.status());
            self.mark_request_failure(&detail).await;
        }
        Err(err) => {
            let detail = format!("probe error={err}");
            self.mark_request_failure(&detail).await;
        }
    }
}
/// Send a request built by `build`, retrying according to the client's
/// retry policy.
///
/// `build` is invoked once per attempt so each retry gets a fresh
/// `RequestBuilder`. Every attempt first waits on the rate limiter. HTTP 429
/// and 5xx responses are converted into retryable errors (honouring any
/// `Retry-After` header); every other response, including other non-success
/// statuses, is returned to the caller as-is.
///
/// Retry progress is published through `crate::retry_status`, and the final
/// outcome is recorded in the connection-health state. A terminal failure
/// may additionally trigger a recovery probe.
///
/// # Errors
/// Returns the last attempt's error message once retries are exhausted.
pub(super) async fn send_with_retry<F>(&self, mut build: F) -> Result<reqwest::Response>
where
    F: FnMut() -> reqwest::RequestBuilder,
{
    let policy: LlmRetryConfig = self.retry.clone().into();
    let outcome = with_retry(
        &policy,
        || {
            let pending = build();
            async move {
                self.wait_for_rate_limit().await;
                let response = pending
                    .send()
                    .await
                    .map_err(|err| LlmError::from_reqwest(&err))?;

                let status = response.status();
                let should_retry =
                    !status.is_success() && (status.as_u16() == 429 || status.is_server_error());
                if !should_retry {
                    // Success, or a failure that retrying cannot fix: let
                    // the caller inspect the response.
                    return Ok(response);
                }

                // Headers must be read before the body consumes `response`.
                let retry_after = extract_retry_after(response.headers());
                let body = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
                Err(LlmError::from_http_response_with_retry_after(
                    status.as_u16(),
                    &body,
                    retry_after,
                ))
            }
        },
        Some(Box::new(|err, attempt, delay| {
            let (reason_label, human_reason) = retry_reason_label_and_human(err);
            logging::warn(format!(
                "HTTP retry reason={} attempt={} delay={:.2}s",
                reason_label,
                attempt + 1,
                delay.as_secs_f64(),
            ));
            crate::retry_status::start(attempt + 1, delay, human_reason);
        })),
    )
    .await;

    let failure = match outcome {
        Ok(response) => {
            crate::retry_status::succeeded();
            self.mark_request_success().await;
            return Ok(response);
        }
        Err(failure) => failure,
    };

    let detail = failure.last_error.to_string();
    if failure.attempts > 1 {
        crate::retry_status::failed(detail.clone());
    } else {
        // A single failed attempt never showed a retry banner; just clear.
        crate::retry_status::clear();
    }
    self.mark_request_failure(&detail).await;
    self.maybe_probe_recovery().await;
    Err(anyhow::anyhow!(detail))
}
}
/// Classify an `LlmError` for retry reporting.
///
/// Returns a pair: a fixed categorical label (for structured logs and
/// metrics) and a short human-readable reason (for the retry banner). Both
/// come from a single match so the two views cannot drift apart.
fn retry_reason_label_and_human(err: &LlmError) -> (&'static str, String) {
    let (label, human) = match err {
        LlmError::RateLimited { retry_after, .. } => {
            let human = match retry_after {
                Some(after) => format!("rate limited (Retry-After {}s)", after.as_secs()),
                None => "rate limited".to_string(),
            };
            ("rate_limited", human)
        }
        LlmError::ServerError { status, .. } => ("server_error", format!("upstream {status}")),
        LlmError::NetworkError(_) => ("network_error", "network error".to_string()),
        LlmError::Timeout(_) => ("timeout", "timeout".to_string()),
        _ => ("other", "other".to_string()),
    };
    (label, human)
}
/// `LlmClient` implementation that routes all traffic through the chat
/// completions surface.
impl LlmClient for DeepSeekClient {
    /// Name of the configured API provider.
    fn provider_name(&self) -> &'static str {
        self.api_provider.as_str()
    }

    /// The default model this client was configured with.
    fn model(&self) -> &str {
        self.default_model.as_str()
    }

    /// Probe `GET {base}/models` once (after rate limiting) and report
    /// whether it succeeded. Failures are recorded in the connection-health
    /// state and reported as `Ok(false)` rather than as an error.
    async fn health_check(&self) -> Result<bool> {
        let url = api_url(&self.base_url, "models");
        self.wait_for_rate_limit().await;

        let failure = match self.http_client.get(url).send().await {
            Ok(resp) if resp.status().is_success() => {
                self.mark_request_success().await;
                return Ok(true);
            }
            Ok(resp) => format!("health status={}", resp.status()),
            Err(err) => format!("health error={err}"),
        };
        self.mark_request_failure(&failure).await;
        Ok(false)
    }

    /// Non-streaming completion via the chat completions endpoint.
    async fn create_message(&self, request: MessageRequest) -> Result<MessageResponse> {
        let response = self.create_message_chat(&request).await;
        response
    }

    /// Streaming completion via the chat completions endpoint.
    async fn create_message_stream(
        &self,
        request: MessageRequest,
    ) -> Result<crate::llm_client::StreamEventBox> {
        let stream = self.handle_chat_completion_stream(request).await;
        stream
    }
}
/// Top-level shape of the model-list JSON payload that
/// `parse_models_response` deserializes.
#[derive(Debug, Deserialize)]
struct ModelsListResponse {
// Required field: deserialization fails when `data` is absent.
data: Vec<ModelListItem>,
}
/// One entry of the model list's `data` array.
#[derive(Debug, Deserialize)]
struct ModelListItem {
// Model identifier; required.
id: String,
// Optional; defaults to `None` when the field is missing.
#[serde(default)]
owned_by: Option<String>,
// Optional; defaults to `None` when the field is missing.
// NOTE(review): presumably a Unix creation timestamp — confirm against the provider API.
#[serde(default)]
created: Option<u64>,
}
/// Parse a model-list JSON payload into `AvailableModel`s, sorted by id with
/// duplicate ids collapsed to a single entry.
///
/// # Errors
/// Fails with "Failed to parse model list JSON" context when `payload` does
/// not match the expected shape.
pub(super) fn parse_models_response(payload: &str) -> Result<Vec<AvailableModel>> {
    let listing: ModelsListResponse =
        serde_json::from_str(payload).context("Failed to parse model list JSON")?;

    let mut models: Vec<AvailableModel> = Vec::new();
    for entry in listing.data {
        models.push(AvailableModel {
            id: entry.id,
            owned_by: entry.owned_by,
            created: entry.created,
        });
    }

    // Stable sort, then drop adjacent entries that share an id.
    models.sort_by(|left, right| left.id.cmp(&right.id));
    models.dedup_by(|later, earlier| later.id == earlier.id);
    Ok(models)
}
/// Flatten an optional system prompt into a single instruction string.
///
/// A text prompt is returned unchanged. A block prompt has its block texts
/// joined with a `---` separator line; if the joined text is blank the
/// result is `None`. A missing prompt is `None`.
pub(super) fn system_to_instructions(system: Option<SystemPrompt>) -> Option<String> {
    match system? {
        SystemPrompt::Text(text) => Some(text),
        SystemPrompt::Blocks(blocks) => {
            let texts: Vec<_> = blocks.into_iter().map(|block| block.text).collect();
            let combined = texts.join("\n\n---\n\n");
            let has_content = !combined.trim().is_empty();
            has_content.then_some(combined)
        }
    }
}
/// Write the provider-specific reasoning/thinking controls into a request
/// body.
///
/// `effort` is trimmed and lower-cased, then bucketed:
/// - `off` / `disabled` / `none` / `false` — thinking disabled;
/// - `low` / `minimal` / `medium` / `mid` / `high` / empty — thinking
///   enabled at effort `high`;
/// - `xhigh` / `max` / `highest` — thinking enabled at effort `max`.
///
/// `None` or any unrecognized value leaves `body` untouched.
///
/// NVIDIA NIM receives the settings under `chat_template_kwargs`; every
/// other provider receives top-level `thinking` (and `reasoning_effort`
/// when enabled).
pub(super) fn apply_reasoning_effort(
    body: &mut Value,
    effort: Option<&str>,
    provider: ApiProvider,
) {
    let Some(raw) = effort else {
        return;
    };
    let key = raw.trim().to_ascii_lowercase();

    // `None` means thinking is disabled; `Some(level)` means it is enabled
    // with that effort level on the wire.
    let level: Option<&'static str> = match key.as_str() {
        "off" | "disabled" | "none" | "false" => None,
        "low" | "minimal" | "medium" | "mid" | "high" | "" => Some("high"),
        "xhigh" | "max" | "highest" => Some("max"),
        _ => return,
    };

    // Listed exhaustively (no wildcard) so a new provider variant forces a
    // decision here at compile time.
    let uses_template_kwargs = match provider {
        ApiProvider::NvidiaNim => true,
        ApiProvider::Deepseek
        | ApiProvider::DeepseekCN
        | ApiProvider::Openrouter
        | ApiProvider::Novita
        | ApiProvider::Fireworks
        | ApiProvider::Sglang => false,
    };

    match (uses_template_kwargs, level) {
        (true, None) => {
            body["chat_template_kwargs"] = json!({
                "thinking": false,
            });
        }
        (true, Some(level)) => {
            body["chat_template_kwargs"] = json!({
                "thinking": true,
                "reasoning_effort": level,
            });
        }
        (false, None) => {
            body["thinking"] = json!({ "type": "disabled" });
        }
        (false, Some(level)) => {
            body["reasoning_effort"] = json!(level);
            body["thinking"] = json!({ "type": "enabled" });
        }
    }
}
/// Convert a provider `usage` JSON object into a `Usage` record.
///
/// Field resolution:
/// - input tokens: `input_tokens`, else `prompt_tokens`, else 0;
/// - output tokens: `output_tokens`, else `completion_tokens`, else 0;
/// - cache hits: `prompt_cache_hit_tokens`, else
///   `prompt_tokens_details.cached_tokens`;
/// - cache misses: `prompt_cache_miss_tokens`, else input tokens minus
///   `prompt_tokens_details.cached_tokens` (saturating at 0) when that
///   cached count is present;
/// - reasoning tokens: `completion_tokens_details.reasoning_tokens`;
/// - server tool usage: counters under `server_tool_use`, when present.
///
/// Counts that do not fit in `u32` saturate at `u32::MAX` instead of
/// silently wrapping. `reasoning_replay_tokens` is always `None` here.
pub(super) fn parse_usage(usage: Option<&Value>) -> Usage {
    /// Narrow a `u64` count to `u32`, saturating rather than truncating.
    fn clamp_u32(value: u64) -> u32 {
        u32::try_from(value).unwrap_or(u32::MAX)
    }

    let input_tokens = usage
        .and_then(|u| u.get("input_tokens").or_else(|| u.get("prompt_tokens")))
        .and_then(Value::as_u64)
        .unwrap_or(0);
    let output_tokens = usage
        .and_then(|u| {
            u.get("output_tokens")
                .or_else(|| u.get("completion_tokens"))
        })
        .and_then(Value::as_u64)
        .unwrap_or(0);

    // Alternate cache telemetry: only a cached-token count is reported, so
    // the miss count has to be derived from the input total.
    let cached_tokens = usage
        .and_then(|u| u.get("prompt_tokens_details"))
        .and_then(|details| details.get("cached_tokens"))
        .and_then(Value::as_u64);

    let prompt_cache_hit_tokens = usage
        .and_then(|u| u.get("prompt_cache_hit_tokens"))
        .and_then(Value::as_u64)
        .or(cached_tokens)
        .map(clamp_u32);
    let prompt_cache_miss_tokens = usage
        .and_then(|u| u.get("prompt_cache_miss_tokens"))
        .and_then(Value::as_u64)
        .or_else(|| cached_tokens.map(|cached| input_tokens.saturating_sub(cached)))
        .map(clamp_u32);

    let reasoning_tokens = usage
        .and_then(|u| u.get("completion_tokens_details"))
        .and_then(|details| details.get("reasoning_tokens"))
        .and_then(Value::as_u64)
        .map(clamp_u32);

    let server_tool_use = usage.and_then(|u| u.get("server_tool_use")).map(|server| {
        let code_execution_requests = server
            .get("code_execution_requests")
            .and_then(Value::as_u64)
            .map(clamp_u32);
        let tool_search_requests = server
            .get("tool_search_requests")
            .and_then(Value::as_u64)
            .map(clamp_u32);
        ServerToolUsage {
            code_execution_requests,
            tool_search_requests,
        }
    });

    Usage {
        input_tokens: clamp_u32(input_tokens),
        output_tokens: clamp_u32(output_tokens),
        prompt_cache_hit_tokens,
        prompt_cache_miss_tokens,
        reasoning_tokens,
        reasoning_replay_tokens: None,
        server_tool_use,
    }
}
impl DeepSeekClient {
    /// Call the DeepSeek `/beta/completions` FIM endpoint.
    ///
    /// Posts `model`, `prompt`, `suffix` and `max_tokens` as a JSON body
    /// through `send_with_retry` and returns `choices[0].text` from the
    /// response.
    ///
    /// # Errors
    ///
    /// Returns an error when the request fails, the server answers with a
    /// non-success status (the message carries a bounded copy of the error
    /// body), the response body cannot be read, the body is not valid JSON,
    /// or `choices[0].text` is missing or not a string.
    pub async fn fim_completion(
        &self,
        model: &str,
        prompt: &str,
        suffix: &str,
        max_tokens: u32,
    ) -> anyhow::Result<String> {
        let url = api_url(&self.base_url, "beta/completions");
        let body = json!({
            "model": model,
            "prompt": prompt,
            "suffix": suffix,
            "max_tokens": max_tokens,
        });
        let response = self
            .send_with_retry(|| self.http_client.post(&url).json(&body))
            .await?;
        let status = response.status();
        if !status.is_success() {
            let error_text = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
            anyhow::bail!("FIM API error: HTTP {status}: {error_text}");
        }
        // Surface a body-read failure as itself; defaulting to an empty string
        // would report it as a misleading JSON parse error instead.
        let response_text = response
            .text()
            .await
            .context("Failed to read FIM API response body")?;
        let value: serde_json::Value =
            serde_json::from_str(&response_text).context("Failed to parse FIM API response")?;
        let text = value
            .pointer("/choices/0/text")
            .and_then(serde_json::Value::as_str)
            .ok_or_else(|| anyhow::anyhow!("FIM response missing choices[0].text"))?;
        Ok(text.to_string())
    }
}
mod chat;
#[cfg(test)]
mod tests {
use super::*;
use crate::client::chat::{
build_chat_messages, build_chat_messages_for_request, count_reasoning_replay_chars,
parse_chat_message, parse_sse_chunk, sanitize_thinking_mode_messages, tool_to_chat,
};
use crate::models::{
ContentBlock, ContentBlockStart, Delta, Message, MessageRequest, StreamEvent, Tool,
};
use serde_json::json;
/// A dot in a tool name is escaped on the way out and restored on decode.
#[test]
fn tool_name_roundtrip_dot() {
    let name = "multi_tool_use.parallel";
    let wire = to_api_tool_name(name);
    assert_eq!(wire, "multi_tool_use-x00002E-parallel");
    assert_eq!(from_api_tool_name(&wire), name);
}
/// An escape whose leading dash was replaced by a literal dot still decodes.
#[test]
fn tool_name_decode_mangled_dot_prefix() {
    assert_eq!(
        from_api_tool_name("multi_tool_use.x00002E-parallel"),
        "multi_tool_use..parallel"
    );
}
/// A bare `x00002E` escape without surrounding dashes decodes to a dot.
#[test]
fn tool_name_decode_bare_hex_no_trailing_dash() {
    assert_eq!(from_api_tool_name("foo_x00002Ebar"), "foo_.bar");
}
/// A bare hex sequence that would decode to an alphanumeric (`A`) is left alone.
#[test]
fn tool_name_bare_hex_preserves_alnum() {
    let name = "foox000041bar";
    assert_eq!(from_api_tool_name(name), name);
}
/// A bare hex sequence that would decode to an underscore is left alone.
#[test]
fn tool_name_bare_hex_preserves_underscore() {
    let name = "foox00005Fbar";
    assert_eq!(from_api_tool_name(name), name);
}
/// A colon in a tool name survives an encode/decode round trip.
#[test]
fn tool_name_roundtrip_colon() {
    let name = "mcp__server:tool_name";
    let wire = to_api_tool_name(name);
    assert_eq!(from_api_tool_name(&wire), name);
}
/// `api_url` adds `/v1` to a bare host and keeps an explicit `/v1` or `/beta`.
#[test]
fn api_url_handles_default_v1_and_beta_base_urls() {
    let cases = [
        (
            "https://api.deepseek.com",
            "https://api.deepseek.com/v1/chat/completions",
        ),
        (
            "https://api.deepseek.com/v1",
            "https://api.deepseek.com/v1/chat/completions",
        ),
        (
            "https://api.deepseek.com/beta",
            "https://api.deepseek.com/beta/chat/completions",
        ),
    ];
    for (base_url, expected) in cases {
        assert_eq!(api_url(base_url, "chat/completions"), expected);
    }
}
/// An assistant turn holding thinking plus text serializes the text as
/// `content` and the thinking as `reasoning_content`.
#[test]
fn chat_messages_keep_reasoning_content_on_all_assistant_messages() {
    let thinking = ContentBlock::Thinking {
        thinking: "plan".to_string(),
    };
    let answer = ContentBlock::Text {
        text: "done".to_string(),
        cache_control: None,
    };
    let history = [Message {
        role: "assistant".to_string(),
        content: vec![thinking, answer],
    }];

    let wire = build_chat_messages(None, &history, "deepseek-v4-pro");
    let assistant = wire
        .iter()
        .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
        .expect("assistant message");

    let content = assistant.get("content").and_then(Value::as_str);
    assert_eq!(content, Some("done"));

    let reasoning = assistant.get("reasoning_content").and_then(Value::as_str);
    assert_eq!(
        reasoning,
        Some("plan"),
        "thinking-mode models must keep reasoning_content on all assistant messages"
    );
}
/// Reasoning attached to an earlier tool-calling assistant turn is still
/// emitted as `reasoning_content` after a later user turn was appended.
#[test]
fn chat_messages_replay_prior_tool_round_reasoning_after_new_user_turn() {
    // Plain single-text-block message for the given role.
    let text_message = |role: &str, text: &str| Message {
        role: role.to_string(),
        content: vec![ContentBlock::Text {
            text: text.to_string(),
            cache_control: None,
        }],
    };
    let tool_call = Message {
        role: "assistant".to_string(),
        content: vec![
            ContentBlock::Thinking {
                thinking: "Need to call a tool".to_string(),
            },
            ContentBlock::ToolUse {
                id: "tool-1".to_string(),
                name: "get_date".to_string(),
                input: json!({}),
                caller: None,
            },
        ],
    };
    let tool_reply = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::ToolResult {
            tool_use_id: "tool-1".to_string(),
            content: "2026-04-23".to_string(),
            is_error: None,
            content_blocks: None,
        }],
    };
    let history = vec![
        text_message("user", "Need the date"),
        tool_call,
        tool_reply,
        text_message("assistant", "It is 2026-04-23."),
        text_message("user", "Thanks. Next question."),
    ];

    let wire = build_chat_messages(None, &history, "deepseek-v4-pro");
    let tool_assistant = wire
        .iter()
        .find(|value| {
            let is_assistant = value.get("role").and_then(Value::as_str) == Some("assistant");
            is_assistant && value.get("tool_calls").is_some()
        })
        .expect("tool-call assistant message");

    let reasoning = tool_assistant
        .get("reasoning_content")
        .and_then(Value::as_str);
    assert_eq!(
        reasoning,
        Some("Need to call a tool"),
        "thinking-mode tool rounds must replay reasoning_content on later requests"
    );
}
#[test]
fn chat_messages_allow_tool_round_without_reasoning_when_thinking_disabled() {
let request = MessageRequest {
model: "deepseek-v4-pro".to_string(),
messages: vec![
Message {
role: "assistant".to_string(),
content: vec![ContentBlock::ToolUse {
id: "call-no-thinking".to_string(),
name: "read_file".to_string(),
input: json!({"path": "Cargo.toml"}),
caller: None,
}],
},
Message {
role: "user".to_string(),
content: vec![ContentBlock::ToolResult {
tool_use_id: "call-no-thinking".to_string(),
content: "workspace manifest".to_string(),
is_error: None,
content_blocks: None,
}],
},
],
max_tokens: 1024,
system: None,
tools: None,
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: Some("off".to_string()),
stream: None,
temperature: None,
top_p: None,
};
let out = build_chat_messages_for_request(&request);
assert!(
out.iter().any(
|value| value.get("role").and_then(Value::as_str) == Some("assistant")
&& value.get("tool_calls").is_some()
),
"tool calls remain valid when thinking mode is disabled"
);
@@ -876,41 +1677,6 @@ impl Clone for DeepSeekClient {
#[test]
fn parse_usage_reads_deepseek_cache_and_reasoning_tokens() {
fn parse_usage(usage: Option<&Value>) -> Usage {
let usage = usage.expect("usage");
let input_tokens = usage
.get("prompt_tokens")
.and_then(Value::as_u64)
.expect("prompt tokens") as u32;
let output_tokens = usage
.get("completion_tokens")
.and_then(Value::as_u64)
.expect("completion tokens") as u32;
let prompt_cache_hit_tokens = usage
.get("prompt_cache_hit_tokens")
.and_then(Value::as_u64)
.map(|v| v as u32);
let prompt_cache_miss_tokens = usage
.get("prompt_cache_miss_tokens")
.and_then(Value::as_u64)
.map(|v| v as u32);
let reasoning_tokens = usage
.get("completion_tokens_details")
.and_then(|d| d.get("reasoning_tokens"))
.and_then(Value::as_u64)
.map(|v| v as u32);
Usage {
input_tokens,
output_tokens,
prompt_cache_hit_tokens,
prompt_cache_miss_tokens,
reasoning_tokens,
reasoning_replay_tokens: None,
server_tool_use: None,
}
}
let usage = parse_usage(Some(&json!({
"prompt_tokens": 100,
"completion_tokens": 20,
@@ -928,6 +1694,22 @@ impl Clone for DeepSeekClient {
assert_eq!(usage.reasoning_tokens, Some(12));
}
/// With only `prompt_tokens_details.cached_tokens` present, the cached count
/// becomes the hit count and the remainder of the prompt the miss count.
#[test]
fn parse_usage_reads_v4_prompt_tokens_details_cached_tokens() {
    let payload = json!({
        "prompt_tokens": 4000,
        "completion_tokens": 20,
        "prompt_tokens_details": {
            "cached_tokens": 3000
        }
    });
    let parsed = parse_usage(Some(&payload));

    assert_eq!(parsed.input_tokens, 4000);
    assert_eq!(parsed.output_tokens, 20);
    assert_eq!(parsed.prompt_cache_hit_tokens, Some(3000));
    assert_eq!(parsed.prompt_cache_miss_tokens, Some(1000));
}
#[test]
fn sanitize_thinking_mode_counts_reasoning_replay_across_assistant_turns() {
// Multi-turn body that mimics two prior tool-calling rounds: each
+253 -1
View File
@@ -699,6 +699,134 @@ fn tail_chars(text: &str, max_chars: usize) -> String {
text[start_idx..].to_string()
}
/// What `collect_tool_uses` records about one `ToolUse` block, keyed by the
/// block's id so a later tool result can be matched back to its call.
#[derive(Debug, Clone)]
struct ToolUseInfo {
/// Tool name copied from the `ToolUse` block.
name: String,
/// Identity key produced by `tool_use_key` (tool name plus serialized input).
key: String,
/// Serialized input shortened by `tool_args_preview`.
args_preview: String,
}
/// Identity key for a tool call: `<name>:<serialized input>`.
///
/// Falls back to the value's `Display` rendering if serialization fails.
fn tool_use_key(name: &str, input: &serde_json::Value) -> String {
    let args = match serde_json::to_string(input) {
        Ok(serialized) => serialized,
        Err(_) => input.to_string(),
    };
    format!("{name}:{args}")
}
/// Serialized tool input cut down to at most 120 characters via
/// `truncate_chars`, for use in one-line summaries.
fn tool_args_preview(input: &serde_json::Value) -> String {
    const PREVIEW_MAX_CHARS: usize = 120;

    let serialized = match serde_json::to_string(input) {
        Ok(json) => json,
        Err(_) => input.to_string(),
    };
    let preview = truncate_chars(&serialized, PREVIEW_MAX_CHARS);
    preview.to_string()
}
/// Index every `ToolUse` block in `messages` by its id.
///
/// If the same id appears more than once, the entry from the later block
/// replaces the earlier one.
fn collect_tool_uses(messages: &[Message]) -> HashMap<String, ToolUseInfo> {
    messages
        .iter()
        .flat_map(|message| message.content.iter())
        .filter_map(|block| match block {
            ContentBlock::ToolUse {
                id, name, input, ..
            } => {
                let info = ToolUseInfo {
                    name: name.clone(),
                    key: tool_use_key(name, input),
                    args_preview: tool_args_preview(input),
                };
                Some((id.clone(), info))
            }
            _ => None,
        })
        .collect()
}
/// One tool result found by `prune_tool_results` in the unprotected part of
/// the history, together with what is needed to rewrite it in place.
struct ToolResultPruneCandidate {
/// Index of the message that holds the result.
message_idx: usize,
/// Index of the result block within that message's content.
block_idx: usize,
/// Identity key of the originating tool call (name plus serialized input).
key: String,
/// Name of the originating tool, used in the replacement summary.
tool_name: String,
/// Shortened serialized arguments, used in the replacement summary.
args_preview: String,
/// Byte length (`String::len`) of the result content before pruning.
original_len: usize,
}
/// Mechanically prune old verbose tool results before paying for an LLM summary.
///
/// The most recent `protected_window` messages stay byte-for-byte intact. Older
/// duplicate tool results keep the freshest full body and replace earlier
/// copies with one-line summaries; non-duplicate old results are summarized only
/// when they exceed the normal summary snippet size.
///
/// "Freshest" is decided per result block, not per message, so identical
/// calls whose results sit in the same message are deduplicated as well.
/// Results whose `tool_use_id` matches no `ToolUse` block are left alone, and
/// a result is never replaced by a summary that is not strictly shorter.
///
/// Returns the number of content bytes removed across all rewritten results.
pub fn prune_tool_results(messages: &mut [Message], protected_window: usize) -> usize {
    let cutoff = messages.len().saturating_sub(protected_window);
    if cutoff == 0 {
        return 0;
    }

    let tool_uses = collect_tool_uses(messages);
    let mut candidates = Vec::new();
    // Position (message, block) of the last result seen for each call key.
    let mut latest_by_key: HashMap<String, (usize, usize)> = HashMap::new();
    let mut count_by_key: HashMap<String, usize> = HashMap::new();

    for (message_idx, message) in messages.iter().take(cutoff).enumerate() {
        for (block_idx, block) in message.content.iter().enumerate() {
            let ContentBlock::ToolResult {
                tool_use_id,
                content,
                ..
            } = block
            else {
                continue;
            };
            let Some(info) = tool_uses.get(tool_use_id) else {
                continue;
            };

            // Iteration is in history order, so the last insert wins as "latest".
            latest_by_key.insert(info.key.clone(), (message_idx, block_idx));
            *count_by_key.entry(info.key.clone()).or_insert(0) += 1;
            candidates.push(ToolResultPruneCandidate {
                message_idx,
                block_idx,
                key: info.key.clone(),
                tool_name: info.name.clone(),
                args_preview: info.args_preview.clone(),
                original_len: content.len(),
            });
        }
    }

    let mut bytes_saved = 0usize;
    for candidate in candidates {
        let duplicate_count = count_by_key.get(&candidate.key).copied().unwrap_or(0);
        let position = (candidate.message_idx, candidate.block_idx);
        let is_latest_duplicate =
            duplicate_count > 1 && latest_by_key.get(&candidate.key) == Some(&position);
        if is_latest_duplicate {
            continue;
        }
        // A unique result is only worth pruning once it is larger than a snippet.
        if duplicate_count <= 1 && candidate.original_len <= SUMMARY_TOOL_RESULT_SNIPPET_CHARS {
            continue;
        }

        let summary = format!(
            "[{}] tool result pruned ({} bytes; args: {})",
            candidate.tool_name, candidate.original_len, candidate.args_preview
        );
        if summary.len() >= candidate.original_len {
            continue;
        }

        if let ContentBlock::ToolResult {
            content,
            content_blocks,
            ..
        } = &mut messages[candidate.message_idx].content[candidate.block_idx]
        {
            bytes_saved = bytes_saved.saturating_add(content.len().saturating_sub(summary.len()));
            *content = summary;
            *content_blocks = None;
        }
    }

    bytes_saved
}
/// Result of a compaction operation with metadata.
#[derive(Debug)]
pub struct CompactionResult {
@@ -747,6 +875,39 @@ pub async fn compact_messages_safe(
const MAX_RETRIES: u32 = 3;
const BASE_DELAY_MS: u64 = 1000;
let mut pruned_messages = messages.to_vec();
let pruned_bytes = prune_tool_results(&mut pruned_messages, KEEP_RECENT_MESSAGES);
let compaction_input: &[Message] = if pruned_bytes > 0 {
logging::info(format!(
"Local tool-result prune saved {pruned_bytes} bytes before LLM compaction"
));
let was_over_threshold = should_compact(
messages,
config,
workspace,
external_pins,
external_working_set_paths,
);
let now_under_threshold = !should_compact(
&pruned_messages,
config,
workspace,
external_pins,
external_working_set_paths,
);
if was_over_threshold && now_under_threshold {
return Ok(CompactionResult {
messages: pruned_messages,
summary_prompt: None,
removed_messages: Vec::new(),
retries_used: 0,
});
}
&pruned_messages
} else {
messages
};
let mut last_error: Option<anyhow::Error> = None;
for attempt in 0..MAX_RETRIES {
@@ -758,7 +919,7 @@ pub async fn compact_messages_safe(
match compact_messages(
client,
messages,
compaction_input,
config,
workspace,
external_pins,
@@ -1269,6 +1430,30 @@ mod tests {
}
}
/// Test helper: an assistant message holding a single `ToolUse` block.
fn tool_use(id: &str, name: &str, input: serde_json::Value) -> Message {
    let call = ContentBlock::ToolUse {
        id: id.to_owned(),
        name: name.to_owned(),
        input,
        caller: None,
    };
    Message {
        role: String::from("assistant"),
        content: vec![call],
    }
}
/// Test helper: a user message holding a single non-error `ToolResult` block.
fn tool_result(id: &str, content: &str) -> Message {
    let reply = ContentBlock::ToolResult {
        tool_use_id: id.to_owned(),
        content: content.to_owned(),
        is_error: None,
        content_blocks: None,
    };
    Message {
        role: String::from("user"),
        content: vec![reply],
    }
}
#[test]
fn truncate_chars_respects_unicode_boundaries() {
let text = "abc😀é";
@@ -1279,6 +1464,73 @@ mod tests {
assert_eq!(truncate_chars(text, 5), "abc😀é");
}
/// An oversized tool result outside the protected window is replaced by a
/// shorter summary naming the tool and its arguments.
#[test]
fn prune_tool_results_summarizes_old_verbose_outputs() {
    let long_body = "x".repeat(SUMMARY_TOOL_RESULT_SNIPPET_CHARS + 80);
    let mut history = vec![
        tool_use("call-1", "read_file", json!({"path": "Cargo.toml"})),
        tool_result("call-1", &long_body),
        msg("user", "recent question"),
        msg("assistant", "recent answer"),
    ];

    let bytes_saved = prune_tool_results(&mut history, 2);
    assert!(bytes_saved > 0);

    match &history[1].content[0] {
        ContentBlock::ToolResult { content, .. } => {
            assert!(content.contains("[read_file] tool result pruned"));
            assert!(content.contains("Cargo.toml"));
            assert!(content.len() < long_body.len());
        }
        _ => panic!("expected tool result"),
    }
}
/// A verbose tool result inside the protected window is left untouched.
#[test]
fn prune_tool_results_preserves_protected_tail() {
    let long_body = "x".repeat(SUMMARY_TOOL_RESULT_SNIPPET_CHARS + 80);
    let mut history = vec![
        msg("user", "older context"),
        tool_use("call-1", "read_file", json!({"path": "Cargo.toml"})),
        tool_result("call-1", &long_body),
    ];

    let bytes_saved = prune_tool_results(&mut history, 2);
    assert_eq!(bytes_saved, 0);

    match &history[2].content[0] {
        ContentBlock::ToolResult { content, .. } => assert_eq!(content, &long_body),
        _ => panic!("expected tool result"),
    }
}
/// Of two identical calls, the earlier result is summarized while the later
/// one keeps its full body.
#[test]
fn prune_tool_results_dedupes_identical_reads_but_keeps_latest_full_body() {
    let stale_body = "first ".repeat(80);
    let fresh_body = "second ".repeat(80);
    let mut history = vec![
        tool_use("call-1", "read_file", json!({"path": "Cargo.toml"})),
        tool_result("call-1", &stale_body),
        tool_use("call-2", "read_file", json!({"path": "Cargo.toml"})),
        tool_result("call-2", &fresh_body),
        msg("user", "tail"),
    ];

    let bytes_saved = prune_tool_results(&mut history, 1);
    assert!(bytes_saved > 0);

    match &history[1].content[0] {
        ContentBlock::ToolResult { content, .. } => {
            assert!(content.contains("tool result pruned"));
        }
        _ => panic!("expected older tool result"),
    }
    match &history[3].content[0] {
        ContentBlock::ToolResult { content, .. } => assert_eq!(content, &fresh_body),
        _ => panic!("expected latest tool result"),
    }
}
#[test]
fn is_transient_error_detects_network_issues() {
let timeout_err = anyhow::anyhow!("Connection timeout");
+37 -18
View File
@@ -146,8 +146,6 @@ pub enum RequestPayloadMode {
ChatCompletions,
}
/// Resolve the provider capability for a given [`ApiProvider`] and resolved
/// model string.
///
@@ -201,26 +199,25 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
}
}
/// Canonicalize common model aliases to stable DeepSeek IDs.
/// Canonicalize compact DeepSeek model aliases to stable IDs.
///
/// v4-pro/v4-flash provide canonical forms; v-series snapshots pass through
/// unchanged. Legacy aliases (deepseek-chat, etc.) are no longer folded —
/// DeepSeek's own `/v1/models` endpoint is the source of truth.
/// Already-valid model IDs pass through unchanged. Only the compact
/// `v4pro`/`v4flash` spellings are rewritten to their hyphenated forms.
#[must_use]
pub fn canonical_model_name(model: &str) -> Option<&'static str> {
match model.trim().to_ascii_lowercase().as_str() {
"deepseek-v4-pro" | "deepseek-v4pro" => Some("deepseek-v4-pro"),
"deepseek-v4-flash" | "deepseek-v4flash" => Some("deepseek-v4-flash"),
"deepseek-v4pro" => Some("deepseek-v4-pro"),
"deepseek-v4flash" => Some("deepseek-v4-flash"),
_ => None,
}
}
/// Normalize a configured/runtime model name.
///
/// Trims whitespace and lowercases. v-series snapshots (deepseek-v4-flash-20260423)
/// pass through unchanged so users can pin dated variants. Non-DeepSeek or
/// malformed names return `None`; DeepSeek's `/v1/models` endpoint is the
/// authority on valid model IDs.
/// Trims whitespace, preserves caller-provided case for already-valid model
/// IDs, and only canonicalizes compact aliases like `deepseek-v4pro`.
/// Non-DeepSeek or malformed names return `None`; DeepSeek's `/v1/models`
/// endpoint is the authority on valid model IDs.
#[must_use]
pub fn normalize_model_name(model: &str) -> Option<String> {
let trimmed = model.trim();
@@ -236,10 +233,11 @@ pub fn normalize_model_name(model: &str) -> Option<String> {
return None;
}
if normalized.chars().all(|ch| {
ch.is_ascii_lowercase() || ch.is_ascii_digit() || matches!(ch, '-' | '_' | '.' | ':' | '/')
}) {
return Some(normalized);
if trimmed
.chars()
.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.' | ':' | '/'))
{
return Some(trimmed.to_string());
}
None
@@ -1869,7 +1867,8 @@ fn normalize_model_for_provider(provider: ApiProvider, model: &str) -> Option<St
}
fn model_for_provider(provider: ApiProvider, normalized: String) -> String {
match (provider, normalized.as_str()) {
let lowered = normalized.to_ascii_lowercase();
match (provider, lowered.as_str()) {
(ApiProvider::NvidiaNim, "deepseek-v4-pro") => DEFAULT_NVIDIA_NIM_MODEL.to_string(),
(ApiProvider::NvidiaNim, "deepseek-v4-flash") => DEFAULT_NVIDIA_NIM_FLASH_MODEL.to_string(),
(ApiProvider::Openrouter, "deepseek-v4-pro") => DEFAULT_OPENROUTER_MODEL.to_string(),
@@ -3212,6 +3211,27 @@ api_key = "old-openrouter-key"
normalize_model_name("deepseek-ai/deepseek-v4-pro").as_deref(),
Some("deepseek-ai/deepseek-v4-pro")
);
// preserve exact case for providers that require case-sensitive model IDs
assert_eq!(
normalize_model_name("DeepSeek-V4-Pro").as_deref(),
Some("DeepSeek-V4-Pro")
);
assert_eq!(
normalize_model_name("deepseek-ai/DeepSeek-V4-Pro").as_deref(),
Some("deepseek-ai/DeepSeek-V4-Pro")
);
}
/// A mixed-case model ID passes through for DeepSeek but is still remapped to
/// the NVIDIA NIM default for that provider.
#[test]
fn normalize_model_for_provider_keeps_provider_remaps_when_case_is_preserved() {
    let mixed_case = "DeepSeek-V4-Pro";

    let direct = normalize_model_for_provider(ApiProvider::Deepseek, mixed_case);
    assert_eq!(direct.as_deref(), Some("DeepSeek-V4-Pro"));

    let remapped = normalize_model_for_provider(ApiProvider::NvidiaNim, mixed_case);
    assert_eq!(remapped.as_deref(), Some(DEFAULT_NVIDIA_NIM_MODEL));
}
#[test]
@@ -4023,5 +4043,4 @@ model = "deepseek-v4-pro"
let deserialized: ProviderCapability = serde_json::from_value(json).unwrap();
assert_eq!(cap, deserialized);
}
}
+2
View File
@@ -1878,6 +1878,7 @@ use context::{
turn_response_headroom_tokens,
};
mod dispatch;
mod loop_guard;
mod lsp_hooks;
mod streaming;
mod tool_catalog;
@@ -1893,6 +1894,7 @@ use self::dispatch::{
parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first,
should_parallelize_tool_batch, should_stop_after_plan_tool,
};
use self::loop_guard::{AttemptDecision, LoopGuard, OutcomeDecision};
#[cfg(test)]
use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
#[cfg(test)]
+1
View File
@@ -48,6 +48,7 @@ pub(super) struct ToolExecutionPlan {
pub(super) supports_parallel: bool,
pub(super) read_only: bool,
pub(super) blocked_error: Option<ToolError>,
pub(super) guard_result: Option<ToolResult>,
}
#[derive(Debug, serde::Serialize)]
+222
View File
@@ -0,0 +1,222 @@
//! Pure-data guardrails for repeated tool-call loops.
use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::fmt::Write as _;
use std::hash::{Hash, Hasher};
use serde_json::Value;
/// Attempt count for one `(tool, args)` pair at or above which
/// `LoopGuard::record_attempt` returns `AttemptDecision::Block`.
const IDENTICAL_CALL_BLOCK_THRESHOLD: u32 = 3;
/// Consecutive-failure count for one tool at which
/// `LoopGuard::record_outcome` returns `OutcomeDecision::Warn` (exact match only).
const FAILURE_WARN_THRESHOLD: u32 = 3;
/// Consecutive-failure count for one tool at or above which
/// `LoopGuard::record_outcome` returns `OutcomeDecision::Halt`.
const FAILURE_HALT_THRESHOLD: u32 = 8;
/// Verdict of `LoopGuard::record_attempt` for a tool call about to run.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) enum AttemptDecision {
/// The call has not reached the identical-call threshold.
Proceed,
/// The identical-call threshold was reached; carries the corrective message.
Block(String),
}
/// Verdict of `LoopGuard::record_outcome` after a tool call finished.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(super) enum OutcomeDecision {
/// Nothing to report (success, or a failure count below/between thresholds).
Continue,
/// The warn threshold was hit exactly; carries the warning text.
Warn(String),
/// The halt threshold was reached; carries the stop message.
Halt(String),
}
/// Counters behind the repeated-tool-call guard.
#[derive(Debug, Default)]
pub(super) struct LoopGuard {
/// Attempts seen per `(tool name, hash of canonicalized args)` pair.
call_counts: HashMap<(String, u64), u32>,
/// Consecutive failures per tool name; reset to 0 by a successful outcome.
failure_counts: HashMap<String, u32>,
}
impl LoopGuard {
    /// Count one attempt of `tool` with `args` and decide whether it may run.
    ///
    /// Attempts are counted per tool name and hash of the canonicalized
    /// arguments. Once the count reaches `IDENTICAL_CALL_BLOCK_THRESHOLD` the
    /// attempt (and every later identical one) is answered with
    /// [`AttemptDecision::Block`] carrying a corrective message.
    pub(super) fn record_attempt(&mut self, tool: &str, args: &Value) -> AttemptDecision {
        let count = {
            let slot = self
                .call_counts
                .entry((tool.to_string(), hash_args(args)))
                .or_insert(0);
            *slot = slot.saturating_add(1);
            *slot
        };

        if count < IDENTICAL_CALL_BLOCK_THRESHOLD {
            return AttemptDecision::Proceed;
        }
        AttemptDecision::Block(format!(
            "Blocked: this exact call (`{tool}` with these arguments) has already run {count} times this turn. Stop retrying it unchanged. Either change the arguments or pick a different tool."
        ))
    }

    /// Record whether a call to `tool` succeeded and decide how to react.
    ///
    /// A success resets the tool's consecutive-failure counter. A failure
    /// increments it; reaching `FAILURE_HALT_THRESHOLD` yields
    /// [`OutcomeDecision::Halt`], landing exactly on `FAILURE_WARN_THRESHOLD`
    /// yields [`OutcomeDecision::Warn`], and anything else continues.
    pub(super) fn record_outcome(&mut self, tool: &str, ok: bool) -> OutcomeDecision {
        let slot = self.failure_counts.entry(tool.to_string()).or_insert(0);
        if ok {
            *slot = 0;
            return OutcomeDecision::Continue;
        }
        *slot = slot.saturating_add(1);
        let failures = *slot;

        // The halt check comes first so it takes precedence over the warning.
        match failures {
            n if n >= FAILURE_HALT_THRESHOLD => OutcomeDecision::Halt(format!(
                "Stop retrying `{tool}` - it has failed {failures} consecutive times. Choose a different approach."
            )),
            n if n == FAILURE_WARN_THRESHOLD => OutcomeDecision::Warn(format!(
                "Tool `{tool}` has failed {failures} consecutive times this turn."
            )),
            _ => OutcomeDecision::Continue,
        }
    }
}
/// Hash tool arguments via their canonical JSON text, so objects that differ
/// only in key order produce the same value.
fn hash_args(args: &Value) -> u64 {
    let canonical = {
        let mut buffer = String::new();
        write_canonical_json(args, &mut buffer);
        buffer
    };
    let mut state = DefaultHasher::new();
    canonical.hash(&mut state);
    state.finish()
}
/// Append a canonical, whitespace-free JSON rendering of `value` to `out`.
///
/// Object members are written in ascending key order at every nesting level;
/// array order is preserved. Strings and keys are escaped by `serde_json`.
fn write_canonical_json(value: &Value, out: &mut String) {
    match value {
        Value::Null => out.push_str("null"),
        Value::Bool(true) => out.push_str("true"),
        Value::Bool(false) => out.push_str("false"),
        Value::Number(value) => {
            // The formatting result is deliberately ignored.
            let _ = write!(out, "{value}");
        }
        Value::String(text) => {
            let quoted = serde_json::to_string(text).expect("serializing string cannot fail");
            out.push_str(&quoted);
        }
        Value::Array(items) => {
            out.push('[');
            let mut separator = "";
            for item in items {
                out.push_str(separator);
                write_canonical_json(item, out);
                separator = ",";
            }
            out.push(']');
        }
        Value::Object(members) => {
            // Keys within one object are unique, so an unstable sort is enough.
            let mut entries: Vec<_> = members.iter().collect();
            entries.sort_unstable_by(|(left, _), (right, _)| left.cmp(right));

            out.push('{');
            let mut separator = "";
            for (key, item) in entries {
                out.push_str(separator);
                let quoted = serde_json::to_string(key).expect("serializing key cannot fail");
                out.push_str(&quoted);
                out.push(':');
                write_canonical_json(item, out);
                separator = ",";
            }
            out.push('}');
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Two identical calls pass; the third comes back as a block message.
    #[test]
    fn third_identical_tool_call_is_blocked() {
        let mut guard = LoopGuard::default();
        let args = json!({"path": "src/main.rs"});

        for _ in 0..2 {
            assert_eq!(
                guard.record_attempt("read_file", &args),
                AttemptDecision::Proceed
            );
        }

        match guard.record_attempt("read_file", &args) {
            AttemptDecision::Block(message) => {
                assert!(message.contains("read_file"));
                assert!(message.contains("already run 3 times"));
            }
            AttemptDecision::Proceed => panic!("third identical call should be blocked"),
        }
    }

    /// The same tool with a different `offset` each time is never blocked.
    #[test]
    fn paginated_reads_are_not_false_positives() {
        let mut guard = LoopGuard::default();
        for offset in [0, 100, 200] {
            let args = json!({"path": "src/main.rs", "offset": offset});
            assert_eq!(
                guard.record_attempt("read_file", &args),
                AttemptDecision::Proceed
            );
        }
    }

    /// Failures 1-2 continue, 3 warns, 4-7 continue, 8 halts.
    #[test]
    fn tool_failure_counter_warns_at_three_and_halts_at_eight() {
        let mut guard = LoopGuard::default();
        let mut fail = || guard.record_outcome("grep_files", false);

        assert_eq!(fail(), OutcomeDecision::Continue);
        assert_eq!(fail(), OutcomeDecision::Continue);
        assert!(matches!(
            fail(),
            OutcomeDecision::Warn(message) if message.contains("failed 3 consecutive times")
        ));
        for _ in 4..8 {
            assert_eq!(fail(), OutcomeDecision::Continue);
        }
        assert!(matches!(
            fail(),
            OutcomeDecision::Halt(message) if message.contains("failed 8 consecutive times")
        ));
    }

    /// A success between failures restarts the consecutive-failure count.
    #[test]
    fn successful_tool_call_resets_failure_counter() {
        let mut guard = LoopGuard::default();
        for ok in [false, false, true, false] {
            assert_eq!(
                guard.record_outcome("grep_files", ok),
                OutcomeDecision::Continue
            );
        }
    }

    /// Objects with the same members in a different order count as the same call.
    #[test]
    fn argument_hash_is_independent_of_object_key_order() {
        let mut guard = LoopGuard::default();

        let path_first = json!({"path": "a", "offset": 0});
        let offset_first = json!({"offset": 0, "path": "a"});

        assert_eq!(
            guard.record_attempt("read_file", &path_first),
            AttemptDecision::Proceed
        );
        assert_eq!(
            guard.record_attempt("read_file", &offset_first),
            AttemptDecision::Proceed
        );
        assert!(matches!(
            guard.record_attempt("read_file", &path_first),
            AttemptDecision::Block(_)
        ));
    }
}
+1
View File
@@ -37,6 +37,7 @@ fn make_plan(
supports_parallel,
read_only,
blocked_error: None,
guard_result: None,
}
}
+83 -12
View File
@@ -29,6 +29,7 @@ impl Engine {
ensure_advanced_tooling(&mut tool_catalog);
}
let mut active_tool_names = initial_active_tools(&tool_catalog);
let mut loop_guard = LoopGuard::default();
// Transparent stream-retry counter: when the chunked-transfer
// connection dies mid-stream and we got nothing useful out of it
@@ -974,6 +975,7 @@ impl Engine {
let mut supports_parallel = false;
let mut read_only = false;
let mut blocked_error: Option<ToolError> = None;
let mut guard_result: Option<ToolResult> = None;
if maybe_activate_requested_deferred_tool(
&tool_name,
&tool_catalog,
@@ -996,8 +998,7 @@ impl Engine {
{
crate::logging::info(format!(
"Resolved hallucinated tool name '{}' -> '{}'",
tool_name,
canonical
tool_name, canonical
));
tool_def = tool_catalog.iter().find(|d| d.name == canonical);
if tool_def.is_some() {
@@ -1067,6 +1068,17 @@ impl Engine {
read_only = true;
}
if blocked_error.is_none()
&& let AttemptDecision::Block(message) =
loop_guard.record_attempt(&tool_name, &tool_input)
{
crate::logging::warn(message.clone());
guard_result = Some(
ToolResult::success(message)
.with_metadata(json!({"loop_guard": "identical_tool_call"})),
);
}
plans.push(ToolExecutionPlan {
index,
id: tool_id,
@@ -1079,6 +1091,7 @@ impl Engine {
supports_parallel,
read_only,
blocked_error,
guard_result,
});
}
@@ -1106,6 +1119,26 @@ impl Engine {
if parallel_allowed {
let mut tool_tasks = FuturesUnordered::new();
for plan in plans {
if let Some(result) = plan.guard_result.clone() {
let result = Ok(result);
let _ = self
.tx_event
.send(Event::ToolCallComplete {
id: plan.id.clone(),
name: plan.name.clone(),
result: result.clone(),
})
.await;
outcomes[plan.index] = Some(ToolExecOutcome {
index: plan.index,
id: plan.id,
name: plan.name,
input: plan.input,
started_at: Instant::now(),
result,
});
continue;
}
if let Some(err) = plan.blocked_error.clone() {
outcomes[plan.index] = Some(ToolExecOutcome {
index: plan.index,
@@ -1183,6 +1216,27 @@ impl Engine {
let tool_input = plan.input.clone();
let tool_caller = plan.caller.clone();
if let Some(result) = plan.guard_result.clone() {
let result = Ok(result);
let _ = self
.tx_event
.send(Event::ToolCallComplete {
id: tool_id.clone(),
name: tool_name.clone(),
result: result.clone(),
})
.await;
outcomes[plan.index] = Some(ToolExecOutcome {
index: plan.index,
id: tool_id,
name: tool_name,
input: tool_input,
started_at: Instant::now(),
result,
});
continue;
}
if let Some(err) = plan.blocked_error.clone() {
let result = Err(err);
let _ = self
@@ -1472,6 +1526,7 @@ impl Engine {
// denial that should not.
let mut step_error_categories: Vec<ErrorCategory> = Vec::new();
let mut stop_after_plan_tool = false;
let mut loop_guard_halt: Option<String> = None;
for outcome in outcomes.into_iter().flatten() {
let duration = outcome.started_at.elapsed();
@@ -1484,6 +1539,16 @@ impl Engine {
match outcome.result {
Ok(output) => {
match loop_guard.record_outcome(&outcome.name, output.success) {
OutcomeDecision::Continue => {}
OutcomeDecision::Warn(message) => {
crate::logging::warn(message.clone());
let _ = self.tx_event.send(Event::status(message)).await;
}
OutcomeDecision::Halt(message) => {
loop_guard_halt.get_or_insert(message);
}
}
emit_tool_audit(json!({
"event": "tool.result",
"tool_id": outcome.id.clone(),
@@ -1526,6 +1591,16 @@ impl Engine {
.await;
}
Err(e) => {
match loop_guard.record_outcome(&outcome.name, false) {
OutcomeDecision::Continue => {}
OutcomeDecision::Warn(message) => {
crate::logging::warn(message.clone());
let _ = self.tx_event.send(Event::status(message)).await;
}
OutcomeDecision::Halt(message) => {
loop_guard_halt.get_or_insert(message);
}
}
let envelope: ErrorEnvelope = e.clone().into();
emit_tool_audit(json!({
"event": "tool.result",
@@ -1567,6 +1642,12 @@ impl Engine {
break;
}
if let Some(message) = loop_guard_halt {
crate::logging::warn(message.clone());
let _ = self.tx_event.send(Event::status(message)).await;
break;
}
if self
.run_capacity_post_tool_checkpoint(
turn,
@@ -1619,16 +1700,6 @@ impl Engine {
continue;
}
if consecutive_tool_error_steps >= 3 {
let _ = self
.tx_event
.send(Event::status(
"Stopping after repeated tool failures. Try a narrower scope or adjust approvals.",
))
.await;
break;
}
turn.next_step();
}
+1 -1
View File
@@ -1,6 +1,6 @@
// Used by the deferred context-limit handoff feature (#667). The implementation
// path is staged but not yet wired from the engine; suppress dead-code warnings
// rather than delete the table, since v0.8.13 will consume it.
// rather than delete the table until the follow-up feature consumes it.
#[allow(dead_code)]
pub const THRESHOLDS: [(f32, &str); 3] = [
(
-39
View File
@@ -1574,45 +1574,6 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
}
println!(" · credential sources: env, ~/.deepseek/config.toml");
// #593: surface keyring/config disagreement explicitly. The runtime
// resolution order is `keyring → env → config-file`, so a stale
// keyring entry from a prior install can shadow the value the user
// sees in `~/.deepseek/config.toml`. We only check the DeepSeek
// slot — other providers don't write to the keyring today, and
// probing entries that aren't there triggers macOS keychain
// prompts for nothing.
let secrets = deepseek_secrets::Secrets::auto_detect();
let keyring_key = secrets.get("deepseek").ok().flatten();
let config_key = config
.api_key
.as_ref()
.filter(|v| !v.trim().is_empty() && v.as_str() != "__KEYRING__")
.map(|s| s.to_string());
match (keyring_key.as_deref(), config_key.as_deref()) {
(Some(k), Some(c)) if k.trim() != c.trim() => {
println!();
println!(
" {} `deepseek`: OS keyring and config.toml hold different values.",
"".truecolor(red_r, red_g, red_b)
);
println!(
" Resolution order is keyring → env → config-file, so the keyring value wins."
);
println!(" Reconcile by overwriting both with the current key:");
println!(" deepseek auth set --provider deepseek");
println!(
" (Or paste the key into the in-TUI onboarding screen — it now writes both layers.)"
);
}
(Some(_), None) => {
println!(
" {} `deepseek`: key is in OS keyring only (config.toml has no copy).",
"·".dimmed()
);
}
_ => {}
}
let has_api_key = if config.deepseek_api_key().is_ok() {
println!(
" {} active provider key resolved",
+86 -7
View File
@@ -191,7 +191,8 @@ impl ColorDepth {
#[must_use]
pub fn adapt_color(color: Color, depth: ColorDepth) -> Color {
match (color, depth) {
(_, ColorDepth::TrueColor | ColorDepth::Ansi256) => color,
(_, ColorDepth::TrueColor) => color,
(Color::Rgb(r, g, b), ColorDepth::Ansi256) => Color::Indexed(rgb_to_ansi256(r, g, b)),
(Color::Rgb(r, g, b), ColorDepth::Ansi16) => nearest_ansi16(r, g, b),
_ => color,
}
@@ -203,9 +204,11 @@ pub fn adapt_color(color: Color, depth: ColorDepth) -> Color {
#[allow(dead_code)]
#[must_use]
pub fn adapt_bg(color: Color, depth: ColorDepth) -> Color {
match depth {
ColorDepth::TrueColor | ColorDepth::Ansi256 => color,
ColorDepth::Ansi16 => Color::Reset,
match (color, depth) {
(_, ColorDepth::TrueColor) => color,
(Color::Rgb(r, g, b), ColorDepth::Ansi256) => Color::Indexed(rgb_to_ansi256(r, g, b)),
(_, ColorDepth::Ansi256) => color,
(_, ColorDepth::Ansi16) => Color::Reset,
}
}
@@ -236,7 +239,10 @@ pub fn blend(fg: Color, bg: Color, alpha: f32) -> Color {
pub fn reasoning_surface_tint(depth: ColorDepth) -> Option<Color> {
match depth {
ColorDepth::Ansi16 => None,
_ => Some(blend(SURFACE_REASONING, DEEPSEEK_INK, 0.12)),
_ => Some(adapt_bg(
blend(SURFACE_REASONING, DEEPSEEK_INK, 0.12),
depth,
)),
}
}
@@ -327,12 +333,59 @@ fn nearest_ansi16(r: u8, g: u8, b: u8) -> Color {
}
}
/// Quantize a 24-bit RGB triple to an xterm 256-color palette index.
///
/// Two candidates are computed: the closest entry of the fixed 6x6x6 color
/// cube (indices 16..=231) and the closest entry of the 24-step grayscale ramp
/// (indices 232..=255). The candidate with the smaller squared RGB distance is
/// returned; on an exact tie the cube entry wins. Indices 0..=15 are never
/// produced because terminals let users re-theme those slots.
// NOTE(review): `adapt_color` / `adapt_bg` call this function, so the
// dead-code allowance looks removable — confirm before dropping it.
#[allow(dead_code)]
fn rgb_to_ansi256(r: u8, g: u8, b: u8) -> u8 {
    // Channel intensity of each of the six cube steps.
    const STEPS: [u8; 6] = [0, 95, 135, 175, 215, 255];

    // Position (0..=5) of the cube step closest to `value`. The strict `<`
    // keeps the lower step when two steps are equally far away.
    fn cube_step(value: u8) -> u8 {
        let mut best: u8 = 0;
        for candidate in 1..6u8 {
            let current = value.abs_diff(STEPS[usize::from(best)]);
            if value.abs_diff(STEPS[usize::from(candidate)]) < current {
                best = candidate;
            }
        }
        best
    }

    // Squared Euclidean distance between two RGB triples.
    fn sq_distance(p: [u8; 3], q: [u8; 3]) -> u32 {
        p.iter()
            .zip(q.iter())
            .map(|(&lhs, &rhs)| {
                let delta = u32::from(lhs.abs_diff(rhs));
                delta * delta
            })
            .sum()
    }

    let target = [r, g, b];

    // Cube candidate: quantize each channel independently.
    let steps = [cube_step(r), cube_step(g), cube_step(b)];
    let cube_rgb = steps.map(|step| STEPS[usize::from(step)]);
    // At most 16 + 180 + 30 + 5 = 231, so this stays inside `u8`.
    let cube_index = 16 + 36 * steps[0] + 6 * steps[1] + steps[2];

    // Grayscale candidate: ramp entry `i` has intensity `8 + 10 * i`, so pick
    // the entry nearest to the mean channel value, clamped to 0..=23.
    let mean = ((u16::from(r) + u16::from(g) + u16::from(b)) / 3) as u8;
    let ramp_step = match mean {
        0..=8 => 0,
        238..=255 => 23,
        // Same as `(mean - 8 + 5) / 10`: round to the nearest ramp entry.
        mid => ((mid - 3) / 10).min(23),
    };
    let gray_level = 8 + 10 * ramp_step;
    let gray_index = 232 + ramp_step;

    if sq_distance(target, [gray_level; 3]) < sq_distance(target, cube_rgb) {
        gray_index
    } else {
        cube_index
    }
}
#[cfg(test)]
mod tests {
use super::{
ACCENT_REASONING_LIVE, ColorDepth, DEEPSEEK_INK, DEEPSEEK_RED, DEEPSEEK_SKY,
SURFACE_REASONING, adapt_bg, adapt_color, blend, nearest_ansi16, pulse_brightness,
reasoning_surface_tint,
reasoning_surface_tint, rgb_to_ansi256,
};
use ratatui::style::Color;
@@ -340,7 +393,23 @@ mod tests {
fn adapt_color_passes_through_truecolor() {
let c = Color::Rgb(53, 120, 229);
assert_eq!(adapt_color(c, ColorDepth::TrueColor), c);
assert_eq!(adapt_color(c, ColorDepth::Ansi256), c);
}
#[test]
fn adapt_color_maps_rgb_to_indexed_on_ansi256() {
let c = Color::Rgb(53, 120, 229);
assert!(matches!(
adapt_color(c, ColorDepth::Ansi256),
Color::Indexed(_)
));
}
#[test]
fn adapt_bg_maps_rgb_to_indexed_on_ansi256() {
assert!(matches!(
adapt_bg(SURFACE_REASONING, ColorDepth::Ansi256),
Color::Indexed(_)
));
}
#[test]
@@ -370,6 +439,10 @@ mod tests {
fn reasoning_tint_is_none_on_ansi16() {
assert!(reasoning_surface_tint(ColorDepth::Ansi16).is_none());
assert!(reasoning_surface_tint(ColorDepth::TrueColor).is_some());
assert!(matches!(
reasoning_surface_tint(ColorDepth::Ansi256),
Some(Color::Indexed(_))
));
}
#[test]
@@ -424,6 +497,12 @@ mod tests {
assert_eq!(nearest_ansi16(11, 21, 38), Color::Black);
}
#[test]
fn rgb_to_ansi256_uses_stable_extended_palette() {
assert!(rgb_to_ansi256(53, 120, 229) >= 16);
assert!(rgb_to_ansi256(11, 21, 38) >= 16);
}
#[test]
fn color_depth_detect_is_safe_without_env() {
// Don't try to pin the result — env may be anything in CI. Just
+3 -3
View File
@@ -36,7 +36,7 @@ use crate::localization::normalize_configured_locale;
/// ```
//
// NOTE: the loader is defined but not yet called from startup — wiring is
// deferred to v0.8.13 (#657). The `#[allow(dead_code)]` suppresses the CI
// deferred to a later settings pass (#657). The `#[allow(dead_code)]` suppresses the CI
// `-D warnings` failure until the call site lands.
#[allow(dead_code)]
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -63,7 +63,7 @@ impl Default for TuiPrefs {
}
/// Per-action keybinding overrides stored inside [`TuiPrefs`].
#[allow(dead_code)] // see TuiPrefs note above; deferred to v0.8.13 (#657).
#[allow(dead_code)] // see TuiPrefs note above; deferred to a later settings pass (#657).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct KeybindPrefs {
@@ -84,7 +84,7 @@ pub struct KeybindPrefs {
pub toggle_sidebar: Option<String>,
}
#[allow(dead_code)] // see TuiPrefs note above; deferred to v0.8.13 (#657).
#[allow(dead_code)] // see TuiPrefs note above; deferred to a later settings pass (#657).
impl TuiPrefs {
/// Return the canonical path of the TUI preferences file:
/// `~/.deepseek/tui.toml`.
+2 -6
View File
@@ -50,8 +50,7 @@ fn collapse_nullable_unions(schema: &mut Value) {
Some(arr) => arr.clone(),
None => continue,
};
let (nulls, nons): (Vec<_>, Vec<_>) =
members.into_iter().partition(|m| is_null_type(m));
let (nulls, nons): (Vec<_>, Vec<_>) = members.into_iter().partition(is_null_type);
if nulls.len() == 1 && nons.len() == 1 {
obj.remove(key);
if let Value::Object(non_obj) = nons.into_iter().next().unwrap() {
@@ -85,10 +84,7 @@ fn inject_properties_on_bare_objects(schema: &mut Value) {
if obj.contains_key("properties") || obj.contains_key("additionalProperties") {
return;
}
obj.insert(
"properties".into(),
Value::Object(Map::new()),
);
obj.insert("properties".into(), Value::Object(Map::new()));
}
/// Remove entries from `required` that aren't keys in `properties`.
+169
View File
@@ -0,0 +1,169 @@
//! Terminal color compatibility shim.
//!
//! Ratatui's crossterm backend emits truecolor SGR for every `Color::Rgb`
//! cell. That is correct for truecolor terminals, but macOS Terminal.app often
//! advertises only `xterm-256color`; sending `38;2` / `48;2` there can render
//! as stray green/cyan backgrounds. This backend adapts every cell to the
//! detected color depth before handing it to crossterm.
use std::io::{self, Write};
use ratatui::{
backend::{Backend, ClearType, CrosstermBackend, WindowSize},
buffer::Cell,
layout::{Position, Size},
};
use crate::palette::{self, ColorDepth};
/// A ratatui backend that wraps [`CrosstermBackend`] and rewrites cell colors
/// for a fixed [`ColorDepth`] before they are drawn.
#[derive(Debug)]
pub(crate) struct ColorCompatBackend<W: Write> {
    /// The real crossterm backend that receives the adapted cells.
    inner: CrosstermBackend<W>,
    /// Color depth every drawn cell is adapted to.
    depth: ColorDepth,
}

impl<W: Write> ColorCompatBackend<W> {
    /// Build a backend that writes to `writer` and adapts colors to `depth`.
    pub(crate) fn new(writer: W, depth: ColorDepth) -> Self {
        let inner = CrosstermBackend::new(writer);
        Self { inner, depth }
    }
}
impl<W: Write> Write for ColorCompatBackend<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.inner.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
Write::flush(&mut self.inner)
}
}
/// ratatui `Backend` implementation. `draw` adapts cell colors to the
/// configured depth; every other method forwards unchanged to the wrapped
/// crossterm backend.
impl<W> Backend for ColorCompatBackend<W>
where
    W: Write,
{
    /// Copy each incoming cell, adapt its colors, then draw the copies.
    ///
    /// The copies are buffered first because the wrapped backend's `draw`
    /// takes cells by reference, so the adapted cells need an owner that
    /// outlives the call.
    fn draw<'a, I>(&mut self, content: I) -> io::Result<()>
    where
        I: Iterator<Item = (u16, u16, &'a Cell)>,
    {
        let depth = self.depth;
        let mut owned: Vec<(u16, u16, Cell)> = Vec::with_capacity(content.size_hint().0);
        for (col, row, source) in content {
            let mut copy = source.clone();
            adapt_cell_colors(&mut copy, depth);
            owned.push((col, row, copy));
        }
        let borrowed = owned.iter().map(|(col, row, copy)| (*col, *row, copy));
        Backend::draw(&mut self.inner, borrowed)
    }

    fn append_lines(&mut self, n: u16) -> io::Result<()> {
        Backend::append_lines(&mut self.inner, n)
    }

    fn hide_cursor(&mut self) -> io::Result<()> {
        Backend::hide_cursor(&mut self.inner)
    }

    fn show_cursor(&mut self) -> io::Result<()> {
        Backend::show_cursor(&mut self.inner)
    }

    fn get_cursor_position(&mut self) -> io::Result<Position> {
        Backend::get_cursor_position(&mut self.inner)
    }

    fn set_cursor_position<P: Into<Position>>(&mut self, position: P) -> io::Result<()> {
        Backend::set_cursor_position(&mut self.inner, position)
    }

    fn clear(&mut self) -> io::Result<()> {
        Backend::clear(&mut self.inner)
    }

    fn clear_region(&mut self, clear_type: ClearType) -> io::Result<()> {
        Backend::clear_region(&mut self.inner, clear_type)
    }

    fn size(&self) -> io::Result<Size> {
        Backend::size(&self.inner)
    }

    fn window_size(&mut self) -> io::Result<WindowSize> {
        Backend::window_size(&mut self.inner)
    }

    fn flush(&mut self) -> io::Result<()> {
        Backend::flush(&mut self.inner)
    }
}
/// Rewrite a cell's colors in place for the given color depth.
///
/// The foreground goes through `palette::adapt_color` and the background
/// through `palette::adapt_bg`; nothing else on the cell is touched.
fn adapt_cell_colors(cell: &mut Cell, depth: ColorDepth) {
    let (foreground, background) = (cell.fg, cell.bg);
    cell.bg = palette::adapt_bg(background, depth);
    cell.fg = palette::adapt_color(foreground, depth);
}
#[cfg(test)]
mod tests {
    use std::{cell::RefCell, io::Write, rc::Rc};

    use ratatui::backend::Backend;
    use ratatui::{buffer::Cell, style::Color};

    use super::*;

    /// Foreground used by every fixture cell.
    const SAMPLE_FG: Color = Color::Rgb(53, 120, 229);
    /// Background used by every fixture cell.
    const SAMPLE_BG: Color = Color::Rgb(11, 21, 38);

    /// In-memory `Write` sink. Clones share one buffer, so the test can still
    /// read the bytes after the backend has taken ownership of the writer.
    #[derive(Clone, Default)]
    struct SharedWriter(Rc<RefCell<Vec<u8>>>);

    impl Write for SharedWriter {
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            let mut sink = self.0.borrow_mut();
            sink.extend_from_slice(buf);
            Ok(buf.len())
        }

        fn flush(&mut self) -> io::Result<()> {
            Ok(())
        }
    }

    /// A default cell carrying the RGB sample foreground and background.
    fn sample_cell() -> Cell {
        let mut cell = Cell::default();
        cell.set_fg(SAMPLE_FG);
        cell.set_bg(SAMPLE_BG);
        cell
    }

    #[test]
    fn adapts_rgb_cells_to_indexed_on_ansi256() {
        let mut cell = sample_cell();

        adapt_cell_colors(&mut cell, ColorDepth::Ansi256);

        assert!(matches!(cell.fg, Color::Indexed(_)));
        assert!(matches!(cell.bg, Color::Indexed(_)));
    }

    #[test]
    fn leaves_truecolor_cells_unchanged() {
        let mut cell = sample_cell();

        adapt_cell_colors(&mut cell, ColorDepth::TrueColor);

        assert_eq!(cell.fg, Color::Rgb(53, 120, 229));
        assert_eq!(cell.bg, Color::Rgb(11, 21, 38));
    }

    #[test]
    fn ansi256_backend_output_does_not_emit_truecolor_sgr() {
        let writer = SharedWriter::default();
        let capture = writer.0.clone();
        let mut backend = ColorCompatBackend::new(writer, ColorDepth::Ansi256);

        let mut cell = sample_cell();
        cell.set_symbol("x");

        backend.draw(std::iter::once((0, 0, &cell))).unwrap();

        // `38;2;` / `48;2;` are the truecolor foreground/background SGR
        // prefixes; neither may appear in the bytes written for Ansi256.
        let output = String::from_utf8_lossy(&capture.borrow()).into_owned();
        assert!(!output.contains("38;2;"), "{output:?}");
        assert!(!output.contains("48;2;"), "{output:?}");
    }
}
+5 -3
View File
@@ -22,9 +22,11 @@ use crossterm::{
execute,
terminal::{EnterAlternateScreen, LeaveAlternateScreen, disable_raw_mode, enable_raw_mode},
};
use ratatui::{Terminal, backend::CrosstermBackend};
use ratatui::Terminal;
use tempfile::Builder;
use super::color_compat::ColorCompatBackend;
/// Outcome of a single external-editor invocation.
#[derive(Debug, PartialEq, Eq)]
pub enum EditorOutcome {
@@ -120,8 +122,8 @@ pub fn run_editor_raw(seed: &str) -> io::Result<EditorOutcome> {
///
/// On any error (raw-mode toggle, IO, editor spawn failure), the function
/// still attempts to fully restore the terminal before returning.
pub fn spawn_editor_for_input(
terminal: &mut Terminal<CrosstermBackend<Stdout>>,
pub(crate) fn spawn_editor_for_input(
terminal: &mut Terminal<ColorCompatBackend<Stdout>>,
use_alt_screen: bool,
use_mouse_capture: bool,
use_bracketed_paste: bool,
+1
View File
@@ -7,6 +7,7 @@ pub mod app;
pub mod approval;
pub mod backtrack;
pub mod clipboard;
mod color_compat;
pub mod command_palette;
pub mod context_inspector;
pub mod context_menu;
+19 -10
View File
@@ -18,7 +18,6 @@ use crossterm::{
};
use ratatui::{
Frame, Terminal,
backend::CrosstermBackend,
layout::{Constraint, Direction, Layout, Rect},
prelude::Widget,
style::{Color, Style},
@@ -52,6 +51,7 @@ use crate::task_manager::{
};
use crate::tools::spec::RuntimeToolServices;
use crate::tools::subagent::SubAgentStatus;
use crate::tui::color_compat::ColorCompatBackend;
use crate::tui::command_palette::{
CommandPaletteView, build_entries as build_command_palette_entries,
};
@@ -132,6 +132,8 @@ const WORKSPACE_CONTEXT_REFRESH_SECS: u64 = 15;
const SIDEBAR_VISIBLE_MIN_WIDTH: u16 = 100;
const DEFAULT_TERMINAL_PROBE_TIMEOUT_MS: u64 = 500;
type AppTerminal = Terminal<ColorCompatBackend<Stdout>>;
/// Run the interactive TUI event loop.
///
/// # Examples
@@ -230,7 +232,9 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> {
"PushKeyboardEnhancementFlags ignored (terminal lacks support)"
);
}
let backend = CrosstermBackend::new(stdout);
let color_depth = palette::ColorDepth::detect();
tracing::debug!(?color_depth, "terminal color depth detected");
let backend = ColorCompatBackend::new(stdout, color_depth);
let mut terminal = Terminal::new(backend)?;
terminal.clear()?;
let event_broker = EventBroker::new();
@@ -576,7 +580,7 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag
#[allow(clippy::too_many_lines)]
async fn run_event_loop(
terminal: &mut Terminal<CrosstermBackend<Stdout>>,
terminal: &mut AppTerminal,
app: &mut App,
config: &mut Config,
mut engine_handle: EngineHandle,
@@ -3289,7 +3293,7 @@ async fn dispatch_user_message(
app.model.clone()
};
engine_handle
if let Err(err) = engine_handle
.send(Op::SendMessage {
content,
mode: app.mode,
@@ -3300,7 +3304,12 @@ async fn dispatch_user_message(
trust_mode: app.trust_mode,
auto_approve: app.mode == AppMode::Yolo,
})
.await?;
.await
{
app.is_loading = false;
app.last_send_at = None;
return Err(err);
}
Ok(())
}
@@ -3815,7 +3824,7 @@ fn workspace_path_to_picker_string(path: &Path) -> Option<String> {
}
async fn apply_command_result(
terminal: &mut Terminal<CrosstermBackend<Stdout>>,
terminal: &mut AppTerminal,
app: &mut App,
engine_handle: &mut EngineHandle,
task_manager: &SharedTaskManager,
@@ -4305,7 +4314,7 @@ fn handle_shell_job_action(app: &mut App, action: crate::tui::app::ShellJobActio
}
async fn execute_command_input(
terminal: &mut Terminal<CrosstermBackend<Stdout>>,
terminal: &mut AppTerminal,
app: &mut App,
engine_handle: &mut EngineHandle,
task_manager: &SharedTaskManager,
@@ -4864,7 +4873,7 @@ fn toggle_live_transcript_overlay(app: &mut App) {
}
async fn handle_view_events(
terminal: &mut Terminal<CrosstermBackend<Stdout>>,
terminal: &mut AppTerminal,
app: &mut App,
config: &mut Config,
task_manager: &SharedTaskManager,
@@ -5552,7 +5561,7 @@ fn run_git_query(workspace: &Path, args: &[&str]) -> std::io::Result<String> {
}
fn pause_terminal(
terminal: &mut Terminal<CrosstermBackend<Stdout>>,
terminal: &mut AppTerminal,
use_alt_screen: bool,
use_mouse_capture: bool,
use_bracketed_paste: bool,
@@ -5576,7 +5585,7 @@ fn pause_terminal(
}
fn resume_terminal(
terminal: &mut Terminal<CrosstermBackend<Stdout>>,
terminal: &mut AppTerminal,
use_alt_screen: bool,
use_mouse_capture: bool,
use_bracketed_paste: bool,
+24
View File
@@ -695,6 +695,30 @@ async fn model_change_update_syncs_engine_model_before_compaction() {
}
}
/// Regression test: when sending to the engine fails, `dispatch_user_message`
/// must return the error and leave the app with `is_loading == false` and no
/// `last_send_at` timestamp.
#[tokio::test]
async fn dispatch_user_message_failed_send_clears_loading_state() {
    let mut app = create_test_app();
    let engine = mock_engine_handle();
    // Drop `rx_op` (presumably the receiving end of the op channel) so the
    // send inside `dispatch_user_message` fails.
    drop(engine.rx_op);

    let message = QueuedMessage::new("hello".to_string(), None);
    let outcome = dispatch_user_message(&mut app, &engine.handle, message).await;

    assert!(
        outcome.is_err(),
        "dispatch should fail when engine channel is closed"
    );
    assert!(
        !app.is_loading,
        "failed dispatch must not leave the composer in a permanent busy state"
    );
    assert!(app.last_send_at.is_none());
}
fn init_git_repo() -> TempDir {
let dir = tempfile::tempdir().expect("tempdir");
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "deepseek-tui",
"version": "0.8.12",
"deepseekBinaryVersion": "0.8.12",
"version": "0.8.13",
"deepseekBinaryVersion": "0.8.13",
"description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
"author": "Hmbown",
"license": "MIT",