Release v0.4.9: thinking-mode reasoning_content fix + README refresh

### Fixed
- DeepSeek thinking-mode tool-call rounds now always replay reasoning_content
  in all subsequent requests (including across new user turns), matching the
  documented API contract that assistant tool-call messages must retain their
  reasoning content forever. Previously, reasoning_content was cleared after
  the current user turn completed, which could cause HTTP 400 errors.
- Missing reasoning_content on a tool-call assistant message now substitutes
  a safe placeholder ("(reasoning omitted)") instead of dropping the tool
  calls and their matching tool results, preventing orphaned conversation
  chains and API 400 rejections.
- Session checkpoint now persists a Thinking-block placeholder for tool-call
  turns that produced no streamed reasoning text, keeping on-disk sessions
  structurally correct for subsequent requests.
- Token estimation for compaction now counts thinking tokens across ALL
  tool-call rounds (not just the current user turn), aligning with the
  updated reasoning_content replay rule.

### Changed
- Internal crate dependency pins bumped 0.4.5 → 0.4.9 to match workspace.
- npm wrapper version and deepseekBinaryVersion bumped to 0.4.9.
- README fully rewritten: clearer feature highlights, V4 model focus,
  keyboard shortcut table, improved docs index, and more engaging layout.
- CHANGELOG entry for 0.4.9 with comparison URLs.
This commit is contained in:
Hunter Bown
2026-04-25 12:00:08 -05:00
parent 41c54f08aa
commit 67b232b063
16 changed files with 284 additions and 245 deletions
+11 -1
View File
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## [0.4.9] - 2026-04-27
### Fixed
- DeepSeek thinking-mode tool-call rounds now always replay `reasoning_content` in all subsequent requests (including across new user turns), matching DeepSeek's documented API contract that assistant messages with tool calls must retain their reasoning content forever.
- Missing `reasoning_content` on a tool-call assistant message now substitutes a safe placeholder (`"(reasoning omitted)"`) instead of dropping the tool calls and their matching tool results, preventing orphaned conversation chains and API 400 errors.
- Session checkpoint now persists a Thinking-block placeholder for tool-call turns that produced no streamed reasoning text, keeping on-disk sessions structurally correct so subsequent requests avoid HTTP 400 rejections.
- Token estimation for compaction now counts thinking tokens across all tool-call rounds (not just the current user turn), aligning with the updated `reasoning_content` replay rule.
## [0.4.8] - 2026-04-25
### Fixed
@@ -490,7 +498,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Hooks system and config profiles
- Example skills and launch assets
[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...HEAD
[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.9...HEAD
[0.4.9]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.8...v0.4.9
[0.4.8]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...v0.4.8
[0.3.33]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.32...v0.3.33
[0.3.32]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.31...v0.3.32
[0.3.31]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.28...v0.3.31
Generated
+13 -13
View File
@@ -806,7 +806,7 @@ dependencies = [
[[package]]
name = "deepseek-agent"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"deepseek-config",
"serde",
@@ -814,7 +814,7 @@ dependencies = [
[[package]]
name = "deepseek-app-server"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"axum",
@@ -837,7 +837,7 @@ dependencies = [
[[package]]
name = "deepseek-config"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"dirs",
@@ -848,7 +848,7 @@ dependencies = [
[[package]]
name = "deepseek-core"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"chrono",
@@ -867,7 +867,7 @@ dependencies = [
[[package]]
name = "deepseek-execpolicy"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -876,7 +876,7 @@ dependencies = [
[[package]]
name = "deepseek-hooks"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"async-trait",
@@ -890,7 +890,7 @@ dependencies = [
[[package]]
name = "deepseek-mcp"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -900,7 +900,7 @@ dependencies = [
[[package]]
name = "deepseek-protocol"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"serde",
"serde_json",
@@ -908,7 +908,7 @@ dependencies = [
[[package]]
name = "deepseek-state"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"chrono",
@@ -920,7 +920,7 @@ dependencies = [
[[package]]
name = "deepseek-tools"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"async-trait",
@@ -933,7 +933,7 @@ dependencies = [
[[package]]
name = "deepseek-tui"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"arboard",
@@ -987,7 +987,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-cli"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"anyhow",
"chrono",
@@ -1005,7 +1005,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-core"
version = "0.4.8"
version = "0.4.9"
[[package]]
name = "deranged"
+1 -1
View File
@@ -18,7 +18,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
resolver = "2"
[workspace.package]
version = "0.4.8"
version = "0.4.9"
edition = "2024"
license = "MIT"
repository = "https://github.com/Hmbown/DeepSeek-TUI"
+116 -80
View File
@@ -1,45 +1,64 @@
# DeepSeek TUI
`npm i -g deepseek-tui`
> **A terminal-native coding agent for [DeepSeek V4](https://platform.deepseek.com) models — with 1M-token context, thinking-mode reasoning, and full tool-use.**
A coding agent for [DeepSeek](https://platform.deepseek.com) models that runs in your terminal.
```bash
npm i -g deepseek-tui
```
[![CI](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml/badge.svg)](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml)
[![crates.io](https://img.shields.io/crates/v/deepseek-tui)](https://crates.io/crates/deepseek-tui)
[![npm](https://img.shields.io/npm/v/deepseek-tui)](https://www.npmjs.com/package/deepseek-tui)
---
## What is it?
DeepSeek TUI is a coding agent that runs entirely in your terminal. It gives DeepSeek's frontier models direct access to your workspace — reading and editing files, running shell commands, searching the web, managing git, and orchestrating sub-agents — all through a fast, keyboard-driven TUI.
**Built for DeepSeek V4** (`deepseek-v4-pro` / `deepseek-v4-flash`) with 1M-token context windows and native thinking-mode (chain-of-thought) streaming. See the model's reasoning unfold in real time as it works through your tasks.
### Key Features
- 🧠 **Thinking-mode streaming** — watch DeepSeek's chain-of-thought as it reasons about your code
- 🔧 **Full tool suite** — file ops, shell execution, git, web search/browse, apply-patch, sub-agents, MCP servers, and more
- 🪟 **1M-token context** — feed entire codebases; automatic intelligent compaction when context fills up
- 🎛️ **Three interaction modes** — Plan (read-only explore), Agent (interactive with approval), YOLO (auto-approved)
- ⚡ **Reasoning-effort tiers** — cycle through `off → high → max` with Shift+Tab
- 🔄 **Session save/resume** — checkpoint and resume long sessions, fork conversations
- 🌐 **HTTP/SSE runtime API** — `deepseek serve --http` for headless agent workflows
- 📦 **MCP protocol** — connect to Model Context Protocol servers for extended tooling
- 💰 **Live cost tracking** — per-turn and session-level token usage and cost estimates
- 🎨 **Dark & light themes** — with a DeepSeek-blue branded palette
---
## Quickstart
```bash
npm install -g deepseek-tui
```
Start the TUI:
```bash
deepseek
```
On first launch, it will prompt for your API key if one is not already configured.
The package also installs `deepseek-tui`; both commands share the same
`~/.deepseek/config.toml` for DeepSeek auth and default model settings.
You can also set auth ahead of time with either of these:
On first launch you'll be prompted for your [DeepSeek API key](https://platform.deepseek.com/api_keys). You can also set it ahead of time:
```bash
# via CLI
deepseek login --api-key "YOUR_DEEPSEEK_API_KEY"
deepseek-tui login --api-key "YOUR_DEEPSEEK_API_KEY"
DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY" deepseek-tui
# via env var
export DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY"
deepseek
```
To use NVIDIA NIM-hosted DeepSeek V4 Pro instead:
### Using NVIDIA NIM
```bash
deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY"
deepseek --provider nvidia-nim
# or for one process:
DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek
# or per-process:
DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="..." deepseek
```
<details>
@@ -47,8 +66,8 @@ DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek
```bash
# From crates.io (requires Rust 1.85+)
cargo install deepseek-tui --locked # TUI
cargo install deepseek-tui-cli --locked # deepseek CLI facade
cargo install deepseek-tui --locked # TUI binary
cargo install deepseek-tui-cli --locked # CLI facade (deepseek command)
# From source
git clone https://github.com/Hmbown/DeepSeek-TUI.git
@@ -56,99 +75,116 @@ cd DeepSeek-TUI
cargo install --path crates/tui --locked
```
The canonical crates.io packages for this repository are `deepseek-tui` and
`deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this
project. crates.io publication can lag the repository workspace version and the
npm wrapper, so use npm or install from source if you need the newest release
surface immediately.
The canonical crates.io packages are `deepseek-tui` and `deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this project. crates.io publication can lag the workspace version — use npm or install from source if you need the latest release immediately.
</details>
## What it does
---
A terminal coding agent for DeepSeek models with file editing, shell execution, `web.run` browsing, git operations, session resume, and [MCP](https://modelcontextprotocol.io) server integration.
## Models & Pricing
Three visible modes (**Tab** to cycle):
DeepSeek TUI targets **DeepSeek V4** models with 1M-token context windows by default.
| Mode | Behavior |
|------|----------|
| **Plan** | Review a plan before the agent starts making changes |
| **Agent** | Default interactive mode with multi-step tool use |
| **YOLO** | Auto-approve tools in a trusted workspace |
| Model | Context | Input (cache hit) | Input (cache miss) | Output |
|---|---|---|---|---|
| `deepseek-v4-pro` | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
| `deepseek-v4-flash` | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M |
**Shift+Tab** cycles the reasoning-effort tier for DeepSeek thinking mode:
`off` → `high` → `max`. The current tier is shown as a ⚡ chip in the header.
Set a default in config with `reasoning_effort = "max"` (or `off` / `low` /
`medium` / `high`).
Legacy aliases `deepseek-chat` and `deepseek-reasoner` silently map to `deepseek-v4-flash`.
## Models & pricing
**NVIDIA NIM** hosted variants (`deepseek-ai/deepseek-v4-pro`, `deepseek-ai/deepseek-v4-flash`) use your NVIDIA account terms — no DeepSeek platform billing.
| Model | Thinking | Context | Input cache hit | Input cache miss | Output |
|---|---|---|---|---|---|
| `deepseek-v4-pro` | default | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
| `deepseek-v4-flash` | default | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M |
| `deepseek-ai/deepseek-v4-pro` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms |
| `deepseek-ai/deepseek-v4-flash` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms |
*\*DeepSeek lists the Pro rates above as a limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI cost estimator falls back to base Pro rates after that timestamp.*
Legacy `deepseek-chat` and `deepseek-reasoner` remain as silent aliases for
`deepseek-v4-flash` (priced identically). Pricing is per 1M tokens as published
by DeepSeek and is subject to change. *DeepSeek lists the Pro rates above as a
limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI estimator
falls back to the base Pro rates after that timestamp.
---
## Usage
```bash
deepseek # interactive TUI
deepseek "explain this in 2 sentences" # one-shot prompt
deepseek --model deepseek-v4-flash "summarize" # one-shot with model override
deepseek --yolo # YOLO mode
deepseek login --api-key "..." # save API key to shared config
deepseek doctor # check setup
deepseek models # list live DeepSeek API models
deepseek "explain this function" # one-shot prompt
deepseek --model deepseek-v4-flash "summarize" # model override
deepseek --yolo # YOLO mode (auto-approve tools)
deepseek login --api-key "..." # save API key
deepseek doctor # check setup & connectivity
deepseek models # list live API models
deepseek sessions # list saved sessions
deepseek resume --last # resume the latest session
deepseek resume --last # resume latest session
deepseek serve --http # HTTP/SSE API server
```
Controls: `F1` help, `Esc` backs out of the current action, `Ctrl+K` command palette.
In the composer, `@path/to/file` adds local text file or directory context to
the next message. Use `/attach <path>` for local image/video media references.
### Keyboard shortcuts
| Key | Action |
|---|---|
| `Tab` | Cycle mode: Plan → Agent → YOLO |
| `Shift+Tab` | Cycle reasoning-effort: off → high → max |
| `F1` | Help |
| `Esc` | Back / dismiss |
| `Ctrl+K` | Command palette |
| `@path` | Attach file/directory context in composer |
| `/attach <path>` | Attach image/video media references |
---
## Modes
| Mode | Behavior |
|---|---|
| **Plan** 🔍 | Read-only investigation — model explores and proposes a plan before making changes |
| **Agent** 🤖 | Default interactive mode — multi-step tool use with approval gates |
| **YOLO** ⚡ | Auto-approve all tools in a trusted workspace (use with caution) |
---
## Configuration
`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for all options.
`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for every option.
Key environment overrides: `DEEPSEEK_API_KEY`, `DEEPSEEK_BASE_URL`,
`DEEPSEEK_MODEL`, `DEEPSEEK_PROFILE`, `DEEPSEEK_PROVIDER`.
For NVIDIA NIM, use `DEEPSEEK_PROVIDER=nvidia-nim` plus `NVIDIA_API_KEY`
or `NVIDIA_NIM_API_KEY` (with `DEEPSEEK_API_KEY` as a compatibility fallback);
the default model is `deepseek-ai/deepseek-v4-pro` and the default base URL is
`https://integrate.api.nvidia.com/v1`. With `--provider nvidia-nim`,
`--model deepseek-v4-flash` maps to `deepseek-ai/deepseek-v4-flash`.
Key environment overrides:
Quick checks and scaffolding:
| Variable | Purpose |
|---|---|
| `DEEPSEEK_API_KEY` | API key |
| `DEEPSEEK_BASE_URL` | API base URL |
| `DEEPSEEK_MODEL` | Default model |
| `DEEPSEEK_PROVIDER` | Provider: `deepseek` (default) or `nvidia-nim` |
| `DEEPSEEK_PROFILE` | Config profile name |
| `NVIDIA_API_KEY` | NVIDIA NIM API key |
- `deepseek-tui setup --status` — read-only, network-free status of API key,
MCP/skills/tools/plugins, sandbox, and `.env`.
- `deepseek-tui setup --tools --plugins` — scaffold `~/.deepseek/tools/` and
`~/.deepseek/plugins/` with self-describing example templates.
- `deepseek-tui doctor --json` — machine-readable doctor output for CI.
Quick diagnostics:
The client targets DeepSeek's documented OpenAI-compatible Chat Completions API
(`/chat/completions`). DeepSeek context caching is automatic; when the API
returns cache hit/miss token fields, the TUI includes them in usage and cost
tracking.
```bash
deepseek-tui setup --status # read-only status check (API key, MCP, sandbox, .env)
deepseek-tui doctor --json # machine-readable doctor output for CI
deepseek-tui setup --tools --plugins # scaffold tools/ and plugins/ directories
```
Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md).
DeepSeek context caching is automatic — when the API returns cache hit/miss token fields, the TUI includes them in usage and cost tracking.
## Docs
Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md)
[docs/](docs/) — configuration, modes, MCP integration, runtime API, and release runbooks.
---
## Documentation
| Doc | Topic |
|---|---|
| [ARCHITECTURE.md](docs/ARCHITECTURE.md) | Codebase internals |
| [CONFIGURATION.md](docs/CONFIGURATION.md) | Full config reference |
| [MODES.md](docs/MODES.md) | Plan / Agent / YOLO modes |
| [MCP.md](docs/MCP.md) | Model Context Protocol integration |
| [RUNTIME_API.md](docs/RUNTIME_API.md) | HTTP/SSE API server |
| [RELEASE_RUNBOOK.md](docs/RELEASE_RUNBOOK.md) | Release process |
| [OPERATIONS_RUNBOOK.md](docs/OPERATIONS_RUNBOOK.md) | Ops & recovery |
---
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md). Not affiliated with DeepSeek Inc.
See [CONTRIBUTING.md](CONTRIBUTING.md). Pull requests welcome!
*Not affiliated with DeepSeek Inc.*
## License
+1 -1
View File
@@ -7,5 +7,5 @@ repository.workspace = true
description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
[dependencies]
deepseek-config = { path = "../config", version = "0.4.5" }
deepseek-config = { path = "../config", version = "0.4.9" }
serde.workspace = true
+9 -9
View File
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
anyhow.workspace = true
axum.workspace = true
clap.workspace = true
deepseek-agent = { path = "../agent", version = "0.4.5" }
deepseek-config = { path = "../config", version = "0.4.5" }
deepseek-core = { path = "../core", version = "0.4.5" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
deepseek-hooks = { path = "../hooks", version = "0.4.5" }
deepseek-mcp = { path = "../mcp", version = "0.4.5" }
deepseek-protocol = { path = "../protocol", version = "0.4.5" }
deepseek-state = { path = "../state", version = "0.4.5" }
deepseek-tools = { path = "../tools", version = "0.4.5" }
deepseek-agent = { path = "../agent", version = "0.4.9" }
deepseek-config = { path = "../config", version = "0.4.9" }
deepseek-core = { path = "../core", version = "0.4.9" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
deepseek-hooks = { path = "../hooks", version = "0.4.9" }
deepseek-mcp = { path = "../mcp", version = "0.4.9" }
deepseek-protocol = { path = "../protocol", version = "0.4.9" }
deepseek-state = { path = "../state", version = "0.4.9" }
deepseek-tools = { path = "../tools", version = "0.4.9" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
+6 -6
View File
@@ -14,12 +14,12 @@ path = "src/main.rs"
anyhow.workspace = true
clap.workspace = true
clap_complete.workspace = true
deepseek-agent = { path = "../agent", version = "0.4.5" }
deepseek-app-server = { path = "../app-server", version = "0.4.5" }
deepseek-config = { path = "../config", version = "0.4.5" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
deepseek-mcp = { path = "../mcp", version = "0.4.5" }
deepseek-state = { path = "../state", version = "0.4.5" }
deepseek-agent = { path = "../agent", version = "0.4.9" }
deepseek-app-server = { path = "../app-server", version = "0.4.9" }
deepseek-config = { path = "../config", version = "0.4.9" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
deepseek-mcp = { path = "../mcp", version = "0.4.9" }
deepseek-state = { path = "../state", version = "0.4.9" }
chrono.workspace = true
serde_json.workspace = true
tokio.workspace = true
+8 -8
View File
@@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
[dependencies]
anyhow.workspace = true
chrono.workspace = true
deepseek-agent = { path = "../agent", version = "0.4.5" }
deepseek-config = { path = "../config", version = "0.4.5" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
deepseek-hooks = { path = "../hooks", version = "0.4.5" }
deepseek-mcp = { path = "../mcp", version = "0.4.5" }
deepseek-protocol = { path = "../protocol", version = "0.4.5" }
deepseek-state = { path = "../state", version = "0.4.5" }
deepseek-tools = { path = "../tools", version = "0.4.5" }
deepseek-agent = { path = "../agent", version = "0.4.9" }
deepseek-config = { path = "../config", version = "0.4.9" }
deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
deepseek-hooks = { path = "../hooks", version = "0.4.9" }
deepseek-mcp = { path = "../mcp", version = "0.4.9" }
deepseek-protocol = { path = "../protocol", version = "0.4.9" }
deepseek-state = { path = "../state", version = "0.4.9" }
deepseek-tools = { path = "../tools", version = "0.4.9" }
serde_json.workspace = true
tokio.workspace = true
uuid.workspace = true
+1 -1
View File
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
[dependencies]
anyhow.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.4.5" }
deepseek-protocol = { path = "../protocol", version = "0.4.9" }
serde.workspace = true
+1 -1
View File
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
anyhow.workspace = true
async-trait.workspace = true
chrono.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.4.5" }
deepseek-protocol = { path = "../protocol", version = "0.4.9" }
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
+1 -1
View File
@@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo
[dependencies]
anyhow.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.4.5" }
deepseek-protocol = { path = "../protocol", version = "0.4.9" }
serde.workspace = true
serde_json.workspace = true
+1 -1
View File
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
deepseek-protocol = { path = "../protocol", version = "0.4.5" }
deepseek-protocol = { path = "../protocol", version = "0.4.9" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
+82 -75
View File
@@ -1351,7 +1351,6 @@ fn build_chat_messages_with_reasoning(
) -> Vec<Value> {
let mut out = Vec::new();
let mut pending_tool_calls: HashSet<String> = HashSet::new();
let current_turn_start = messages.iter().rposition(is_text_user_message);
if let Some(instructions) = system_to_instructions(system.cloned())
&& !instructions.trim().is_empty()
@@ -1362,7 +1361,7 @@ fn build_chat_messages_with_reasoning(
}));
}
for (message_index, message) in messages.iter().enumerate() {
for message in messages.iter() {
let role = message.role.as_str();
let mut text_parts = Vec::new();
let mut thinking_parts = Vec::new();
@@ -1421,32 +1420,28 @@ fn build_chat_messages_with_reasoning(
if role == "assistant" {
let content = text_parts.join("\n");
let reasoning_content = thinking_parts.join("\n");
let mut reasoning_content = thinking_parts.join("\n");
let has_text = !content.trim().is_empty();
let mut has_tool_calls = !tool_calls.is_empty();
let include_reasoning_for_turn = include_reasoning
&& has_tool_calls
&& current_turn_start.is_some_and(|start| message_index > start)
&& !has_later_assistant_text(messages, message_index);
let has_reasoning = include_reasoning_for_turn && !reasoning_content.trim().is_empty();
// DeepSeek thinking-mode tool turns are stateful within the
// stateless Chat Completions transcript: if an assistant performed
// a tool call in the current user turn, its `reasoning_content`
// must be replayed while continuing that tool round. Once a new
// user text turn starts, DeepSeek recommends clearing historical
// reasoning content so the context is not dominated by old CoT.
// Older checkpoints could lose the current-round field because the
// UI display stream had no visible text block. Do not forward those
// malformed current tool calls; dropping that round is better than
// guaranteeing a provider-side 400.
let has_tool_calls = !tool_calls.is_empty();
// DeepSeek thinking-mode rule: any assistant message that performed
// a tool call must keep its `reasoning_content` and replay it in
// ALL subsequent requests, including across new user turns. Final
// text-only answers may drop reasoning_content (the API ignores
// it). If a tool-call round somehow lost its reasoning_content
// (e.g. a session checkpoint from before this rule was enforced,
// or a sub-turn where the model emitted no reasoning text),
// substitute a non-empty placeholder so the API accepts the
// request. Dropping tool_calls instead would orphan matching
// tool_results and fragment the conversation chain.
let include_reasoning_for_turn = include_reasoning && has_tool_calls;
let mut has_reasoning =
include_reasoning_for_turn && !reasoning_content.trim().is_empty();
if include_reasoning_for_turn && !has_reasoning {
logging::warn(
"Dropping DeepSeek tool_calls with missing reasoning_content from assistant message",
"Substituting placeholder reasoning_content for DeepSeek tool-call assistant message",
);
tool_calls.clear();
tool_call_ids.clear();
has_tool_calls = false;
reasoning_content = String::from("(reasoning omitted)");
has_reasoning = true;
}
// DeepSeek rejects assistant messages where both `content` and
@@ -1618,33 +1613,6 @@ fn build_chat_messages_with_reasoning(
out
}
fn is_text_user_message(message: &Message) -> bool {
message.role == "user"
&& message.content.iter().any(|block| {
matches!(
block,
ContentBlock::Text { text, .. } if !text.trim().is_empty()
)
})
}
fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool {
messages
.iter()
.skip(message_index.saturating_add(1))
.any(is_text_assistant_message)
}
fn is_text_assistant_message(message: &Message) -> bool {
message.role == "assistant"
&& message.content.iter().any(|block| {
matches!(
block,
ContentBlock::Text { text, .. } if !text.trim().is_empty()
)
})
}
fn tool_to_chat(tool: &Tool) -> Value {
let mut value = json!({
"type": "function",
@@ -2437,7 +2405,7 @@ mod tests {
}
#[test]
fn chat_messages_clear_prior_tool_round_reasoning_after_new_user_turn() {
fn chat_messages_replay_prior_tool_round_reasoning_after_new_user_turn() {
let messages = vec![
Message {
role: "user".to_string(),
@@ -2485,16 +2453,24 @@ mod tests {
},
];
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
let assistant = out
let tool_assistant = out
.iter()
.find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
.expect("assistant message");
assert!(assistant.get("tool_calls").is_some());
assert!(assistant.get("reasoning_content").is_none());
.find(|value| {
value.get("role").and_then(Value::as_str) == Some("assistant")
&& value.get("tool_calls").is_some()
})
.expect("tool-call assistant message");
assert_eq!(
tool_assistant
.get("reasoning_content")
.and_then(Value::as_str),
Some("Need to call a tool"),
"DeepSeek thinking mode requires reasoning_content to be replayed for tool-call rounds across all subsequent user turns"
);
}
#[test]
fn chat_messages_clear_completed_tool_round_reasoning_after_final_answer() {
fn chat_messages_replay_completed_tool_round_reasoning_after_final_answer() {
let messages = vec![
Message {
role: "user".to_string(),
@@ -2535,16 +2511,31 @@ mod tests {
},
];
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
let assistant = out
let tool_assistant = out
.iter()
.find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
.expect("assistant message");
assert!(assistant.get("tool_calls").is_some());
assert!(assistant.get("reasoning_content").is_none());
.find(|value| {
value.get("role").and_then(Value::as_str) == Some("assistant")
&& value.get("tool_calls").is_some()
})
.expect("tool-call assistant message");
assert_eq!(
tool_assistant
.get("reasoning_content")
.and_then(Value::as_str),
Some("Need to call a tool")
);
let final_assistant = out
.iter()
.rfind(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
.expect("final assistant message");
assert!(
final_assistant.get("reasoning_content").is_none(),
"final text answer can drop reasoning_content (API ignores it)"
);
}
#[test]
fn chat_messages_clear_v4_tool_round_reasoning_after_new_user_turn() {
fn chat_messages_replay_v4_tool_round_reasoning_after_new_user_turn() {
let messages = vec![
Message {
role: "user".to_string(),
@@ -2593,16 +2584,23 @@ mod tests {
];
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
let assistant = out
let tool_assistant = out
.iter()
.find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
.expect("assistant message");
assert!(assistant.get("tool_calls").is_some());
assert!(assistant.get("reasoning_content").is_none());
.find(|value| {
value.get("role").and_then(Value::as_str) == Some("assistant")
&& value.get("tool_calls").is_some()
})
.expect("tool-call assistant message");
assert_eq!(
tool_assistant
.get("reasoning_content")
.and_then(Value::as_str),
Some("Need a tool for this")
);
}
#[test]
fn chat_messages_drop_v4_tool_round_missing_reasoning() {
fn chat_messages_substitute_placeholder_when_v4_tool_round_missing_reasoning() {
let messages = vec![
Message {
role: "user".to_string(),
@@ -2633,15 +2631,24 @@ mod tests {
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
let assistant = out
.iter()
.find(|value| {
value.get("role").and_then(Value::as_str) == Some("assistant")
&& value.get("tool_calls").is_some()
})
.expect("tool-call assistant message should be retained with placeholder");
assert!(
!out.iter()
.any(|value| value.get("role").and_then(Value::as_str) == Some("assistant")),
"malformed assistant tool round should be removed"
assistant
.get("reasoning_content")
.and_then(Value::as_str)
.is_some_and(|value| !value.trim().is_empty()),
"missing reasoning_content should be substituted with a non-empty placeholder so the API accepts the request"
);
assert!(
!out.iter()
out.iter()
.any(|value| value.get("role").and_then(Value::as_str) == Some("tool")),
"tool result tied to missing reasoning should be removed"
"matching tool_result must remain so the conversation chain stays intact"
);
}
+13 -40
View File
@@ -523,30 +523,15 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi
}
pub fn estimate_tokens(messages: &[Message]) -> usize {
// Rough estimate: ~4 chars per token
let current_turn_start = messages.iter().rposition(is_text_user_message);
// Rough estimate: ~4 chars per token. DeepSeek thinking-mode rule: any
// assistant message with tool_calls keeps its reasoning_content forever
// (replayed in all subsequent requests). Final text-only answers drop it.
messages
.iter()
.enumerate()
.map(|(index, message)| {
let include_thinking = current_turn_start.is_some_and(|start| index > start)
&& message_has_tool_use(message)
&& !has_later_assistant_text(messages, index);
estimate_tokens_for_message(message, include_thinking)
})
.map(|message| estimate_tokens_for_message(message, message_has_tool_use(message)))
.sum()
}
fn is_text_user_message(message: &Message) -> bool {
message.role == "user"
&& message.content.iter().any(|block| {
matches!(
block,
ContentBlock::Text { text, .. } if !text.trim().is_empty()
)
})
}
fn message_has_tool_use(message: &Message) -> bool {
message
.content
@@ -554,23 +539,6 @@ fn message_has_tool_use(message: &Message) -> bool {
.any(|block| matches!(block, ContentBlock::ToolUse { .. }))
}
fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool {
messages
.iter()
.skip(message_index.saturating_add(1))
.any(is_text_assistant_message)
}
fn is_text_assistant_message(message: &Message) -> bool {
message.role == "assistant"
&& message.content.iter().any(|block| {
matches!(
block,
ContentBlock::Text { text, .. } if !text.trim().is_empty()
)
})
}
fn estimate_text_tokens_conservative(text: &str) -> usize {
text.chars().count().div_ceil(3)
}
@@ -1158,7 +1126,11 @@ mod tests {
}
#[test]
fn estimate_tokens_counts_current_tool_round_thinking_only() {
fn estimate_tokens_counts_tool_round_thinking_across_turns() {
// Per DeepSeek thinking-mode rules, any assistant message that
// performed a tool call keeps its reasoning_content in the request
// forever, including across new user turns. Token estimates must
// count those bytes.
let thinking = "reasoning ".repeat(800);
let current_messages = vec![
Message {
@@ -1222,9 +1194,10 @@ mod tests {
messages
};
assert!(estimate_tokens(&current_messages) > thinking.len() / 5);
assert!(estimate_tokens(&completed_messages) < thinking.len() / 8);
assert!(estimate_tokens(&historical_messages) < thinking.len() / 8);
let lower_bound = thinking.len() / 5;
assert!(estimate_tokens(&current_messages) > lower_bound);
assert!(estimate_tokens(&completed_messages) > lower_bound);
assert!(estimate_tokens(&historical_messages) > lower_bound);
}
#[test]
+18 -5
View File
@@ -2888,11 +2888,24 @@ impl Engine {
// Update turn usage
turn.add_usage(&usage);
// Build content blocks
if !current_thinking.is_empty() {
content_blocks.push(ContentBlock::Thinking {
thinking: current_thinking.clone(),
});
// Build content blocks. If this assistant turn produced tool
// calls, ensure a Thinking block is present even when the model
// didn't stream any reasoning text — DeepSeek's thinking-mode
// API requires `reasoning_content` to accompany every tool-call
// assistant message in the conversation history. Saving a
// placeholder here keeps the on-disk session structurally
// correct so subsequent requests won't 400.
let needs_thinking_block = !tool_uses.is_empty()
|| tool_parser::has_tool_call_markers(&current_text_raw);
let thinking_to_persist = if !current_thinking.is_empty() {
Some(current_thinking.clone())
} else if needs_thinking_block {
Some(String::from("(reasoning omitted)"))
} else {
None
};
if let Some(thinking) = thinking_to_persist {
content_blocks.push(ContentBlock::Thinking { thinking });
}
let mut final_text = current_text_visible.clone();
if tool_uses.is_empty() && tool_parser::has_tool_call_markers(&current_text_raw) {
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "deepseek-tui",
"version": "0.4.8",
"deepseekBinaryVersion": "0.4.8",
"version": "0.4.9",
"deepseekBinaryVersion": "0.4.9",
"description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
"author": "Hmbown",
"license": "MIT",