Release v0.4.9: thinking-mode reasoning_content fix + README refresh

### Fixed

- DeepSeek thinking-mode tool-call rounds now always replay reasoning_content in all subsequent requests (including across new user turns), matching the documented API contract that assistant tool-call messages must retain their reasoning content forever. Previously, reasoning_content was cleared after the current user turn completed, which could cause HTTP 400 errors.
- Missing reasoning_content on a tool-call assistant message now substitutes a safe placeholder ("(reasoning omitted)") instead of dropping the tool calls and their matching tool results, preventing orphaned conversation chains and API 400 rejections.
- Session checkpoint now persists a Thinking-block placeholder for tool-call turns that produced no streamed reasoning text, keeping on-disk sessions structurally correct for subsequent requests.
- Token estimation for compaction now counts thinking tokens across ALL tool-call rounds (not just the current user turn), aligning with the updated reasoning_content replay rule.

### Changed

- Internal crate dependency pins bumped 0.4.5 → 0.4.9 to match workspace.
- npm wrapper version and deepseekBinaryVersion bumped to 0.4.9.
- README fully rewritten: clearer feature highlights, V4 model focus, keyboard shortcut table, improved docs index, and more engaging layout.
- CHANGELOG entry for 0.4.9 with comparison URLs.
2026-04-25 12:00:08 -05:00
parent 41c54f08aa
commit 67b232b063
16 changed files with 284 additions and 245 deletions
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.4.9] - 2026-04-27
+
+### Fixed
+- DeepSeek thinking-mode tool-call rounds now always replay `reasoning_content` in all subsequent requests (including across new user turns), matching DeepSeek's documented API contract that assistant messages with tool calls must retain their reasoning content forever.
+- Missing `reasoning_content` on a tool-call assistant message now substitutes a safe placeholder (`"(reasoning omitted)"`) instead of dropping the tool calls and their matching tool results, preventing orphaned conversation chains and API 400 errors.
+- Session checkpoint now persists a Thinking-block placeholder for tool-call turns that produced no streamed reasoning text, keeping on-disk sessions structurally correct so subsequent requests avoid HTTP 400 rejections.
+- Token estimation for compaction now counts thinking tokens across all tool-call rounds (not just the current user turn), aligning with the updated `reasoning_content` replay rule.
+
 ## [0.4.8] - 2026-04-25

 ### Fixed
@@ -490,7 +498,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Hooks system and config profiles
 - Example skills and launch assets

-[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...HEAD
+[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.9...HEAD
+[0.4.9]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.8...v0.4.9
+[0.4.8]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...v0.4.8
 [0.3.33]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.32...v0.3.33
 [0.3.32]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.31...v0.3.32
 [0.3.31]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.28...v0.3.31
@@ -806,7 +806,7 @@ dependencies = [

 [[package]]
 name = "deepseek-agent"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "deepseek-config",
 "serde",
@@ -814,7 +814,7 @@ dependencies = [

 [[package]]
 name = "deepseek-app-server"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "axum",
@@ -837,7 +837,7 @@ dependencies = [

 [[package]]
 name = "deepseek-config"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "dirs",
@@ -848,7 +848,7 @@ dependencies = [

 [[package]]
 name = "deepseek-core"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "chrono",
@@ -867,7 +867,7 @@ dependencies = [

 [[package]]
 name = "deepseek-execpolicy"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "deepseek-protocol",
@@ -876,7 +876,7 @@ dependencies = [

 [[package]]
 name = "deepseek-hooks"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -890,7 +890,7 @@ dependencies = [

 [[package]]
 name = "deepseek-mcp"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "deepseek-protocol",
@@ -900,7 +900,7 @@ dependencies = [

 [[package]]
 name = "deepseek-protocol"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "serde",
 "serde_json",
@@ -908,7 +908,7 @@ dependencies = [

 [[package]]
 name = "deepseek-state"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "chrono",
@@ -920,7 +920,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tools"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "async-trait",
@@ -933,7 +933,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "arboard",
@@ -987,7 +987,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-cli"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
 "anyhow",
 "chrono",
@@ -1005,7 +1005,7 @@ dependencies = [

 [[package]]
 name = "deepseek-tui-core"
-version = "0.4.8"
+version = "0.4.9"

 [[package]]
 name = "deranged"
@@ -18,7 +18,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
 resolver = "2"

 [workspace.package]
-version = "0.4.8"
+version = "0.4.9"
 edition = "2024"
 license = "MIT"
 repository = "https://github.com/Hmbown/DeepSeek-TUI"
@@ -1,45 +1,64 @@
 # DeepSeek TUI

-`npm i -g deepseek-tui`
+> **A terminal-native coding agent for [DeepSeek V4](https://platform.deepseek.com) models — with 1M-token context, thinking-mode reasoning, and full tool-use.**

-A coding agent for [DeepSeek](https://platform.deepseek.com) models that runs in your terminal.
+```bash
+npm i -g deepseek-tui
+```

 [![CI](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml/badge.svg)](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml)
 [![crates.io](https://img.shields.io/crates/v/deepseek-tui)](https://crates.io/crates/deepseek-tui)
 [![npm](https://img.shields.io/npm/v/deepseek-tui)](https://www.npmjs.com/package/deepseek-tui)

+---
+
+## What is it?
+
+DeepSeek TUI is a coding agent that runs entirely in your terminal. It gives DeepSeek's frontier models direct access to your workspace — reading and editing files, running shell commands, searching the web, managing git, and orchestrating sub-agents — all through a fast, keyboard-driven TUI.
+
+**Built for DeepSeek V4** (`deepseek-v4-pro` / `deepseek-v4-flash`) with 1M-token context windows and native thinking-mode (chain-of-thought) streaming. See the model's reasoning unfold in real time as it works through your tasks.
+
+### Key Features
+
+- 🧠 **Thinking-mode streaming** — watch DeepSeek's chain-of-thought as it reasons about your code
+- 🔧 **Full tool suite** — file ops, shell execution, git, web search/browse, apply-patch, sub-agents, MCP servers, and more
+- 🪟 **1M-token context** — feed entire codebases; automatic intelligent compaction when context fills up
+- 🎛️ **Three interaction modes** — Plan (read-only explore), Agent (interactive with approval), YOLO (auto-approved)
+- ⚡ **Reasoning-effort tiers** — cycle through `off → high → max` with Shift+Tab
+- 🔄 **Session save/resume** — checkpoint and resume long sessions, fork conversations
+- 🌐 **HTTP/SSE runtime API** — `deepseek serve --http` for headless agent workflows
+- 📦 **MCP protocol** — connect to Model Context Protocol servers for extended tooling
+- 💰 **Live cost tracking** — per-turn and session-level token usage and cost estimates
+- 🎨 **Dark & light themes** — with a DeepSeek-blue branded palette
+
+---
+
 ## Quickstart

 ```bash
 npm install -g deepseek-tui
-```
-
-Start the TUI:
-
-```bash
 deepseek
 ```

-On first launch, it will prompt for your API key if one is not already configured.
-The package also installs `deepseek-tui`; both commands share the same
-`~/.deepseek/config.toml` for DeepSeek auth and default model settings.
-
-You can also set auth ahead of time with either of these:
+On first launch you'll be prompted for your [DeepSeek API key](https://platform.deepseek.com/api_keys). You can also set it ahead of time:

 ```bash
+# via CLI
 deepseek login --api-key "YOUR_DEEPSEEK_API_KEY"
-deepseek-tui login --api-key "YOUR_DEEPSEEK_API_KEY"
-DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY" deepseek-tui
+
+# via env var
+export DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY"
+deepseek
 ```

-To use NVIDIA NIM-hosted DeepSeek V4 Pro instead:
+### Using NVIDIA NIM

 ```bash
 deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY"
 deepseek --provider nvidia-nim

-# or for one process:
-DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek
+# or per-process:
+DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="..." deepseek
 ```

 <details>
@@ -47,8 +66,8 @@ DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek

 ```bash
 # From crates.io (requires Rust 1.85+)
-cargo install deepseek-tui --locked       # TUI
-cargo install deepseek-tui-cli --locked   # deepseek CLI facade
+cargo install deepseek-tui --locked       # TUI binary
+cargo install deepseek-tui-cli --locked   # CLI facade (deepseek command)

 # From source
 git clone https://github.com/Hmbown/DeepSeek-TUI.git
@@ -56,99 +75,116 @@ cd DeepSeek-TUI
 cargo install --path crates/tui --locked
 ```

-The canonical crates.io packages for this repository are `deepseek-tui` and
-`deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this
-project. crates.io publication can lag the repository workspace version and the
-npm wrapper, so use npm or install from source if you need the newest release
-surface immediately.
+The canonical crates.io packages are `deepseek-tui` and `deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this project. crates.io publication can lag the workspace version — use npm or install from source if you need the latest release surface immediately.

 </details>

-## What it does
+---

-A terminal coding agent for DeepSeek models with file editing, shell execution, `web.run` browsing, git operations, session resume, and [MCP](https://modelcontextprotocol.io) server integration.
+## Models & Pricing

-Three visible modes (**Tab** to cycle):
+DeepSeek TUI targets **DeepSeek V4** models with 1M-token context windows by default.

-| Mode | Behavior |
-|------|----------|
-| **Plan** | Review a plan before the agent starts making changes |
-| **Agent** | Default interactive mode with multi-step tool use |
-| **YOLO** | Auto-approve tools in a trusted workspace |
+| Model | Context | Input (cache hit) | Input (cache miss) | Output |
+|---|---|---|---|---|
+| `deepseek-v4-pro` | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
+| `deepseek-v4-flash` | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M |

-**Shift+Tab** cycles the reasoning-effort tier for DeepSeek thinking mode:
-`off` → `high` → `max`. The current tier is shown as a ⚡ chip in the header.
-Set a default in config with `reasoning_effort = "max"` (or `off` / `low` /
-`medium` / `high`).
+Legacy aliases `deepseek-chat` and `deepseek-reasoner` silently map to `deepseek-v4-flash`.

-## Models & pricing
+**NVIDIA NIM** hosted variants (`deepseek-ai/deepseek-v4-pro`, `deepseek-ai/deepseek-v4-flash`) use your NVIDIA account terms — no DeepSeek platform billing.

-| Model | Thinking | Context | Input cache hit | Input cache miss | Output |
-|---|---|---|---|---|---|
-| `deepseek-v4-pro` | default | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
-| `deepseek-v4-flash` | default | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M |
-| `deepseek-ai/deepseek-v4-pro` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms |
-| `deepseek-ai/deepseek-v4-flash` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms |
+*\*DeepSeek lists the Pro rates above as a limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI cost estimator falls back to base Pro rates after that timestamp.*

-Legacy `deepseek-chat` and `deepseek-reasoner` remain as silent aliases for
-`deepseek-v4-flash` (priced identically). Pricing is per 1M tokens as published
-by DeepSeek and is subject to change. *DeepSeek lists the Pro rates above as a
-limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI estimator
-falls back to the base Pro rates after that timestamp.
+---

 ## Usage

 ```bash
 deepseek                                      # interactive TUI
-deepseek "explain this in 2 sentences"        # one-shot prompt
-deepseek --model deepseek-v4-flash "summarize" # one-shot with model override
-deepseek --yolo                               # YOLO mode
-deepseek login --api-key "..."                # save API key to shared config
-deepseek doctor                               # check setup
-deepseek models                               # list live DeepSeek API models
+deepseek "explain this function"              # one-shot prompt
+deepseek --model deepseek-v4-flash "summarize" # model override
+deepseek --yolo                               # YOLO mode (auto-approve tools)
+deepseek login --api-key "..."                # save API key
+deepseek doctor                               # check setup & connectivity
+deepseek models                               # list live API models
 deepseek sessions                             # list saved sessions
-deepseek resume --last                        # resume the latest session
+deepseek resume --last                        # resume latest session
 deepseek serve --http                         # HTTP/SSE API server
 ```

-Controls: `F1` help, `Esc` backs out of the current action, `Ctrl+K` command palette.
-In the composer, `@path/to/file` adds local text file or directory context to
-the next message. Use `/attach <path>` for local image/video media references.
+### Keyboard shortcuts
+
+| Key / input | Action |
+|---|---|
+| `Tab` | Cycle mode: Plan → Agent → YOLO |
+| `Shift+Tab` | Cycle reasoning-effort: off → high → max |
+| `F1` | Help |
+| `Esc` | Back / dismiss |
+| `Ctrl+K` | Command palette |
+| `@path` | Attach file/directory context in composer |
+| `/attach <path>` | Attach image/video media references |
+
+---
+
+## Modes
+
+| Mode | Behavior |
+|---|---|
+| **Plan** 🔍 | Read-only investigation — model explores and proposes a plan before making changes |
+| **Agent** 🤖 | Default interactive mode — multi-step tool use with approval gates |
+| **YOLO** ⚡ | Auto-approve all tools in a trusted workspace (use with caution) |
+
+---

 ## Configuration

-`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for all options.
+`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for every option.

-Key environment overrides: `DEEPSEEK_API_KEY`, `DEEPSEEK_BASE_URL`,
-`DEEPSEEK_MODEL`, `DEEPSEEK_PROFILE`, `DEEPSEEK_PROVIDER`.
-For NVIDIA NIM, use `DEEPSEEK_PROVIDER=nvidia-nim` plus `NVIDIA_API_KEY`
-or `NVIDIA_NIM_API_KEY` (with `DEEPSEEK_API_KEY` as a compatibility fallback);
-the default model is `deepseek-ai/deepseek-v4-pro` and the default base URL is
-`https://integrate.api.nvidia.com/v1`. With `--provider nvidia-nim`,
-`--model deepseek-v4-flash` maps to `deepseek-ai/deepseek-v4-flash`.
+Key environment overrides:

-Quick checks and scaffolding:
+| Variable | Purpose |
+|---|---|
+| `DEEPSEEK_API_KEY` | API key |
+| `DEEPSEEK_BASE_URL` | API base URL |
+| `DEEPSEEK_MODEL` | Default model |
+| `DEEPSEEK_PROVIDER` | Provider: `deepseek` (default) or `nvidia-nim` |
+| `DEEPSEEK_PROFILE` | Config profile name |
+| `NVIDIA_API_KEY` | NVIDIA NIM API key |

- `deepseek-tui setup --status` — read-only, network-free status of API key,
-  MCP/skills/tools/plugins, sandbox, and `.env`.
- `deepseek-tui setup --tools --plugins` — scaffold `~/.deepseek/tools/` and
-  `~/.deepseek/plugins/` with self-describing example templates.
- `deepseek-tui doctor --json` — machine-readable doctor output for CI.
+Quick diagnostics:

-The client targets DeepSeek's documented OpenAI-compatible Chat Completions API
-(`/chat/completions`). DeepSeek context caching is automatic; when the API
-returns cache hit/miss token fields, the TUI includes them in usage and cost
-tracking.
+```bash
+deepseek-tui setup --status    # read-only status check (API key, MCP, sandbox, .env)
+deepseek-tui doctor --json     # machine-readable doctor output for CI
+deepseek-tui setup --tools --plugins  # scaffold tools/ and plugins/ directories
+```

-Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md).
+DeepSeek context caching is automatic — when the API returns cache hit/miss token fields, the TUI includes them in usage and cost tracking.

-## Docs
+Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md)

-[docs/](docs/) — configuration, modes, MCP integration, runtime API, and release runbooks.
+---
+
+## Documentation
+
+| Doc | Topic |
+|---|---|
+| [ARCHITECTURE.md](docs/ARCHITECTURE.md) | Codebase internals |
+| [CONFIGURATION.md](docs/CONFIGURATION.md) | Full config reference |
+| [MODES.md](docs/MODES.md) | Plan / Agent / YOLO modes |
+| [MCP.md](docs/MCP.md) | Model Context Protocol integration |
+| [RUNTIME_API.md](docs/RUNTIME_API.md) | HTTP/SSE API server |
+| [RELEASE_RUNBOOK.md](docs/RELEASE_RUNBOOK.md) | Release process |
+| [OPERATIONS_RUNBOOK.md](docs/OPERATIONS_RUNBOOK.md) | Ops & recovery |
+
+---

 ## Contributing

-See [CONTRIBUTING.md](CONTRIBUTING.md). Not affiliated with DeepSeek Inc.
+See [CONTRIBUTING.md](CONTRIBUTING.md). Pull requests welcome!
+
+*Not affiliated with DeepSeek Inc.*

 ## License

@@ -7,5 +7,5 @@ repository.workspace = true
 description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"

 [dependencies]
-deepseek-config = { path = "../config", version = "0.4.5" }
+deepseek-config = { path = "../config", version = "0.4.9" }
 serde.workspace = true
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
 anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.5" }
-deepseek-config = { path = "../config", version = "0.4.5" }
-deepseek-core = { path = "../core", version = "0.4.5" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
-deepseek-hooks = { path = "../hooks", version = "0.4.5" }
-deepseek-mcp = { path = "../mcp", version = "0.4.5" }
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
-deepseek-state = { path = "../state", version = "0.4.5" }
-deepseek-tools = { path = "../tools", version = "0.4.5" }
+deepseek-agent = { path = "../agent", version = "0.4.9" }
+deepseek-config = { path = "../config", version = "0.4.9" }
+deepseek-core = { path = "../core", version = "0.4.9" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
+deepseek-hooks = { path = "../hooks", version = "0.4.9" }
+deepseek-mcp = { path = "../mcp", version = "0.4.9" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
+deepseek-state = { path = "../state", version = "0.4.9" }
+deepseek-tools = { path = "../tools", version = "0.4.9" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -14,12 +14,12 @@ path = "src/main.rs"
 anyhow.workspace = true
 clap.workspace = true
 clap_complete.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.5" }
-deepseek-app-server = { path = "../app-server", version = "0.4.5" }
-deepseek-config = { path = "../config", version = "0.4.5" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
-deepseek-mcp = { path = "../mcp", version = "0.4.5" }
-deepseek-state = { path = "../state", version = "0.4.5" }
+deepseek-agent = { path = "../agent", version = "0.4.9" }
+deepseek-app-server = { path = "../app-server", version = "0.4.9" }
+deepseek-config = { path = "../config", version = "0.4.9" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
+deepseek-mcp = { path = "../mcp", version = "0.4.9" }
+deepseek-state = { path = "../state", version = "0.4.9" }
 chrono.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.5" }
-deepseek-config = { path = "../config", version = "0.4.5" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
-deepseek-hooks = { path = "../hooks", version = "0.4.5" }
-deepseek-mcp = { path = "../mcp", version = "0.4.5" }
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
-deepseek-state = { path = "../state", version = "0.4.5" }
-deepseek-tools = { path = "../tools", version = "0.4.5" }
+deepseek-agent = { path = "../agent", version = "0.4.9" }
+deepseek-config = { path = "../config", version = "0.4.9" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
+deepseek-hooks = { path = "../hooks", version = "0.4.9" }
+deepseek-mcp = { path = "../mcp", version = "0.4.9" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
+deepseek-state = { path = "../state", version = "0.4.9" }
+deepseek-tools = { path = "../tools", version = "0.4.9" }
 serde_json.workspace = true
 tokio.workspace = true
 uuid.workspace = true
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace

 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 serde.workspace = true
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
 anyhow.workspace = true
 async-trait.workspace = true
 chrono.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo

 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 serde.workspace = true
 serde_json.workspace = true
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
 [dependencies]
 anyhow.workspace = true
 async-trait.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
@@ -1351,7 +1351,6 @@ fn build_chat_messages_with_reasoning(
 ) -> Vec<Value> {
    let mut out = Vec::new();
    let mut pending_tool_calls: HashSet<String> = HashSet::new();
-    let current_turn_start = messages.iter().rposition(is_text_user_message);

    if let Some(instructions) = system_to_instructions(system.cloned())
        && !instructions.trim().is_empty()
@@ -1362,7 +1361,7 @@ fn build_chat_messages_with_reasoning(
        }));
    }

-    for (message_index, message) in messages.iter().enumerate() {
+    for message in messages.iter() {
        let role = message.role.as_str();
        let mut text_parts = Vec::new();
        let mut thinking_parts = Vec::new();
@@ -1421,32 +1420,28 @@ fn build_chat_messages_with_reasoning(

        if role == "assistant" {
            let content = text_parts.join("\n");
-            let reasoning_content = thinking_parts.join("\n");
+            let mut reasoning_content = thinking_parts.join("\n");
            let has_text = !content.trim().is_empty();
-            let mut has_tool_calls = !tool_calls.is_empty();
-            let include_reasoning_for_turn = include_reasoning
-                && has_tool_calls
-                && current_turn_start.is_some_and(|start| message_index > start)
-                && !has_later_assistant_text(messages, message_index);
-            let has_reasoning = include_reasoning_for_turn && !reasoning_content.trim().is_empty();
-
-            // DeepSeek thinking-mode tool turns are stateful within the
-            // stateless Chat Completions transcript: if an assistant performed
-            // a tool call in the current user turn, its `reasoning_content`
-            // must be replayed while continuing that tool round. Once a new
-            // user text turn starts, DeepSeek recommends clearing historical
-            // reasoning content so the context is not dominated by old CoT.
-            // Older checkpoints could lose the current-round field because the
-            // UI display stream had no visible text block. Do not forward those
-            // malformed current tool calls; dropping that round is better than
-            // guaranteeing a provider-side 400.
+            let has_tool_calls = !tool_calls.is_empty();
+            // DeepSeek thinking-mode rule: any assistant message that performed
+            // a tool call must keep its `reasoning_content` and replay it in
+            // ALL subsequent requests, including across new user turns. Final
+            // text-only answers may drop reasoning_content (the API ignores
+            // it). If a tool-call round somehow lost its reasoning_content
+            // (e.g. a session checkpoint from before this rule was enforced,
+            // or a sub-turn where the model emitted no reasoning text),
+            // substitute a non-empty placeholder so the API accepts the
+            // request. Dropping tool_calls instead would orphan matching
+            // tool_results and fragment the conversation chain.
+            let include_reasoning_for_turn = include_reasoning && has_tool_calls;
+            let mut has_reasoning =
+                include_reasoning_for_turn && !reasoning_content.trim().is_empty();
            if include_reasoning_for_turn && !has_reasoning {
                logging::warn(
-                    "Dropping DeepSeek tool_calls with missing reasoning_content from assistant message",
+                    "Substituting placeholder reasoning_content for DeepSeek tool-call assistant message",
                );
-                tool_calls.clear();
-                tool_call_ids.clear();
-                has_tool_calls = false;
+                reasoning_content = String::from("(reasoning omitted)");
+                has_reasoning = true;
            }

            // DeepSeek rejects assistant messages where both `content` and
@@ -1618,33 +1613,6 @@ fn build_chat_messages_with_reasoning(
    out
 }

-fn is_text_user_message(message: &Message) -> bool {
-    message.role == "user"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
-fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool {
-    messages
-        .iter()
-        .skip(message_index.saturating_add(1))
-        .any(is_text_assistant_message)
-}
-
-fn is_text_assistant_message(message: &Message) -> bool {
-    message.role == "assistant"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
 fn tool_to_chat(tool: &Tool) -> Value {
    let mut value = json!({
        "type": "function",
@@ -2437,7 +2405,7 @@ mod tests {
    }

    #[test]
-    fn chat_messages_clear_prior_tool_round_reasoning_after_new_user_turn() {
+    fn chat_messages_replay_prior_tool_round_reasoning_after_new_user_turn() {
        let messages = vec![
            Message {
                role: "user".to_string(),
@@ -2485,16 +2453,24 @@ mod tests {
            },
        ];
        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
-        let assistant = out
+        let tool_assistant = out
            .iter()
-            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
-            .expect("assistant message");
-        assert!(assistant.get("tool_calls").is_some());
-        assert!(assistant.get("reasoning_content").is_none());
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need to call a tool"),
+            "DeepSeek thinking mode requires reasoning_content to be replayed for tool-call rounds across all subsequent user turns"
+        );
    }

    #[test]
-    fn chat_messages_clear_completed_tool_round_reasoning_after_final_answer() {
+    fn chat_messages_replay_completed_tool_round_reasoning_after_final_answer() {
        let messages = vec![
            Message {
                role: "user".to_string(),
@@ -2535,16 +2511,31 @@ mod tests {
            },
        ];
        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
-        let assistant = out
+        let tool_assistant = out
            .iter()
-            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
-            .expect("assistant message");
-        assert!(assistant.get("tool_calls").is_some());
-        assert!(assistant.get("reasoning_content").is_none());
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need to call a tool")
+        );
+        let final_assistant = out
+            .iter()
+            .rfind(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
+            .expect("final assistant message");
+        assert!(
+            final_assistant.get("reasoning_content").is_none(),
+            "final text answer can drop reasoning_content (API ignores it)"
+        );
    }

    #[test]
-    fn chat_messages_clear_v4_tool_round_reasoning_after_new_user_turn() {
+    fn chat_messages_replay_v4_tool_round_reasoning_after_new_user_turn() {
        let messages = vec![
            Message {
                role: "user".to_string(),
@@ -2593,16 +2584,23 @@ mod tests {
        ];

        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
-        let assistant = out
+        let tool_assistant = out
            .iter()
-            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
-            .expect("assistant message");
-        assert!(assistant.get("tool_calls").is_some());
-        assert!(assistant.get("reasoning_content").is_none());
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need a tool for this")
+        );
    }

    #[test]
-    fn chat_messages_drop_v4_tool_round_missing_reasoning() {
+    fn chat_messages_substitute_placeholder_when_v4_tool_round_missing_reasoning() {
        let messages = vec![
            Message {
                role: "user".to_string(),
@@ -2633,15 +2631,24 @@ mod tests {

        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");

+        let assistant = out
+            .iter()
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message should be retained with placeholder");
        assert!(
-            !out.iter()
-                .any(|value| value.get("role").and_then(Value::as_str) == Some("assistant")),
-            "malformed assistant tool round should be removed"
+            assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str)
+                .is_some_and(|value| !value.trim().is_empty()),
+            "missing reasoning_content should be substituted with a non-empty placeholder so the API accepts the request"
        );
        assert!(
-            !out.iter()
+            out.iter()
                .any(|value| value.get("role").and_then(Value::as_str) == Some("tool")),
-            "tool result tied to missing reasoning should be removed"
+            "matching tool_result must remain so the conversation chain stays intact"
        );
    }

@@ -523,30 +523,15 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi
 }

 pub fn estimate_tokens(messages: &[Message]) -> usize {
-    // Rough estimate: ~4 chars per token
-    let current_turn_start = messages.iter().rposition(is_text_user_message);
+    // Rough estimate: ~4 chars per token. DeepSeek thinking-mode rule: any
+    // assistant message with tool_calls keeps its reasoning_content forever
+    // (replayed in all subsequent requests). Final text-only answers drop it.
    messages
        .iter()
-        .enumerate()
-        .map(|(index, message)| {
-            let include_thinking = current_turn_start.is_some_and(|start| index > start)
-                && message_has_tool_use(message)
-                && !has_later_assistant_text(messages, index);
-            estimate_tokens_for_message(message, include_thinking)
-        })
+        .map(|message| estimate_tokens_for_message(message, message_has_tool_use(message)))
        .sum()
 }

-fn is_text_user_message(message: &Message) -> bool {
-    message.role == "user"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
 fn message_has_tool_use(message: &Message) -> bool {
    message
        .content
@@ -554,23 +539,6 @@ fn message_has_tool_use(message: &Message) -> bool {
        .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
 }

-fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool {
-    messages
-        .iter()
-        .skip(message_index.saturating_add(1))
-        .any(is_text_assistant_message)
-}
-
-fn is_text_assistant_message(message: &Message) -> bool {
-    message.role == "assistant"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
 fn estimate_text_tokens_conservative(text: &str) -> usize {
    text.chars().count().div_ceil(3)
 }
@@ -1158,7 +1126,11 @@ mod tests {
    }

    #[test]
-    fn estimate_tokens_counts_current_tool_round_thinking_only() {
+    fn estimate_tokens_counts_tool_round_thinking_across_turns() {
+        // Per DeepSeek thinking-mode rules, any assistant message that
+        // performed a tool call keeps its reasoning_content in the request
+        // forever, including across new user turns. Token estimates must
+        // therefore include that reasoning text for every tool-call round.
        let thinking = "reasoning ".repeat(800);
        let current_messages = vec![
            Message {
@@ -1222,9 +1194,10 @@ mod tests {
            messages
        };

-        assert!(estimate_tokens(&current_messages) > thinking.len() / 5);
-        assert!(estimate_tokens(&completed_messages) < thinking.len() / 8);
-        assert!(estimate_tokens(&historical_messages) < thinking.len() / 8);
+        let lower_bound = thinking.len() / 5;
+        assert!(estimate_tokens(&current_messages) > lower_bound);
+        assert!(estimate_tokens(&completed_messages) > lower_bound);
+        assert!(estimate_tokens(&historical_messages) > lower_bound);
    }

    #[test]
@@ -2888,11 +2888,24 @@ impl Engine {
            // Update turn usage
            turn.add_usage(&usage);

-            // Build content blocks
-            if !current_thinking.is_empty() {
-                content_blocks.push(ContentBlock::Thinking {
-                    thinking: current_thinking.clone(),
-                });
+            // Build content blocks. If this assistant turn produced tool
+            // calls (or tool-call markers in its raw text), ensure a Thinking
+            // block is present even when the model streamed no reasoning
+            // text — DeepSeek's thinking-mode API requires `reasoning_content`
+            // on every tool-call assistant message in the conversation
+            // history. Saving a placeholder here keeps the on-disk session
+            // structurally correct so later requests aren't rejected (HTTP 400).
+            let needs_thinking_block = !tool_uses.is_empty()
+                || tool_parser::has_tool_call_markers(&current_text_raw);
+            let thinking_to_persist = if !current_thinking.is_empty() {
+                Some(current_thinking.clone())
+            } else if needs_thinking_block {
+                Some(String::from("(reasoning omitted)"))
+            } else {
+                None
+            };
+            if let Some(thinking) = thinking_to_persist {
+                content_blocks.push(ContentBlock::Thinking { thinking });
            }
            let mut final_text = current_text_visible.clone();
            if tool_uses.is_empty() && tool_parser::has_tool_call_markers(&current_text_raw) {
@@ -1,7 +1,7 @@
 {
  "name": "deepseek-tui",
-  "version": "0.4.8",
-  "deepseekBinaryVersion": "0.4.8",
+  "version": "0.4.9",
+  "deepseekBinaryVersion": "0.4.9",
  "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
  "author": "Hmbown",
  "license": "MIT",