diff --git a/CHANGELOG.md b/CHANGELOG.md
index e00dc313..ec2c8872 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.4.9] - 2026-04-27
+
+### Fixed
+- DeepSeek thinking-mode tool-call rounds now always replay `reasoning_content` in all subsequent requests (including across new user turns), matching DeepSeek's documented API contract that assistant messages with tool calls must retain their reasoning content forever.
+- Missing `reasoning_content` on a tool-call assistant message now substitutes a safe placeholder (`"(reasoning omitted)"`) instead of dropping the tool calls and their matching tool results, preventing orphaned conversation chains and API 400 errors.
+- Session checkpoints now persist a Thinking-block placeholder for tool-call turns that produced no streamed reasoning text, keeping on-disk sessions structurally valid so that requests built from a resumed session are not rejected with HTTP 400.
+- Token estimation for compaction now counts thinking tokens across all tool-call rounds (not just the current user turn), aligning with the updated `reasoning_content` replay rule.
+
 ## [0.4.8] - 2026-04-25
 
 ### Fixed
@@ -490,7 +498,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Hooks system and config profiles
 - Example skills and launch assets
 
-[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...HEAD
+[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.9...HEAD
+[0.4.9]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.8...v0.4.9
+[0.4.8]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...v0.4.8
 [0.3.33]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.32...v0.3.33
 [0.3.32]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.31...v0.3.32
 [0.3.31]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.28...v0.3.31
diff --git a/Cargo.lock b/Cargo.lock
index 0d5f5b4f..80bcc66d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -806,7 +806,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-agent"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "deepseek-config",
  "serde",
@@ -814,7 +814,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-app-server"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "axum",
@@ -837,7 +837,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-config"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "dirs",
@@ -848,7 +848,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-core"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "chrono",
@@ -867,7 +867,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-execpolicy"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "deepseek-protocol",
@@ -876,7 +876,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-hooks"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -890,7 +890,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-mcp"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "deepseek-protocol",
@@ -900,7 +900,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-protocol"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "serde",
  "serde_json",
@@ -908,7 +908,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-state"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "chrono",
@@ -920,7 +920,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tools"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -933,7 +933,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "arboard",
@@ -987,7 +987,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui-cli"
-version = "0.4.8"
+version = "0.4.9"
 dependencies = [
  "anyhow",
  "chrono",
@@ -1005,7 +1005,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui-core"
-version = "0.4.8"
+version = "0.4.9"
 
 [[package]]
 name = "deranged"
diff --git a/Cargo.toml b/Cargo.toml
index 68921c25..eb90c8ab 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
 resolver = "2"
 
 [workspace.package]
-version = "0.4.8"
+version = "0.4.9"
 edition = "2024"
 license = "MIT"
 repository = "https://github.com/Hmbown/DeepSeek-TUI"
diff --git a/README.md b/README.md
index 67db4441..b09422c3 100644
--- a/README.md
+++ b/README.md
@@ -1,45 +1,64 @@
 # DeepSeek TUI
 
-`npm i -g deepseek-tui`
+> **A terminal-native coding agent for [DeepSeek V4](https://platform.deepseek.com) models — with 1M-token context, thinking-mode reasoning, and full tool-use.**
 
-A coding agent for [DeepSeek](https://platform.deepseek.com) models that runs in your terminal.
+```bash
+npm i -g deepseek-tui
+```
 
 [![CI](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml/badge.svg)](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml)
 [![crates.io](https://img.shields.io/crates/v/deepseek-tui)](https://crates.io/crates/deepseek-tui)
 [![npm](https://img.shields.io/npm/v/deepseek-tui)](https://www.npmjs.com/package/deepseek-tui)
 
+---
+
+## What is it?
+
+DeepSeek TUI is a coding agent that runs entirely in your terminal. It gives DeepSeek's frontier models direct access to your workspace — reading and editing files, running shell commands, searching the web, managing git, and orchestrating sub-agents — all through a fast, keyboard-driven TUI.
+
+**Built for DeepSeek V4** (`deepseek-v4-pro` / `deepseek-v4-flash`) with 1M-token context windows and native thinking-mode (chain-of-thought) streaming. See the model's reasoning unfold in real time as it works through your tasks.
+
+### Key Features
+
+- 🧠 **Thinking-mode streaming** — watch DeepSeek's chain-of-thought as it reasons about your code
+- 🔧 **Full tool suite** — file ops, shell execution, git, web search/browse, apply-patch, sub-agents, MCP servers, and more
+- 🪟 **1M-token context** — feed entire codebases; automatic intelligent compaction when context fills up
+- 🎛️ **Three interaction modes** — Plan (read-only explore), Agent (interactive with approval), YOLO (auto-approved)
+- ⚡ **Reasoning-effort tiers** — cycle through `off → high → max` with Shift+Tab
+- 🔄 **Session save/resume** — checkpoint and resume long sessions, fork conversations
+- 🌐 **HTTP/SSE runtime API** — `deepseek serve --http` for headless agent workflows
+- 📦 **MCP protocol** — connect to Model Context Protocol servers for extended tooling
+- 💰 **Live cost tracking** — per-turn and session-level token usage and cost estimates
+- 🎨 **Dark & light themes** — with a DeepSeek-blue branded palette
+
+---
+
 ## Quickstart
 
 ```bash
 npm install -g deepseek-tui
-```
-
-Start the TUI:
-
-```bash
 deepseek
 ```
 
-On first launch, it will prompt for your API key if one is not already configured.
-The package also installs `deepseek-tui`; both commands share the same
-`~/.deepseek/config.toml` for DeepSeek auth and default model settings.
-
-You can also set auth ahead of time with either of these:
+On first launch you'll be prompted for your [DeepSeek API key](https://platform.deepseek.com/api_keys). You can also set it ahead of time:
 
 ```bash
+# via CLI
 deepseek login --api-key "YOUR_DEEPSEEK_API_KEY"
-deepseek-tui login --api-key "YOUR_DEEPSEEK_API_KEY"
-DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY" deepseek-tui
+
+# via env var
+export DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY"
+deepseek
 ```
 
-To use NVIDIA NIM-hosted DeepSeek V4 Pro instead:
+### Using NVIDIA NIM
 
 ```bash
 deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY"
 deepseek --provider nvidia-nim
 
-# or for one process:
-DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek
+# or per-process:
+DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="..." deepseek
 ```
 
 <details>
@@ -47,8 +66,8 @@ DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek
 
 ```bash
 # From crates.io (requires Rust 1.85+)
-cargo install deepseek-tui --locked       # TUI
-cargo install deepseek-tui-cli --locked   # deepseek CLI facade
+cargo install deepseek-tui --locked       # TUI binary
+cargo install deepseek-tui-cli --locked   # CLI facade (deepseek command)
 
 # From source
 git clone https://github.com/Hmbown/DeepSeek-TUI.git
@@ -56,99 +75,116 @@ cd DeepSeek-TUI
 cargo install --path crates/tui --locked
 ```
 
-The canonical crates.io packages for this repository are `deepseek-tui` and
-`deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this
-project. crates.io publication can lag the repository workspace version and the
-npm wrapper, so use npm or install from source if you need the newest release
-surface immediately.
+The canonical crates.io packages are `deepseek-tui` and `deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this project. crates.io publication can lag the repository workspace version and the npm wrapper, so use npm or install from source if you need the newest release immediately.
 
 </details>
 
-## What it does
+---
 
-A terminal coding agent for DeepSeek models with file editing, shell execution, `web.run` browsing, git operations, session resume, and [MCP](https://modelcontextprotocol.io) server integration.
+## Models & Pricing
 
-Three visible modes (**Tab** to cycle):
+DeepSeek TUI targets **DeepSeek V4** models with 1M-token context windows by default.
 
-| Mode | Behavior |
-|------|----------|
-| **Plan** | Review a plan before the agent starts making changes |
-| **Agent** | Default interactive mode with multi-step tool use |
-| **YOLO** | Auto-approve tools in a trusted workspace |
+| Model | Context | Input (cache hit) | Input (cache miss) | Output |
+|---|---|---|---|---|
+| `deepseek-v4-pro` | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
+| `deepseek-v4-flash` | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M |
 
-**Shift+Tab** cycles the reasoning-effort tier for DeepSeek thinking mode:
-`off` → `high` → `max`. The current tier is shown as a ⚡ chip in the header.
-Set a default in config with `reasoning_effort = "max"` (or `off` / `low` /
-`medium` / `high`).
+Legacy aliases `deepseek-chat` and `deepseek-reasoner` silently map to `deepseek-v4-flash`.
 
-## Models & pricing
+**NVIDIA NIM** hosted variants (`deepseek-ai/deepseek-v4-pro`, `deepseek-ai/deepseek-v4-flash`) use your NVIDIA account terms — no DeepSeek platform billing.
 
-| Model | Thinking | Context | Input cache hit | Input cache miss | Output |
-|---|---|---|---|---|---|
-| `deepseek-v4-pro` | default | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
-| `deepseek-v4-flash` | default | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M |
-| `deepseek-ai/deepseek-v4-pro` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms |
-| `deepseek-ai/deepseek-v4-flash` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms |
+*\*DeepSeek lists the Pro rates above as a limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI cost estimator falls back to base Pro rates after that timestamp.*
 
-Legacy `deepseek-chat` and `deepseek-reasoner` remain as silent aliases for
-`deepseek-v4-flash` (priced identically). Pricing is per 1M tokens as published
-by DeepSeek and is subject to change. *DeepSeek lists the Pro rates above as a
-limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI estimator
-falls back to the base Pro rates after that timestamp.
+---
 
 ## Usage
 
 ```bash
 deepseek                                      # interactive TUI
-deepseek "explain this in 2 sentences"        # one-shot prompt
-deepseek --model deepseek-v4-flash "summarize" # one-shot with model override
-deepseek --yolo                               # YOLO mode
-deepseek login --api-key "..."                # save API key to shared config
-deepseek doctor                               # check setup
-deepseek models                               # list live DeepSeek API models
+deepseek "explain this function"              # one-shot prompt
+deepseek --model deepseek-v4-flash "summarize" # model override
+deepseek --yolo                               # YOLO mode (auto-approve tools)
+deepseek login --api-key "..."                # save API key
+deepseek doctor                               # check setup & connectivity
+deepseek models                               # list live API models
 deepseek sessions                             # list saved sessions
-deepseek resume --last                        # resume the latest session
+deepseek resume --last                        # resume latest session
 deepseek serve --http                         # HTTP/SSE API server
 ```
 
-Controls: `F1` help, `Esc` backs out of the current action, `Ctrl+K` command palette.
-In the composer, `@path/to/file` adds local text file or directory context to
-the next message. Use `/attach <path>` for local image/video media references.
+### Keyboard shortcuts
+
+| Key | Action |
+|---|---|
+| `Tab` | Cycle mode: Plan → Agent → YOLO |
+| `Shift+Tab` | Cycle reasoning-effort: off → high → max |
+| `F1` | Help |
+| `Esc` | Back / dismiss |
+| `Ctrl+K` | Command palette |
+| `@path` | Attach file/directory context in composer |
+| `/attach <path>` | Attach image/video media references |
+
+---
+
+## Modes
+
+| Mode | Behavior |
+|---|---|
+| **Plan** 🔍 | Read-only investigation — model explores and proposes a plan before making changes |
+| **Agent** 🤖 | Default interactive mode — multi-step tool use with approval gates |
+| **YOLO** ⚡ | Auto-approve all tools in a trusted workspace (use with caution) |
+
+---
 
 ## Configuration
 
-`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for all options.
+`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for every option.
 
-Key environment overrides: `DEEPSEEK_API_KEY`, `DEEPSEEK_BASE_URL`,
-`DEEPSEEK_MODEL`, `DEEPSEEK_PROFILE`, `DEEPSEEK_PROVIDER`.
-For NVIDIA NIM, use `DEEPSEEK_PROVIDER=nvidia-nim` plus `NVIDIA_API_KEY`
-or `NVIDIA_NIM_API_KEY` (with `DEEPSEEK_API_KEY` as a compatibility fallback);
-the default model is `deepseek-ai/deepseek-v4-pro` and the default base URL is
-`https://integrate.api.nvidia.com/v1`. With `--provider nvidia-nim`,
-`--model deepseek-v4-flash` maps to `deepseek-ai/deepseek-v4-flash`.
+Key environment overrides:
 
-Quick checks and scaffolding:
+| Variable | Purpose |
+|---|---|
+| `DEEPSEEK_API_KEY` | API key |
+| `DEEPSEEK_BASE_URL` | API base URL |
+| `DEEPSEEK_MODEL` | Default model |
+| `DEEPSEEK_PROVIDER` | Provider: `deepseek` (default) or `nvidia-nim` |
+| `DEEPSEEK_PROFILE` | Config profile name |
+| `NVIDIA_API_KEY` | NVIDIA NIM API key (`NVIDIA_NIM_API_KEY` is also accepted) |
 
-- `deepseek-tui setup --status` — read-only, network-free status of API key,
-  MCP/skills/tools/plugins, sandbox, and `.env`.
-- `deepseek-tui setup --tools --plugins` — scaffold `~/.deepseek/tools/` and
-  `~/.deepseek/plugins/` with self-describing example templates.
-- `deepseek-tui doctor --json` — machine-readable doctor output for CI.
+Quick diagnostics:
 
-The client targets DeepSeek's documented OpenAI-compatible Chat Completions API
-(`/chat/completions`). DeepSeek context caching is automatic; when the API
-returns cache hit/miss token fields, the TUI includes them in usage and cost
-tracking.
+```bash
+deepseek-tui setup --status    # read-only status check (API key, MCP, sandbox, .env)
+deepseek-tui doctor --json     # machine-readable doctor output for CI
+deepseek-tui setup --tools --plugins  # scaffold tools/ and plugins/ directories
+```
 
-Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md).
+DeepSeek context caching is automatic — when the API returns cache hit/miss token fields, the TUI includes them in usage and cost tracking.
 
-## Docs
+Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md)
 
-[docs/](docs/) — configuration, modes, MCP integration, runtime API, and release runbooks.
+---
+
+## Documentation
+
+| Doc | Topic |
+|---|---|
+| [ARCHITECTURE.md](docs/ARCHITECTURE.md) | Codebase internals |
+| [CONFIGURATION.md](docs/CONFIGURATION.md) | Full config reference |
+| [MODES.md](docs/MODES.md) | Plan / Agent / YOLO modes |
+| [MCP.md](docs/MCP.md) | Model Context Protocol integration |
+| [RUNTIME_API.md](docs/RUNTIME_API.md) | HTTP/SSE API server |
+| [RELEASE_RUNBOOK.md](docs/RELEASE_RUNBOOK.md) | Release process |
+| [OPERATIONS_RUNBOOK.md](docs/OPERATIONS_RUNBOOK.md) | Ops & recovery |
+
+---
 
 ## Contributing
 
-See [CONTRIBUTING.md](CONTRIBUTING.md). Not affiliated with DeepSeek Inc.
+See [CONTRIBUTING.md](CONTRIBUTING.md). Pull requests welcome!
+
+*Not affiliated with DeepSeek Inc.*
 
 ## License
 
diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml
index d101732a..edb17e56 100644
--- a/crates/agent/Cargo.toml
+++ b/crates/agent/Cargo.toml
@@ -7,5 +7,5 @@ repository.workspace = true
 description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
 
 [dependencies]
-deepseek-config = { path = "../config", version = "0.4.5" }
+deepseek-config = { path = "../config", version = "0.4.9" }
 serde.workspace = true
diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml
index 1b45a3cd..136de0bd 100644
--- a/crates/app-server/Cargo.toml
+++ b/crates/app-server/Cargo.toml
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
 anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.5" }
-deepseek-config = { path = "../config", version = "0.4.5" }
-deepseek-core = { path = "../core", version = "0.4.5" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
-deepseek-hooks = { path = "../hooks", version = "0.4.5" }
-deepseek-mcp = { path = "../mcp", version = "0.4.5" }
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
-deepseek-state = { path = "../state", version = "0.4.5" }
-deepseek-tools = { path = "../tools", version = "0.4.5" }
+deepseek-agent = { path = "../agent", version = "0.4.9" }
+deepseek-config = { path = "../config", version = "0.4.9" }
+deepseek-core = { path = "../core", version = "0.4.9" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
+deepseek-hooks = { path = "../hooks", version = "0.4.9" }
+deepseek-mcp = { path = "../mcp", version = "0.4.9" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
+deepseek-state = { path = "../state", version = "0.4.9" }
+deepseek-tools = { path = "../tools", version = "0.4.9" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
index f0d79e33..71c766a5 100644
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@@ -14,12 +14,12 @@ path = "src/main.rs"
 anyhow.workspace = true
 clap.workspace = true
 clap_complete.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.5" }
-deepseek-app-server = { path = "../app-server", version = "0.4.5" }
-deepseek-config = { path = "../config", version = "0.4.5" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
-deepseek-mcp = { path = "../mcp", version = "0.4.5" }
-deepseek-state = { path = "../state", version = "0.4.5" }
+deepseek-agent = { path = "../agent", version = "0.4.9" }
+deepseek-app-server = { path = "../app-server", version = "0.4.9" }
+deepseek-config = { path = "../config", version = "0.4.9" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
+deepseek-mcp = { path = "../mcp", version = "0.4.9" }
+deepseek-state = { path = "../state", version = "0.4.9" }
 chrono.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index 8487fd5e..9fce4bd9 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
-deepseek-agent = { path = "../agent", version = "0.4.5" }
-deepseek-config = { path = "../config", version = "0.4.5" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" }
-deepseek-hooks = { path = "../hooks", version = "0.4.5" }
-deepseek-mcp = { path = "../mcp", version = "0.4.5" }
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
-deepseek-state = { path = "../state", version = "0.4.5" }
-deepseek-tools = { path = "../tools", version = "0.4.5" }
+deepseek-agent = { path = "../agent", version = "0.4.9" }
+deepseek-config = { path = "../config", version = "0.4.9" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" }
+deepseek-hooks = { path = "../hooks", version = "0.4.9" }
+deepseek-mcp = { path = "../mcp", version = "0.4.9" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
+deepseek-state = { path = "../state", version = "0.4.9" }
+deepseek-tools = { path = "../tools", version = "0.4.9" }
 serde_json.workspace = true
 tokio.workspace = true
 uuid.workspace = true
diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml
index e53032ed..99732f7e 100644
--- a/crates/execpolicy/Cargo.toml
+++ b/crates/execpolicy/Cargo.toml
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
 
 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 serde.workspace = true
diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml
index b319d863..8b5b2469 100644
--- a/crates/hooks/Cargo.toml
+++ b/crates/hooks/Cargo.toml
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
 anyhow.workspace = true
 async-trait.workspace = true
 chrono.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
diff --git a/crates/mcp/Cargo.toml b/crates/mcp/Cargo.toml
index 1fff24b8..9271cac9 100644
--- a/crates/mcp/Cargo.toml
+++ b/crates/mcp/Cargo.toml
@@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo
 
 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 serde.workspace = true
 serde_json.workspace = true
diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml
index 8b2648bf..73e75328 100644
--- a/crates/tools/Cargo.toml
+++ b/crates/tools/Cargo.toml
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
 [dependencies]
 anyhow.workspace = true
 async-trait.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.4.5" }
+deepseek-protocol = { path = "../protocol", version = "0.4.9" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs
index d273aa06..97389ea6 100644
--- a/crates/tui/src/client.rs
+++ b/crates/tui/src/client.rs
@@ -1351,7 +1351,6 @@ fn build_chat_messages_with_reasoning(
 ) -> Vec<Value> {
     let mut out = Vec::new();
     let mut pending_tool_calls: HashSet<String> = HashSet::new();
-    let current_turn_start = messages.iter().rposition(is_text_user_message);
 
     if let Some(instructions) = system_to_instructions(system.cloned())
         && !instructions.trim().is_empty()
@@ -1362,7 +1361,7 @@ fn build_chat_messages_with_reasoning(
         }));
     }
 
-    for (message_index, message) in messages.iter().enumerate() {
+    for message in messages.iter() {
         let role = message.role.as_str();
         let mut text_parts = Vec::new();
         let mut thinking_parts = Vec::new();
@@ -1421,32 +1420,28 @@ fn build_chat_messages_with_reasoning(
 
         if role == "assistant" {
             let content = text_parts.join("\n");
-            let reasoning_content = thinking_parts.join("\n");
+            let mut reasoning_content = thinking_parts.join("\n");
             let has_text = !content.trim().is_empty();
-            let mut has_tool_calls = !tool_calls.is_empty();
-            let include_reasoning_for_turn = include_reasoning
-                && has_tool_calls
-                && current_turn_start.is_some_and(|start| message_index > start)
-                && !has_later_assistant_text(messages, message_index);
-            let has_reasoning = include_reasoning_for_turn && !reasoning_content.trim().is_empty();
-
-            // DeepSeek thinking-mode tool turns are stateful within the
-            // stateless Chat Completions transcript: if an assistant performed
-            // a tool call in the current user turn, its `reasoning_content`
-            // must be replayed while continuing that tool round. Once a new
-            // user text turn starts, DeepSeek recommends clearing historical
-            // reasoning content so the context is not dominated by old CoT.
-            // Older checkpoints could lose the current-round field because the
-            // UI display stream had no visible text block. Do not forward those
-            // malformed current tool calls; dropping that round is better than
-            // guaranteeing a provider-side 400.
+            let has_tool_calls = !tool_calls.is_empty();
+            // DeepSeek thinking-mode rule: any assistant message that performed
+            // a tool call must keep its `reasoning_content` and replay it in
+            // ALL subsequent requests, including across new user turns. Final
+            // text-only answers may drop reasoning_content (the API ignores
+            // it). If a tool-call round somehow lost its reasoning_content
+            // (e.g. a session checkpoint from before this rule was enforced,
+            // or a sub-turn where the model emitted no reasoning text),
+            // substitute a non-empty placeholder so the API accepts the
+            // request. Dropping tool_calls instead would orphan matching
+            // tool_results and fragment the conversation chain.
+            let include_reasoning_for_turn = include_reasoning && has_tool_calls;
+            let mut has_reasoning =
+                include_reasoning_for_turn && !reasoning_content.trim().is_empty();
             if include_reasoning_for_turn && !has_reasoning {
                 logging::warn(
-                    "Dropping DeepSeek tool_calls with missing reasoning_content from assistant message",
+                    "Substituting placeholder reasoning_content for DeepSeek tool-call assistant message",
                 );
-                tool_calls.clear();
-                tool_call_ids.clear();
-                has_tool_calls = false;
+                reasoning_content = String::from("(reasoning omitted)");
+                has_reasoning = true;
             }
 
             // DeepSeek rejects assistant messages where both `content` and
@@ -1618,33 +1613,6 @@ fn build_chat_messages_with_reasoning(
     out
 }
 
-fn is_text_user_message(message: &Message) -> bool {
-    message.role == "user"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
-fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool {
-    messages
-        .iter()
-        .skip(message_index.saturating_add(1))
-        .any(is_text_assistant_message)
-}
-
-fn is_text_assistant_message(message: &Message) -> bool {
-    message.role == "assistant"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
 fn tool_to_chat(tool: &Tool) -> Value {
     let mut value = json!({
         "type": "function",
@@ -2437,7 +2405,7 @@ mod tests {
     }
 
     #[test]
-    fn chat_messages_clear_prior_tool_round_reasoning_after_new_user_turn() {
+    fn chat_messages_replay_prior_tool_round_reasoning_after_new_user_turn() {
         let messages = vec![
             Message {
                 role: "user".to_string(),
@@ -2485,16 +2453,24 @@ mod tests {
             },
         ];
         let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
-        let assistant = out
+        let tool_assistant = out
             .iter()
-            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
-            .expect("assistant message");
-        assert!(assistant.get("tool_calls").is_some());
-        assert!(assistant.get("reasoning_content").is_none());
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need to call a tool"),
+            "DeepSeek thinking mode requires reasoning_content to be replayed for tool-call rounds across all subsequent user turns"
+        );
     }
 
     #[test]
-    fn chat_messages_clear_completed_tool_round_reasoning_after_final_answer() {
+    fn chat_messages_replay_completed_tool_round_reasoning_after_final_answer() {
         let messages = vec![
             Message {
                 role: "user".to_string(),
@@ -2535,16 +2511,31 @@ mod tests {
             },
         ];
         let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
-        let assistant = out
+        let tool_assistant = out
             .iter()
-            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
-            .expect("assistant message");
-        assert!(assistant.get("tool_calls").is_some());
-        assert!(assistant.get("reasoning_content").is_none());
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need to call a tool")
+        );
+        let final_assistant = out
+            .iter()
+            .rfind(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
+            .expect("final assistant message");
+        assert!(
+            final_assistant.get("reasoning_content").is_none(),
+            "final text answer can drop reasoning_content (API ignores it)"
+        );
     }
 
     #[test]
-    fn chat_messages_clear_v4_tool_round_reasoning_after_new_user_turn() {
+    fn chat_messages_replay_v4_tool_round_reasoning_after_new_user_turn() {
         let messages = vec![
             Message {
                 role: "user".to_string(),
@@ -2593,16 +2584,23 @@ mod tests {
         ];
 
         let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
-        let assistant = out
+        let tool_assistant = out
             .iter()
-            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
-            .expect("assistant message");
-        assert!(assistant.get("tool_calls").is_some());
-        assert!(assistant.get("reasoning_content").is_none());
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need a tool for this")
+        );
     }
 
     #[test]
-    fn chat_messages_drop_v4_tool_round_missing_reasoning() {
+    fn chat_messages_substitute_placeholder_when_v4_tool_round_missing_reasoning() {
         let messages = vec![
             Message {
                 role: "user".to_string(),
@@ -2633,15 +2631,24 @@ mod tests {
 
         let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
 
+        let assistant = out
+            .iter()
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message should be retained with placeholder");
         assert!(
-            !out.iter()
-                .any(|value| value.get("role").and_then(Value::as_str) == Some("assistant")),
-            "malformed assistant tool round should be removed"
+            assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str)
+                .is_some_and(|value| !value.trim().is_empty()),
+            "missing reasoning_content should be substituted with a non-empty placeholder so the API accepts the request"
         );
         assert!(
-            !out.iter()
+            out.iter()
                 .any(|value| value.get("role").and_then(Value::as_str) == Some("tool")),
-            "tool result tied to missing reasoning should be removed"
+            "matching tool_result must remain so the conversation chain stays intact"
         );
     }
 
diff --git a/crates/tui/src/compaction.rs b/crates/tui/src/compaction.rs
index 1a5e45b0..f85b69ba 100644
--- a/crates/tui/src/compaction.rs
+++ b/crates/tui/src/compaction.rs
@@ -523,30 +523,15 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi
 }
 
 pub fn estimate_tokens(messages: &[Message]) -> usize {
-    // Rough estimate: ~4 chars per token
-    let current_turn_start = messages.iter().rposition(is_text_user_message);
+    // Rough estimate: ~4 chars per token. DeepSeek thinking-mode rule: any
+    // assistant message with tool_calls keeps its reasoning_content forever
+    // (replayed in all subsequent requests). Final text-only answers drop it.
     messages
         .iter()
-        .enumerate()
-        .map(|(index, message)| {
-            let include_thinking = current_turn_start.is_some_and(|start| index > start)
-                && message_has_tool_use(message)
-                && !has_later_assistant_text(messages, index);
-            estimate_tokens_for_message(message, include_thinking)
-        })
+        .map(|message| estimate_tokens_for_message(message, message_has_tool_use(message)))
         .sum()
 }
 
-fn is_text_user_message(message: &Message) -> bool {
-    message.role == "user"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
 fn message_has_tool_use(message: &Message) -> bool {
     message
         .content
@@ -554,23 +539,6 @@ fn message_has_tool_use(message: &Message) -> bool {
         .any(|block| matches!(block, ContentBlock::ToolUse { .. }))
 }
 
-fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool {
-    messages
-        .iter()
-        .skip(message_index.saturating_add(1))
-        .any(is_text_assistant_message)
-}
-
-fn is_text_assistant_message(message: &Message) -> bool {
-    message.role == "assistant"
-        && message.content.iter().any(|block| {
-            matches!(
-                block,
-                ContentBlock::Text { text, .. } if !text.trim().is_empty()
-            )
-        })
-}
-
 fn estimate_text_tokens_conservative(text: &str) -> usize {
     text.chars().count().div_ceil(3)
 }
@@ -1158,7 +1126,11 @@ mod tests {
     }
 
     #[test]
-    fn estimate_tokens_counts_current_tool_round_thinking_only() {
+    fn estimate_tokens_counts_tool_round_thinking_across_turns() {
+        // Per DeepSeek thinking-mode rules, any assistant message that
+        // performed a tool call keeps its reasoning_content in the request
+        // forever, including across new user turns. Token estimates must
+        // account for that replayed reasoning text.
         let thinking = "reasoning ".repeat(800);
         let current_messages = vec![
             Message {
@@ -1222,9 +1194,10 @@ mod tests {
             messages
         };
 
-        assert!(estimate_tokens(&current_messages) > thinking.len() / 5);
-        assert!(estimate_tokens(&completed_messages) < thinking.len() / 8);
-        assert!(estimate_tokens(&historical_messages) < thinking.len() / 8);
+        let lower_bound = thinking.len() / 5;
+        assert!(estimate_tokens(&current_messages) > lower_bound);
+        assert!(estimate_tokens(&completed_messages) > lower_bound);
+        assert!(estimate_tokens(&historical_messages) > lower_bound);
     }
 
     #[test]
diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index 27d0aee0..77746337 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -2888,11 +2888,24 @@ impl Engine {
             // Update turn usage
             turn.add_usage(&usage);
 
-            // Build content blocks
-            if !current_thinking.is_empty() {
-                content_blocks.push(ContentBlock::Thinking {
-                    thinking: current_thinking.clone(),
-                });
+            // Build content blocks. If this assistant turn produced tool
+            // calls, ensure a Thinking block is present even when the model
+            // didn't stream any reasoning text — DeepSeek's thinking-mode
+            // API requires `reasoning_content` to accompany every tool-call
+            // assistant message in the conversation history. Saving a
+            // placeholder here keeps the on-disk session structurally
+            // correct so subsequent requests aren't rejected with HTTP 400.
+            let needs_thinking_block = !tool_uses.is_empty()
+                || tool_parser::has_tool_call_markers(&current_text_raw);
+            let thinking_to_persist = if !current_thinking.is_empty() {
+                Some(current_thinking.clone())
+            } else if needs_thinking_block {
+                Some(String::from("(reasoning omitted)"))
+            } else {
+                None
+            };
+            if let Some(thinking) = thinking_to_persist {
+                content_blocks.push(ContentBlock::Thinking { thinking });
             }
             let mut final_text = current_text_visible.clone();
             if tool_uses.is_empty() && tool_parser::has_tool_call_markers(&current_text_raw) {
diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json
index 7a6a09a4..e29dabe7 100644
--- a/npm/deepseek-tui/package.json
+++ b/npm/deepseek-tui/package.json
@@ -1,7 +1,7 @@
 {
   "name": "deepseek-tui",
-  "version": "0.4.8",
-  "deepseekBinaryVersion": "0.4.8",
+  "version": "0.4.9",
+  "deepseekBinaryVersion": "0.4.9",
   "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
   "author": "Hmbown",
   "license": "MIT",