diff --git a/CHANGELOG.md b/CHANGELOG.md index e00dc313..ec2c8872 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.4.9] - 2026-04-27 + +### Fixed +- DeepSeek thinking-mode tool-call rounds now always replay `reasoning_content` in all subsequent requests (including across new user turns), matching DeepSeek's documented API contract that assistant messages with tool calls must retain their reasoning content forever. +- Missing `reasoning_content` on a tool-call assistant message now substitutes a safe placeholder (`"(reasoning omitted)"`) instead of dropping the tool calls and their matching tool results, preventing orphaned conversation chains and API 400 errors. +- Session checkpoint now persists a Thinking-block placeholder for tool-call turns that produced no streamed reasoning text, keeping on-disk sessions structurally correct so subsequent requests avoid HTTP 400 rejections. +- Token estimation for compaction now counts thinking tokens across all tool-call rounds (not just the current user turn), aligning with the updated reasoning_content replay rule. 
+ ## [0.4.8] - 2026-04-25 ### Fixed @@ -490,7 +498,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Hooks system and config profiles - Example skills and launch assets -[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...HEAD +[Unreleased]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.9...HEAD +[0.4.9]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.4.8...v0.4.9 +[0.4.8]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.33...v0.4.8 [0.3.33]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.32...v0.3.33 [0.3.32]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.31...v0.3.32 [0.3.31]: https://github.com/Hmbown/DeepSeek-TUI/compare/v0.3.28...v0.3.31 diff --git a/Cargo.lock b/Cargo.lock index 0d5f5b4f..80bcc66d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -806,7 +806,7 @@ dependencies = [ [[package]] name = "deepseek-agent" -version = "0.4.8" +version = "0.4.9" dependencies = [ "deepseek-config", "serde", @@ -814,7 +814,7 @@ dependencies = [ [[package]] name = "deepseek-app-server" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "axum", @@ -837,7 +837,7 @@ dependencies = [ [[package]] name = "deepseek-config" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "dirs", @@ -848,7 +848,7 @@ dependencies = [ [[package]] name = "deepseek-core" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "chrono", @@ -867,7 +867,7 @@ dependencies = [ [[package]] name = "deepseek-execpolicy" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "deepseek-protocol", @@ -876,7 +876,7 @@ dependencies = [ [[package]] name = "deepseek-hooks" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "async-trait", @@ -890,7 +890,7 @@ dependencies = [ [[package]] name = "deepseek-mcp" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "deepseek-protocol", @@ -900,7 +900,7 @@ dependencies = [ [[package]] name = "deepseek-protocol" -version = 
"0.4.8" +version = "0.4.9" dependencies = [ "serde", "serde_json", @@ -908,7 +908,7 @@ dependencies = [ [[package]] name = "deepseek-state" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "chrono", @@ -920,7 +920,7 @@ dependencies = [ [[package]] name = "deepseek-tools" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "async-trait", @@ -933,7 +933,7 @@ dependencies = [ [[package]] name = "deepseek-tui" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "arboard", @@ -987,7 +987,7 @@ dependencies = [ [[package]] name = "deepseek-tui-cli" -version = "0.4.8" +version = "0.4.9" dependencies = [ "anyhow", "chrono", @@ -1005,7 +1005,7 @@ dependencies = [ [[package]] name = "deepseek-tui-core" -version = "0.4.8" +version = "0.4.9" [[package]] name = "deranged" diff --git a/Cargo.toml b/Cargo.toml index 68921c25..eb90c8ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"] resolver = "2" [workspace.package] -version = "0.4.8" +version = "0.4.9" edition = "2024" license = "MIT" repository = "https://github.com/Hmbown/DeepSeek-TUI" diff --git a/README.md b/README.md index 67db4441..b09422c3 100644 --- a/README.md +++ b/README.md @@ -1,45 +1,64 @@ # DeepSeek TUI -`npm i -g deepseek-tui` +> **A terminal-native coding agent for [DeepSeek V4](https://platform.deepseek.com) models — with 1M-token context, thinking-mode reasoning, and full tool-use.** -A coding agent for [DeepSeek](https://platform.deepseek.com) models that runs in your terminal. +```bash +npm i -g deepseek-tui +``` [![CI](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml/badge.svg)](https://github.com/Hmbown/DeepSeek-TUI/actions/workflows/ci.yml) [![crates.io](https://img.shields.io/crates/v/deepseek-tui)](https://crates.io/crates/deepseek-tui) [![npm](https://img.shields.io/npm/v/deepseek-tui)](https://www.npmjs.com/package/deepseek-tui) +--- + +## What is it? 
+ +DeepSeek TUI is a coding agent that runs entirely in your terminal. It gives DeepSeek's frontier models direct access to your workspace — reading and editing files, running shell commands, searching the web, managing git, and orchestrating sub-agents — all through a fast, keyboard-driven TUI. + +**Built for DeepSeek V4** (`deepseek-v4-pro` / `deepseek-v4-flash`) with 1M-token context windows and native thinking-mode (chain-of-thought) streaming. See the model's reasoning unfold in real time as it works through your tasks. + +### Key Features + +- 🧠 **Thinking-mode streaming** — watch DeepSeek's chain-of-thought as it reasons about your code +- 🔧 **Full tool suite** — file ops, shell execution, git, web search/browse, apply-patch, sub-agents, MCP servers, and more +- 🪟 **1M-token context** — feed entire codebases; automatic intelligent compaction when context fills up +- 🎛️ **Three interaction modes** — Plan (read-only explore), Agent (interactive with approval), YOLO (auto-approved) +- ⚡ **Reasoning-effort tiers** — cycle through `off → high → max` with Shift+Tab +- 🔄 **Session save/resume** — checkpoint and resume long sessions, fork conversations +- 🌐 **HTTP/SSE runtime API** — `deepseek serve --http` for headless agent workflows +- 📦 **MCP protocol** — connect to Model Context Protocol servers for extended tooling +- 💰 **Live cost tracking** — per-turn and session-level token usage and cost estimates +- 🎨 **Dark & light themes** — with a DeepSeek-blue branded palette + +--- + ## Quickstart ```bash npm install -g deepseek-tui -``` - -Start the TUI: - -```bash deepseek ``` -On first launch, it will prompt for your API key if one is not already configured. -The package also installs `deepseek-tui`; both commands share the same -`~/.deepseek/config.toml` for DeepSeek auth and default model settings. - -You can also set auth ahead of time with either of these: +On first launch you'll be prompted for your [DeepSeek API key](https://platform.deepseek.com/api_keys). 
You can also set it ahead of time: ```bash +# via CLI deepseek login --api-key "YOUR_DEEPSEEK_API_KEY" -deepseek-tui login --api-key "YOUR_DEEPSEEK_API_KEY" -DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY" deepseek-tui + +# via env var +export DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY" +deepseek ``` -To use NVIDIA NIM-hosted DeepSeek V4 Pro instead: +### Using NVIDIA NIM ```bash deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY" deepseek --provider nvidia-nim -# or for one process: -DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek +# or per-process: +DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="..." deepseek ```
@@ -47,8 +66,8 @@ DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek ```bash # From crates.io (requires Rust 1.85+) -cargo install deepseek-tui --locked # TUI -cargo install deepseek-tui-cli --locked # deepseek CLI facade +cargo install deepseek-tui --locked # TUI binary +cargo install deepseek-tui-cli --locked # CLI facade (deepseek command) # From source git clone https://github.com/Hmbown/DeepSeek-TUI.git @@ -56,99 +75,116 @@ cd DeepSeek-TUI cargo install --path crates/tui --locked ``` -The canonical crates.io packages for this repository are `deepseek-tui` and -`deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this -project. crates.io publication can lag the repository workspace version and the -npm wrapper, so use npm or install from source if you need the newest release -surface immediately. +The canonical crates.io packages are `deepseek-tui` and `deepseek-tui-cli`. The unrelated `deepseek-cli` crate is not part of this project. crates.io publication can lag the workspace version — use npm or install from source for the latest release surface immediately.
-## What it does +--- -A terminal coding agent for DeepSeek models with file editing, shell execution, `web.run` browsing, git operations, session resume, and [MCP](https://modelcontextprotocol.io) server integration. +## Models & Pricing -Three visible modes (**Tab** to cycle): +DeepSeek TUI targets **DeepSeek V4** models with 1M-token context windows by default. -| Mode | Behavior | -|------|----------| -| **Plan** | Review a plan before the agent starts making changes | -| **Agent** | Default interactive mode with multi-step tool use | -| **YOLO** | Auto-approve tools in a trusted workspace | +| Model | Context | Input (cache hit) | Input (cache miss) | Output | +|---|---|---|---|---| +| `deepseek-v4-pro` | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* | +| `deepseek-v4-flash` | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M | -**Shift+Tab** cycles the reasoning-effort tier for DeepSeek thinking mode: -`off` → `high` → `max`. The current tier is shown as a ⚡ chip in the header. -Set a default in config with `reasoning_effort = "max"` (or `off` / `low` / -`medium` / `high`). +Legacy aliases `deepseek-chat` and `deepseek-reasoner` silently map to `deepseek-v4-flash`. -## Models & pricing +**NVIDIA NIM** hosted variants (`deepseek-ai/deepseek-v4-pro`, `deepseek-ai/deepseek-v4-flash`) use your NVIDIA account terms — no DeepSeek platform billing. 
-| Model | Thinking | Context | Input cache hit | Input cache miss | Output | -|---|---|---|---|---|---| -| `deepseek-v4-pro` | default | 1M | $0.03625 / 1M* | $0.435 / 1M* | $0.87 / 1M* | -| `deepseek-v4-flash` | default | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M | -| `deepseek-ai/deepseek-v4-pro` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms | -| `deepseek-ai/deepseek-v4-flash` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms | +*\*DeepSeek lists the Pro rates above as a limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI cost estimator falls back to base Pro rates after that timestamp.* -Legacy `deepseek-chat` and `deepseek-reasoner` remain as silent aliases for -`deepseek-v4-flash` (priced identically). Pricing is per 1M tokens as published -by DeepSeek and is subject to change. *DeepSeek lists the Pro rates above as a -limited-time 75% discount valid until 2026-05-05 15:59 UTC; the TUI estimator -falls back to the base Pro rates after that timestamp. +--- ## Usage ```bash deepseek # interactive TUI -deepseek "explain this in 2 sentences" # one-shot prompt -deepseek --model deepseek-v4-flash "summarize" # one-shot with model override -deepseek --yolo # YOLO mode -deepseek login --api-key "..." # save API key to shared config -deepseek doctor # check setup -deepseek models # list live DeepSeek API models +deepseek "explain this function" # one-shot prompt +deepseek --model deepseek-v4-flash "summarize" # model override +deepseek --yolo # YOLO mode (auto-approve tools) +deepseek login --api-key "..." 
# save API key +deepseek doctor # check setup & connectivity +deepseek models # list live API models deepseek sessions # list saved sessions -deepseek resume --last # resume the latest session +deepseek resume --last # resume latest session deepseek serve --http # HTTP/SSE API server ``` -Controls: `F1` help, `Esc` backs out of the current action, `Ctrl+K` command palette. -In the composer, `@path/to/file` adds local text file or directory context to -the next message. Use `/attach <path>` for local image/video media references. +### Keyboard shortcuts + +| Key | Action | +|---|---| +| `Tab` | Cycle mode: Plan → Agent → YOLO | +| `Shift+Tab` | Cycle reasoning-effort: off → high → max | +| `F1` | Help | +| `Esc` | Back / dismiss | +| `Ctrl+K` | Command palette | +| `@path` | Attach file/directory context in composer | +| `/attach <path>` | Attach image/video media references | + +--- + +## Modes + +| Mode | Behavior | +|---|---| +| **Plan** 🔍 | Read-only investigation — model explores and proposes a plan before making changes | +| **Agent** 🤖 | Default interactive mode — multi-step tool use with approval gates | +| **YOLO** ⚡ | Auto-approve all tools in a trusted workspace (use with caution) | + +--- ## Configuration -`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for all options. +`~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for every option. -Key environment overrides: `DEEPSEEK_API_KEY`, `DEEPSEEK_BASE_URL`, -`DEEPSEEK_MODEL`, `DEEPSEEK_PROFILE`, `DEEPSEEK_PROVIDER`. -For NVIDIA NIM, use `DEEPSEEK_PROVIDER=nvidia-nim` plus `NVIDIA_API_KEY` -or `NVIDIA_NIM_API_KEY` (with `DEEPSEEK_API_KEY` as a compatibility fallback); -the default model is `deepseek-ai/deepseek-v4-pro` and the default base URL is -`https://integrate.api.nvidia.com/v1`. With `--provider nvidia-nim`, -`--model deepseek-v4-flash` maps to `deepseek-ai/deepseek-v4-flash`. 
+Key environment overrides: -Quick checks and scaffolding: +| Variable | Purpose | +|---|---| +| `DEEPSEEK_API_KEY` | API key | +| `DEEPSEEK_BASE_URL` | API base URL | +| `DEEPSEEK_MODEL` | Default model | +| `DEEPSEEK_PROVIDER` | Provider: `deepseek` (default) or `nvidia-nim` | +| `DEEPSEEK_PROFILE` | Config profile name | +| `NVIDIA_API_KEY` | NVIDIA NIM API key | -- `deepseek-tui setup --status` — read-only, network-free status of API key, - MCP/skills/tools/plugins, sandbox, and `.env`. -- `deepseek-tui setup --tools --plugins` — scaffold `~/.deepseek/tools/` and - `~/.deepseek/plugins/` with self-describing example templates. -- `deepseek-tui doctor --json` — machine-readable doctor output for CI. +Quick diagnostics: -The client targets DeepSeek's documented OpenAI-compatible Chat Completions API -(`/chat/completions`). DeepSeek context caching is automatic; when the API -returns cache hit/miss token fields, the TUI includes them in usage and cost -tracking. +```bash +deepseek-tui setup --status # read-only status check (API key, MCP, sandbox, .env) +deepseek-tui doctor --json # machine-readable doctor output for CI +deepseek-tui setup --tools --plugins # scaffold tools/ and plugins/ directories +``` -Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md). +DeepSeek context caching is automatic — when the API returns cache hit/miss token fields, the TUI includes them in usage and cost tracking. -## Docs +Full reference: [docs/CONFIGURATION.md](docs/CONFIGURATION.md) -[docs/](docs/) — configuration, modes, MCP integration, runtime API, and release runbooks. 
+--- + +## Documentation + +| Doc | Topic | +|---|---| +| [ARCHITECTURE.md](docs/ARCHITECTURE.md) | Codebase internals | +| [CONFIGURATION.md](docs/CONFIGURATION.md) | Full config reference | +| [MODES.md](docs/MODES.md) | Plan / Agent / YOLO modes | +| [MCP.md](docs/MCP.md) | Model Context Protocol integration | +| [RUNTIME_API.md](docs/RUNTIME_API.md) | HTTP/SSE API server | +| [RELEASE_RUNBOOK.md](docs/RELEASE_RUNBOOK.md) | Release process | +| [OPERATIONS_RUNBOOK.md](docs/OPERATIONS_RUNBOOK.md) | Ops & recovery | + +--- ## Contributing -See [CONTRIBUTING.md](CONTRIBUTING.md). Not affiliated with DeepSeek Inc. +See [CONTRIBUTING.md](CONTRIBUTING.md). Pull requests welcome! + +*Not affiliated with DeepSeek Inc.* ## License diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml index d101732a..edb17e56 100644 --- a/crates/agent/Cargo.toml +++ b/crates/agent/Cargo.toml @@ -7,5 +7,5 @@ repository.workspace = true description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture" [dependencies] -deepseek-config = { path = "../config", version = "0.4.5" } +deepseek-config = { path = "../config", version = "0.4.9" } serde.workspace = true diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml index 1b45a3cd..136de0bd 100644 --- a/crates/app-server/Cargo.toml +++ b/crates/app-server/Cargo.toml @@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect anyhow.workspace = true axum.workspace = true clap.workspace = true -deepseek-agent = { path = "../agent", version = "0.4.5" } -deepseek-config = { path = "../config", version = "0.4.5" } -deepseek-core = { path = "../core", version = "0.4.5" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" } -deepseek-hooks = { path = "../hooks", version = "0.4.5" } -deepseek-mcp = { path = "../mcp", version = "0.4.5" } -deepseek-protocol = { path = "../protocol", version = "0.4.5" } -deepseek-state = { path = "../state", 
version = "0.4.5" } -deepseek-tools = { path = "../tools", version = "0.4.5" } +deepseek-agent = { path = "../agent", version = "0.4.9" } +deepseek-config = { path = "../config", version = "0.4.9" } +deepseek-core = { path = "../core", version = "0.4.9" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" } +deepseek-hooks = { path = "../hooks", version = "0.4.9" } +deepseek-mcp = { path = "../mcp", version = "0.4.9" } +deepseek-protocol = { path = "../protocol", version = "0.4.9" } +deepseek-state = { path = "../state", version = "0.4.9" } +deepseek-tools = { path = "../tools", version = "0.4.9" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index f0d79e33..71c766a5 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -14,12 +14,12 @@ path = "src/main.rs" anyhow.workspace = true clap.workspace = true clap_complete.workspace = true -deepseek-agent = { path = "../agent", version = "0.4.5" } -deepseek-app-server = { path = "../app-server", version = "0.4.5" } -deepseek-config = { path = "../config", version = "0.4.5" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" } -deepseek-mcp = { path = "../mcp", version = "0.4.5" } -deepseek-state = { path = "../state", version = "0.4.5" } +deepseek-agent = { path = "../agent", version = "0.4.9" } +deepseek-app-server = { path = "../app-server", version = "0.4.9" } +deepseek-config = { path = "../config", version = "0.4.9" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" } +deepseek-mcp = { path = "../mcp", version = "0.4.9" } +deepseek-state = { path = "../state", version = "0.4.9" } chrono.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 8487fd5e..9fce4bd9 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,14 +9,14 @@ description = "Core runtime boundaries for 
DeepSeek workspace architecture" [dependencies] anyhow.workspace = true chrono.workspace = true -deepseek-agent = { path = "../agent", version = "0.4.5" } -deepseek-config = { path = "../config", version = "0.4.5" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.4.5" } -deepseek-hooks = { path = "../hooks", version = "0.4.5" } -deepseek-mcp = { path = "../mcp", version = "0.4.5" } -deepseek-protocol = { path = "../protocol", version = "0.4.5" } -deepseek-state = { path = "../state", version = "0.4.5" } -deepseek-tools = { path = "../tools", version = "0.4.5" } +deepseek-agent = { path = "../agent", version = "0.4.9" } +deepseek-config = { path = "../config", version = "0.4.9" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.4.9" } +deepseek-hooks = { path = "../hooks", version = "0.4.9" } +deepseek-mcp = { path = "../mcp", version = "0.4.9" } +deepseek-protocol = { path = "../protocol", version = "0.4.9" } +deepseek-state = { path = "../state", version = "0.4.9" } +deepseek-tools = { path = "../tools", version = "0.4.9" } serde_json.workspace = true tokio.workspace = true uuid.workspace = true diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml index e53032ed..99732f7e 100644 --- a/crates/execpolicy/Cargo.toml +++ b/crates/execpolicy/Cargo.toml @@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace [dependencies] anyhow.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.4.5" } +deepseek-protocol = { path = "../protocol", version = "0.4.9" } serde.workspace = true diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml index b319d863..8b5b2469 100644 --- a/crates/hooks/Cargo.toml +++ b/crates/hooks/Cargo.toml @@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc anyhow.workspace = true async-trait.workspace = true chrono.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.4.5" 
} +deepseek-protocol = { path = "../protocol", version = "0.4.9" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/mcp/Cargo.toml b/crates/mcp/Cargo.toml index 1fff24b8..9271cac9 100644 --- a/crates/mcp/Cargo.toml +++ b/crates/mcp/Cargo.toml @@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo [dependencies] anyhow.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.4.5" } +deepseek-protocol = { path = "../protocol", version = "0.4.9" } serde.workspace = true serde_json.workspace = true diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index 8b2648bf..73e75328 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral [dependencies] anyhow.workspace = true async-trait.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.4.5" } +deepseek-protocol = { path = "../protocol", version = "0.4.9" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index d273aa06..97389ea6 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -1351,7 +1351,6 @@ fn build_chat_messages_with_reasoning( ) -> Vec<Value> { let mut out = Vec::new(); let mut pending_tool_calls: HashSet<String> = HashSet::new(); - let current_turn_start = messages.iter().rposition(is_text_user_message); if let Some(instructions) = system_to_instructions(system.cloned()) && !instructions.trim().is_empty() @@ -1362,7 +1361,7 @@ fn build_chat_messages_with_reasoning( })); } - for (message_index, message) in messages.iter().enumerate() { + for message in messages.iter() { let role = message.role.as_str(); let mut text_parts = Vec::new(); let mut thinking_parts = Vec::new(); @@ -1421,32 +1420,28 @@ fn build_chat_messages_with_reasoning( if role == "assistant" { let content = 
text_parts.join("\n"); - let reasoning_content = thinking_parts.join("\n"); + let mut reasoning_content = thinking_parts.join("\n"); let has_text = !content.trim().is_empty(); - let mut has_tool_calls = !tool_calls.is_empty(); - let include_reasoning_for_turn = include_reasoning - && has_tool_calls - && current_turn_start.is_some_and(|start| message_index > start) - && !has_later_assistant_text(messages, message_index); - let has_reasoning = include_reasoning_for_turn && !reasoning_content.trim().is_empty(); - - // DeepSeek thinking-mode tool turns are stateful within the - // stateless Chat Completions transcript: if an assistant performed - // a tool call in the current user turn, its `reasoning_content` - // must be replayed while continuing that tool round. Once a new - // user text turn starts, DeepSeek recommends clearing historical - // reasoning content so the context is not dominated by old CoT. - // Older checkpoints could lose the current-round field because the - // UI display stream had no visible text block. Do not forward those - // malformed current tool calls; dropping that round is better than - // guaranteeing a provider-side 400. + let has_tool_calls = !tool_calls.is_empty(); + // DeepSeek thinking-mode rule: any assistant message that performed + // a tool call must keep its `reasoning_content` and replay it in + // ALL subsequent requests, including across new user turns. Final + // text-only answers may drop reasoning_content (the API ignores + // it). If a tool-call round somehow lost its reasoning_content + // (e.g. a session checkpoint from before this rule was enforced, + // or a sub-turn where the model emitted no reasoning text), + // substitute a non-empty placeholder so the API accepts the + // request. Dropping tool_calls instead would orphan matching + // tool_results and fragment the conversation chain. 
+ let include_reasoning_for_turn = include_reasoning && has_tool_calls; + let mut has_reasoning = + include_reasoning_for_turn && !reasoning_content.trim().is_empty(); if include_reasoning_for_turn && !has_reasoning { logging::warn( - "Dropping DeepSeek tool_calls with missing reasoning_content from assistant message", + "Substituting placeholder reasoning_content for DeepSeek tool-call assistant message", ); - tool_calls.clear(); - tool_call_ids.clear(); - has_tool_calls = false; + reasoning_content = String::from("(reasoning omitted)"); + has_reasoning = true; } // DeepSeek rejects assistant messages where both `content` and @@ -1618,33 +1613,6 @@ fn build_chat_messages_with_reasoning( out } -fn is_text_user_message(message: &Message) -> bool { - message.role == "user" - && message.content.iter().any(|block| { - matches!( - block, - ContentBlock::Text { text, .. } if !text.trim().is_empty() - ) - }) -} - -fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool { - messages - .iter() - .skip(message_index.saturating_add(1)) - .any(is_text_assistant_message) -} - -fn is_text_assistant_message(message: &Message) -> bool { - message.role == "assistant" - && message.content.iter().any(|block| { - matches!( - block, - ContentBlock::Text { text, .. 
} if !text.trim().is_empty() - ) - }) -} - fn tool_to_chat(tool: &Tool) -> Value { let mut value = json!({ "type": "function", @@ -2437,7 +2405,7 @@ mod tests { } #[test] - fn chat_messages_clear_prior_tool_round_reasoning_after_new_user_turn() { + fn chat_messages_replay_prior_tool_round_reasoning_after_new_user_turn() { let messages = vec![ Message { role: "user".to_string(), @@ -2485,16 +2453,24 @@ mod tests { }, ]; let out = build_chat_messages(None, &messages, "deepseek-v4-pro"); - let assistant = out + let tool_assistant = out .iter() - .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant")) - .expect("assistant message"); - assert!(assistant.get("tool_calls").is_some()); - assert!(assistant.get("reasoning_content").is_none()); + .find(|value| { + value.get("role").and_then(Value::as_str) == Some("assistant") + && value.get("tool_calls").is_some() + }) + .expect("tool-call assistant message"); + assert_eq!( + tool_assistant + .get("reasoning_content") + .and_then(Value::as_str), + Some("Need to call a tool"), + "DeepSeek thinking mode requires reasoning_content to be replayed for tool-call rounds across all subsequent user turns" + ); } #[test] - fn chat_messages_clear_completed_tool_round_reasoning_after_final_answer() { + fn chat_messages_replay_completed_tool_round_reasoning_after_final_answer() { let messages = vec![ Message { role: "user".to_string(), @@ -2535,16 +2511,31 @@ mod tests { }, ]; let out = build_chat_messages(None, &messages, "deepseek-v4-pro"); - let assistant = out + let tool_assistant = out .iter() - .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant")) - .expect("assistant message"); - assert!(assistant.get("tool_calls").is_some()); - assert!(assistant.get("reasoning_content").is_none()); + .find(|value| { + value.get("role").and_then(Value::as_str) == Some("assistant") + && value.get("tool_calls").is_some() + }) + .expect("tool-call assistant message"); + assert_eq!( + tool_assistant + 
.get("reasoning_content") + .and_then(Value::as_str), + Some("Need to call a tool") + ); + let final_assistant = out + .iter() + .rfind(|value| value.get("role").and_then(Value::as_str) == Some("assistant")) + .expect("final assistant message"); + assert!( + final_assistant.get("reasoning_content").is_none(), + "final text answer can drop reasoning_content (API ignores it)" + ); } #[test] - fn chat_messages_clear_v4_tool_round_reasoning_after_new_user_turn() { + fn chat_messages_replay_v4_tool_round_reasoning_after_new_user_turn() { let messages = vec![ Message { role: "user".to_string(), @@ -2593,16 +2584,23 @@ mod tests { ]; let out = build_chat_messages(None, &messages, "deepseek-v4-pro"); - let assistant = out + let tool_assistant = out .iter() - .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant")) - .expect("assistant message"); - assert!(assistant.get("tool_calls").is_some()); - assert!(assistant.get("reasoning_content").is_none()); + .find(|value| { + value.get("role").and_then(Value::as_str) == Some("assistant") + && value.get("tool_calls").is_some() + }) + .expect("tool-call assistant message"); + assert_eq!( + tool_assistant + .get("reasoning_content") + .and_then(Value::as_str), + Some("Need a tool for this") + ); } #[test] - fn chat_messages_drop_v4_tool_round_missing_reasoning() { + fn chat_messages_substitute_placeholder_when_v4_tool_round_missing_reasoning() { let messages = vec![ Message { role: "user".to_string(), @@ -2633,15 +2631,24 @@ mod tests { let out = build_chat_messages(None, &messages, "deepseek-v4-pro"); + let assistant = out + .iter() + .find(|value| { + value.get("role").and_then(Value::as_str) == Some("assistant") + && value.get("tool_calls").is_some() + }) + .expect("tool-call assistant message should be retained with placeholder"); assert!( - !out.iter() - .any(|value| value.get("role").and_then(Value::as_str) == Some("assistant")), - "malformed assistant tool round should be removed" + assistant + 
.get("reasoning_content") + .and_then(Value::as_str) + .is_some_and(|value| !value.trim().is_empty()), + "missing reasoning_content should be substituted with a non-empty placeholder so the API accepts the request" ); assert!( - !out.iter() + out.iter() .any(|value| value.get("role").and_then(Value::as_str) == Some("tool")), - "tool result tied to missing reasoning should be removed" + "matching tool_result must remain so the conversation chain stays intact" ); } diff --git a/crates/tui/src/compaction.rs b/crates/tui/src/compaction.rs index 1a5e45b0..f85b69ba 100644 --- a/crates/tui/src/compaction.rs +++ b/crates/tui/src/compaction.rs @@ -523,30 +523,15 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi } pub fn estimate_tokens(messages: &[Message]) -> usize { - // Rough estimate: ~4 chars per token - let current_turn_start = messages.iter().rposition(is_text_user_message); + // Rough estimate: ~4 chars per token. DeepSeek thinking-mode rule: any + // assistant message with tool_calls keeps its reasoning_content forever + // (replayed in all subsequent requests). Final text-only answers drop it. messages .iter() - .enumerate() - .map(|(index, message)| { - let include_thinking = current_turn_start.is_some_and(|start| index > start) - && message_has_tool_use(message) - && !has_later_assistant_text(messages, index); - estimate_tokens_for_message(message, include_thinking) - }) + .map(|message| estimate_tokens_for_message(message, message_has_tool_use(message))) .sum() } -fn is_text_user_message(message: &Message) -> bool { - message.role == "user" - && message.content.iter().any(|block| { - matches!( - block, - ContentBlock::Text { text, .. } if !text.trim().is_empty() - ) - }) -} - fn message_has_tool_use(message: &Message) -> bool { message .content @@ -554,23 +539,6 @@ fn message_has_tool_use(message: &Message) -> bool { .any(|block| matches!(block, ContentBlock::ToolUse { .. 
})) } -fn has_later_assistant_text(messages: &[Message], message_index: usize) -> bool { - messages - .iter() - .skip(message_index.saturating_add(1)) - .any(is_text_assistant_message) -} - -fn is_text_assistant_message(message: &Message) -> bool { - message.role == "assistant" - && message.content.iter().any(|block| { - matches!( - block, - ContentBlock::Text { text, .. } if !text.trim().is_empty() - ) - }) -} - fn estimate_text_tokens_conservative(text: &str) -> usize { text.chars().count().div_ceil(3) } @@ -1158,7 +1126,11 @@ mod tests { } #[test] - fn estimate_tokens_counts_current_tool_round_thinking_only() { + fn estimate_tokens_counts_tool_round_thinking_across_turns() { + // Per DeepSeek thinking-mode rules, any assistant message that + // performed a tool call keeps its reasoning_content in the request + // forever, including across new user turns. Token estimates must + // count those bytes. let thinking = "reasoning ".repeat(800); let current_messages = vec![ Message { @@ -1222,9 +1194,10 @@ mod tests { messages }; - assert!(estimate_tokens(&current_messages) > thinking.len() / 5); - assert!(estimate_tokens(&completed_messages) < thinking.len() / 8); - assert!(estimate_tokens(&historical_messages) < thinking.len() / 8); + let lower_bound = thinking.len() / 5; + assert!(estimate_tokens(&current_messages) > lower_bound); + assert!(estimate_tokens(&completed_messages) > lower_bound); + assert!(estimate_tokens(&historical_messages) > lower_bound); } #[test] diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 27d0aee0..77746337 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -2888,11 +2888,24 @@ impl Engine { // Update turn usage turn.add_usage(&usage); - // Build content blocks - if !current_thinking.is_empty() { - content_blocks.push(ContentBlock::Thinking { - thinking: current_thinking.clone(), - }); + // Build content blocks. 
If this assistant turn produced tool + // calls, ensure a Thinking block is present even when the model + // didn't stream any reasoning text — DeepSeek's thinking-mode + // API requires `reasoning_content` to accompany every tool-call + // assistant message in the conversation history. Saving a + // placeholder here keeps the on-disk session structurally + // correct so subsequent requests won't 400. + let needs_thinking_block = !tool_uses.is_empty() + || tool_parser::has_tool_call_markers(&current_text_raw); + let thinking_to_persist = if !current_thinking.is_empty() { + Some(current_thinking.clone()) + } else if needs_thinking_block { + Some(String::from("(reasoning omitted)")) + } else { + None + }; + if let Some(thinking) = thinking_to_persist { + content_blocks.push(ContentBlock::Thinking { thinking }); } let mut final_text = current_text_visible.clone(); if tool_uses.is_empty() && tool_parser::has_tool_call_markers(&current_text_raw) { diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json index 7a6a09a4..e29dabe7 100644 --- a/npm/deepseek-tui/package.json +++ b/npm/deepseek-tui/package.json @@ -1,7 +1,7 @@ { "name": "deepseek-tui", - "version": "0.4.8", - "deepseekBinaryVersion": "0.4.8", + "version": "0.4.9", + "deepseekBinaryVersion": "0.4.9", "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.", "author": "Hmbown", "license": "MIT",