feat: DeepSeek V4 support with reasoning-effort control (0.4.0)

Adds first-class DeepSeek V4 Pro and Flash support, updates the default model to deepseek-v4-pro, aligns legacy aliases with the current V4 1M context behavior, and fixes thinking-mode request handling.

Key fixes:
- Send DeepSeek's raw Chat Completions `thinking` parameter at the top level instead of SDK-only `extra_body`.
- Preserve assistant `reasoning_content` for all prior thinking-mode tool-call turns so subsequent requests satisfy DeepSeek V4's replay requirement.
- Fix npm wrapper concurrent first-run downloads by using per-process temporary download paths.
- Add `.mailmap` so historical bot-attributed commits aggregate under Hunter Bown where mailmap is honored.

Verified with the full local Rust gate, a live DeepSeek V4 smoke test, an npm wrapper temp-install smoke test, and green PR CI across Linux, macOS, and Windows.
This commit is contained in:
Hunter Bown
2026-04-23 22:53:20 -05:00
parent dc8e94d705
commit b7bd02d814
53 changed files with 1695 additions and 299 deletions
+19 -2
View File
@@ -16,11 +16,28 @@ api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty
# ─────────────────────────────────────────────────────────────────────────────────
base_url = "https://api.deepseek.com"
# base_url = "https://api.deepseeki.com" # China users
# base_url = "https://api.deepseek.com/beta" # DeepSeek beta features such as strict tool mode
# ─────────────────────────────────────────────────────────────────────────────────
# Default Models
# ─────────────────────────────────────────────────────────────────────────────────
default_text_model = "deepseek-reasoner" # common IDs: deepseek-reasoner, deepseek-chat (any deepseek-* ID is valid)
# DeepSeek V4 family:
# deepseek-v4-pro — flagship reasoning model
# deepseek-v4-flash — fast, cost-efficient (legacy aliases: deepseek-chat, deepseek-reasoner)
default_text_model = "deepseek-v4-pro"
# ─────────────────────────────────────────────────────────────────────────────────
# Thinking Mode (DeepSeek V4 reasoning effort)
# ─────────────────────────────────────────────────────────────────────────────────
# "off" — disables chain-of-thought (thinking.type = disabled)
# "low" — accepted for compatibility; mapped to "high" server-side
# "medium" — accepted for compatibility; mapped to "high" server-side
# "high" — reasoning_effort = high (DeepSeek default)
# "max" — reasoning_effort = max (deepest reasoning)
#
# Shift+Tab in the TUI cycles through off / high / max. The header shows the
# current tier as a ⚡ chip.
reasoning_effort = "max"
# ─────────────────────────────────────────────────────────────────────────────────
# Paths
@@ -81,7 +98,7 @@ exponential_base = 2.0
# token_threshold = 50000 # Trigger compaction above this token estimate
# message_threshold = 50 # Or above this message count
# model = "deepseek-chat" # Model to use for summarization
# cache_summary = true # Cache the summary block
# cache_summary = true # Keep summary blocks stable; DeepSeek context caching is automatic
# ─────────────────────────────────────────────────────────────────────────────────
# Capacity Controller (runtime pressure guardrails)