feat: DeepSeek V4 support with reasoning-effort control (0.4.0)

Adds first-class DeepSeek V4 Pro and Flash support, updates the default model to deepseek-v4-pro, aligns legacy aliases with the current V4 1M context behavior, and fixes thinking-mode request handling.

Key fixes:

- Send DeepSeek's raw Chat Completions `thinking` parameter at the top level instead of SDK-only `extra_body`.
- Preserve assistant `reasoning_content` for all prior thinking-mode tool-call turns so subsequent requests satisfy DeepSeek V4's replay requirement.
- Fix npm wrapper concurrent first-run downloads by using per-process temporary download paths.
- Add `.mailmap` so historical bot-attributed commits aggregate under Hunter Bown where mailmap is honored.

Verified with the full local Rust gate, live DeepSeek V4 smoke, npm wrapper temp-install smoke, and green PR CI across Linux, macOS, and Windows.
2026-04-23 22:53:20 -05:00
parent dc8e94d705
commit b7bd02d814
53 changed files with 1695 additions and 299 deletions
@@ -16,11 +16,28 @@ api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty
 # ─────────────────────────────────────────────────────────────────────────────────
 base_url = "https://api.deepseek.com"
 # base_url = "https://api.deepseeki.com"         # China users
+# base_url = "https://api.deepseek.com/beta"     # DeepSeek beta features such as strict tool mode

 # ─────────────────────────────────────────────────────────────────────────────────
 # Default Models
 # ─────────────────────────────────────────────────────────────────────────────────
-default_text_model = "deepseek-reasoner" # common IDs: deepseek-reasoner, deepseek-chat (any deepseek-* ID is valid)
+# DeepSeek V4 family:
+#   deepseek-v4-pro    — flagship reasoning model
+#   deepseek-v4-flash  — fast, cost-efficient (legacy aliases: deepseek-chat, deepseek-reasoner)
+default_text_model = "deepseek-v4-pro"
+
+# ─────────────────────────────────────────────────────────────────────────────────
+# Thinking Mode (DeepSeek V4 reasoning effort)
+# ─────────────────────────────────────────────────────────────────────────────────
+# "off"    — disables chain-of-thought (thinking.type = disabled)
+# "low"    — accepted for compatibility; mapped to "high" server-side
+# "medium" — accepted for compatibility; mapped to "high" server-side
+# "high"   — reasoning_effort = high (DeepSeek default)
+# "max"    — reasoning_effort = max (deepest reasoning)
+#
+# Shift+Tab in the TUI cycles between off / high / max. The header shows the
+# current tier as a ⚡ chip.
+reasoning_effort = "max"

 # ─────────────────────────────────────────────────────────────────────────────────
 # Paths
@@ -81,7 +98,7 @@ exponential_base = 2.0
 # token_threshold = 50000          # Trigger compaction above this token estimate
 # message_threshold = 50           # Or above this message count
 # model = "deepseek-chat"           # Model to use for summarization
-# cache_summary = true             # Cache the summary block
+# cache_summary = true             # Keep summary blocks stable; DeepSeek context caching is automatic

 # ─────────────────────────────────────────────────────────────────────────────────
 # Capacity Controller (runtime pressure guardrails)