From 4e86a0fb8efa9b208a8ec7a62b4325b8b3832ae6 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Mon, 4 May 2026 13:41:30 -0500
Subject: [PATCH] fix(prompts): expand language-mirroring carve-out + pin
 reasoning_content anchor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two small follow-ups to #588's review:

* Gemini-code-assist suggested explicitly listing environment variables,
  command-line flags, and URLs alongside identifiers/tool-names in the
  carve-out clause, since those are exactly the categories an LLM is
  likeliest to "helpfully" translate (e.g. `--verbose` or `DEBUG=true`).
  Adopting verbatim — the additions are non-controversial and the failure
  mode they prevent is real.

* Copilot flagged that the structural test only checked for the `## Language`
  heading. A future edit could keep the heading but silently weaken the
  section to a generic "respond in the user's language" directive,
  dropping the cross-cutting #588 commitment that the model's
  `reasoning_content` field — not just the visible reply — follows the
  user's language. Add a second structural anchor: assert that each
  mode's prompt mentions `reasoning_content`. This matches the existing
  rlm test's "anchor tokens, not prose" convention (the API field name
  is the feature contract, not a wording choice).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/src/prompts.rs      | 13 ++++++++++++-
 crates/tui/src/prompts/base.md |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index f8711741..9b2b2ecc 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -608,7 +608,12 @@ mod tests {
 
     /// #588: language-mirroring directive must ship in every mode so
     /// DeepSeek's `reasoning_content` and final reply follow the user's
-    /// language. Structural test — wording is not a test concern.
+    /// language. Structural test — wording is not a test concern, but
+    /// the cross-cutting commitment of #588 is specifically that the
+    /// `reasoning_content` field tracks the user's language (not just
+    /// the visible reply); pin that anchor token so a future edit
+    /// can't silently weaken the section to a generic "respond in the
+    /// user's language" directive while keeping the heading.
     #[test]
     fn language_mirroring_section_present_in_all_modes() {
         for mode in [AppMode::Agent, AppMode::Yolo, AppMode::Plan] {
@@ -617,6 +622,12 @@ mod tests {
                 prompt.contains("## Language"),
                 "## Language section missing from mode {mode:?}"
             );
+            assert!(
+                prompt.contains("reasoning_content"),
+                "## Language section in {mode:?} must mention `reasoning_content` — \
+                 that field name is the structural anchor for the #588 commitment that \
+                 internal reasoning, not just the visible reply, follows the user's language"
+            );
         }
     }
 
diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md
index 2d5f246d..4be7db0e 100644
--- a/crates/tui/src/prompts/base.md
+++ b/crates/tui/src/prompts/base.md
@@ -4,7 +4,7 @@ You are DeepSeek TUI. You're already running inside it — don't try to launch a
 
 Detect the language the user writes in and respond in that same language — including your internal reasoning. If the user writes in Simplified Chinese (简体中文), your `reasoning_content` and final reply must both be in Simplified Chinese. If they switch languages mid-conversation, switch with them. The default when no clear signal is present is English.
 
-Code, file paths, identifiers, tool names, and log lines stay in their original form — translating `read_file` to `读取文件` would break tool calls. Only natural-language prose mirrors the user.
+Code, file paths, identifiers, tool names, environment variables, command-line flags, URLs, and log lines stay in their original form — translating `read_file` to `读取文件` would break tool calls. Only natural-language prose mirrors the user.
 
 ## Preamble Rhythm