Merge remote-tracking branch 'origin/pr/1384' into work/v0.8.34

2026-05-13 00:13:30 -05:00
parent 6873784b22 b023d54c5d
commit bad58784e4
2 changed files with 76 additions and 22 deletions
@@ -754,28 +754,6 @@ pub fn build_system_prompt(base: &str, project_context: Option<&ProjectContext>)
    SystemPrompt::Text(full_prompt)
 }

-// ── Legacy functions for backwards compatibility ──────────────────────
-
-pub fn base_system_prompt() -> SystemPrompt {
-    SystemPrompt::Text(BASE_PROMPT.trim().to_string())
-}
-
-pub fn normal_system_prompt() -> SystemPrompt {
-    system_prompt_for_mode(AppMode::Agent)
-}
-
-pub fn agent_system_prompt() -> SystemPrompt {
-    system_prompt_for_mode(AppMode::Agent)
-}
-
-pub fn yolo_system_prompt() -> SystemPrompt {
-    system_prompt_for_mode(AppMode::Yolo)
-}
-
-pub fn plan_system_prompt() -> SystemPrompt {
-    system_prompt_for_mode(AppMode::Plan)
-}
-
 #[cfg(test)]
 mod tests {
    // Don't assert on prose. If you wouldn't fail a code review for
@@ -787,6 +765,44 @@ mod tests {
    /// agent prompt's own discussion of the convention).
    const HANDOFF_BLOCK_MARKER: &str = "left a relay artifact at `.deepseek/handoff.md`";

+    #[test]
+    fn base_prompt_carries_execution_discipline_block() {
+        // The XML-style section tags are the contract of the execution-discipline
+        // block: each must be present so the rules cannot be stripped unnoticed.
+        const REQUIRED_TAGS: [&str; 5] = [
+            "<tool_persistence>",
+            "<mandatory_tool_use>",
+            "<act_dont_ask>",
+            "<verification>",
+            "<missing_context>",
+        ];
+        REQUIRED_TAGS.iter().for_each(|&tag| {
+            let tag_present = BASE_PROMPT.contains(tag);
+            assert!(tag_present, "BASE_PROMPT missing required tag {tag}");
+        });
+        // The tool-use enforcement clause is detected by a plain substring match.
+        let has_enforcement_clause = BASE_PROMPT.contains("Tool-use enforcement");
+        assert!(
+            has_enforcement_clause,
+            "BASE_PROMPT missing the tool-use enforcement clause"
+        );
+    }
+
+    #[test]
+    fn execution_discipline_is_at_the_end_for_cache_stability() {
+        // Ordering guard: the prefix cache keys on a byte-stable leading run, so
+        // new sections have to be appended instead of spliced in earlier.
+        // Only the position relative to the `## Language` heading is checked here.
+        let locate = |anchor: &str| BASE_PROMPT.find(anchor);
+        let block_start = locate("<tool_persistence>").expect("tool_persistence anchor present");
+        let language_start = locate("## Language").expect("Language anchor present");
+        let block_follows_language = language_start < block_start;
+        assert!(
+            block_follows_language,
+            "execution-discipline block must come after the early sections"
+        );
+    }
+
    #[test]
    fn render_environment_block_lists_supplied_locale_and_workspace() {
        let tmp = tempdir().expect("tempdir");
@@ -226,3 +226,41 @@ You're rendering into a terminal, not a browser. Markdown tables almost never re
 - **Definition-style lists** (`- **Label**: value`) when the user asked for a comparison or summary.

 If you genuinely need column-aligned data (e.g. the user asked for a table or for `/cost` style output), keep columns narrow, ASCII-only, and limit to 2–3 columns. Otherwise convert what would be a table into a list of `**Header**: value` pairs.
+
+## Execution discipline
+
+<tool_persistence>
+- Use tools whenever they improve correctness, completeness, or grounding.
+- Do not stop early when another tool call would materially improve the result.
+- If a tool returns empty or partial results, retry with a different query or strategy before giving up.
+- Keep calling tools until: (1) the task is complete, AND (2) you have verified the result.
+</tool_persistence>
+
+<mandatory_tool_use>
+NEVER answer these from memory or mental computation — ALWAYS use a tool:
+- Arithmetic, math, calculations → `exec_shell` (e.g. `python -c '…'`)
+- Hashes, encodings, checksums → `exec_shell` (e.g. `sha256sum`, `base64`)
+- Current time, date, timezone → `exec_shell` (e.g. `date`)
+- System state: OS, CPU, memory, disk, ports, processes → `exec_shell`
+- File contents, sizes, line counts → `read_file` or `grep_files`
+- Symbol or pattern search across the workspace → `grep_files`
+- Filename search → `file_search`
+</mandatory_tool_use>
+
+<act_dont_ask>
+When a request has an obvious default interpretation, act on it immediately instead of asking for clarification. Save clarification for genuinely ambiguous requests.
+</act_dont_ask>
+
+<verification>
+After making changes, verify them: read back the file you wrote, run the test you fixed, fetch the URL you posted to. Don't claim success on faith.
+</verification>
+
+<missing_context>
+If you need context (a file you haven't read, a variable's current value, an external URL), name the gap and fetch it before proceeding.
+</missing_context>
+
+## Tool-use enforcement
+
+You MUST use your tools to take action — do not describe what you would do or plan to do without actually doing it. When you say you will perform an action ("I will run the tests", "Let me check the file", "I will create the project"), you MUST immediately make the corresponding tool call in the same response. Never end your turn with a promise of future action — execute it now.
+
+Every response should either (a) contain tool calls that make progress, or (b) deliver a final result to the user. Responses that only describe intentions without acting are not acceptable.