From 1f9f860a3e8a8009dc05cbcc2bc5e5b17b9c8af0 Mon Sep 17 00:00:00 2001
From: LeoAlex0 <31839998+LeoAlex0@users.noreply.github.com>
Date: Thu, 4 Jun 2026 02:52:27 +0800
Subject: [PATCH] feat(cache): project mode prompts per request

Keep the stable system prompt mode-agnostic and project mode, approval policy, and tool taxonomy as request-time runtime metadata. This avoids mutating stored history while preserving provider chat-template compatibility.

Harvested from PR #2687 with stewardship turn-metadata cache tests preserved. The replan replay guard remains <= 2, and cache inspect now asserts tool-result budget metadata for both deduplicated=false and deduplicated=true.

(cherry picked from commit 77943304e637545b441ac135f06977065c4b350f)
---
 CHANGELOG.md                                |   4 +
 crates/tui/CHANGELOG.md                     |   4 +
 crates/tui/src/commands/debug.rs            |  23 +-
 crates/tui/src/core/engine.rs               | 138 +++++++--
 crates/tui/src/core/engine/capacity_flow.rs |  18 +-
 crates/tui/src/core/engine/tests.rs         | 307 +++++++++++---------
 crates/tui/src/core/engine/tool_catalog.rs  |  19 +-
 crates/tui/src/core/engine/turn_loop.rs     |  18 +-
 crates/tui/src/core/ops.rs                  |   2 +-
 crates/tui/src/core/session.rs              |   4 +-
 crates/tui/src/prompts.rs                   | 147 ++++------
 11 files changed, 398 insertions(+), 286 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 929cdbd5..201e648b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Appended volatile `<turn_meta>` blocks after user text in outgoing user
   message content arrays so provider prefix caches can keep matching the stable
   user-input prefix across date, route, and working-set changes.
+- Projected mode, approval, and tool-taxonomy prompt metadata per request
+  instead of mutating stored system prompts, keeping provider prefix-cache
+  inputs byte-stable while preserving mode-specific instructions (#2687).
+  Thanks @LeoAlex0 for the implementation.
 - Softened contribution intake automation: external issues now receive a warm
   triage note and are never auto-closed by the contribution gate, while the PR
   gate copy makes clear that dry-run observations are about maintainer safety,
diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md
index 929cdbd5..201e648b 100644
--- a/crates/tui/CHANGELOG.md
+++ b/crates/tui/CHANGELOG.md
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Appended volatile `<turn_meta>` blocks after user text in outgoing user
   message content arrays so provider prefix caches can keep matching the stable
   user-input prefix across date, route, and working-set changes.
+- Projected mode, approval, and tool-taxonomy prompt metadata per request
+  instead of mutating stored system prompts, keeping provider prefix-cache
+  inputs byte-stable while preserving mode-specific instructions (#2687).
+  Thanks @LeoAlex0 for the implementation.
 - Softened contribution intake automation: external issues now receive a warm
   triage note and are never auto-closed by the contribution gate, while the PR
   gate copy makes clear that dry-run observations are about maintainer safety,
diff --git a/crates/tui/src/commands/debug.rs b/crates/tui/src/commands/debug.rs
index eee41bb5..13c18ecb 100644
--- a/crates/tui/src/commands/debug.rs
+++ b/crates/tui/src/commands/debug.rs
@@ -1225,10 +1225,25 @@ mod tests {
         let result = cache(&mut app, Some("inspect"));
         let msg = result.message.expect("inspect output");
 
-        assert!(msg.contains("original_chars=14000"), "got: {msg}");
-        assert!(msg.contains("truncated=true"), "got: {msg}");
-        assert!(msg.contains("deduplicated=false"), "got: {msg}");
-        assert!(msg.contains("deduplicated=true"), "got: {msg}");
+        let tool_budget_lines: Vec<_> = msg
+            .lines()
+            .filter(|line| line.contains("original_chars=14000"))
+            .collect();
+        assert_eq!(tool_budget_lines.len(), 2, "got: {msg}");
+
+        let first_sighting = tool_budget_lines
+            .iter()
+            .find(|line| line.contains("deduplicated=false"))
+            .expect("first tool-result sighting should report non-dedup metadata");
+        assert!(first_sighting.contains("sent_chars="), "got: {msg}");
+        assert!(first_sighting.contains("truncated=true"), "got: {msg}");
+
+        let repeat_sighting = tool_budget_lines
+            .iter()
+            .find(|line| line.contains("deduplicated=true"))
+            .expect("repeat tool-result sighting should report dedup metadata");
+        assert!(repeat_sighting.contains("sent_chars="), "got: {msg}");
+        assert!(repeat_sighting.contains("truncated=false"), "got: {msg}");
     }
 
     #[test]
diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index ef92332a..96af3169 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -687,7 +687,6 @@ impl Engine {
                     show_thinking: config.show_thinking,
                     allow_shell: config.allow_shell,
                 },
-                session.approval_mode,
             );
         let stable_prompt = Some(system_prompt);
         session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref()));
@@ -853,11 +852,12 @@ impl Engine {
         self.session.trust_mode = trust_mode;
         self.config.trust_mode = trust_mode;
         self.session.auto_approve = auto_approve;
-        self.session.approval_mode = if auto_approve {
-            crate::tui::approval::ApprovalMode::Auto
-        } else {
-            approval_mode
-        };
+        let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
+        // Only track the Agent-mode approval — Yolo/Plan have fixed
+        // approval policies that are derived from the mode itself.
+        if mode == AppMode::Agent {
+            self.session.approval_mode = agent_approval_mode;
+        }
 
         let _ = self
             .tx_event
@@ -1236,7 +1236,6 @@ impl Engine {
                 Op::ChangeMode { mode } => {
                     let previous_mode = self.current_mode;
                     self.current_mode = mode;
-                    self.refresh_system_prompt(mode);
                     self.emit_session_updated().await;
                     // Notify the agent that the mode has changed so it can re-evaluate
                     // any operations that were blocked by the previous mode's policy.
@@ -1253,11 +1252,11 @@ impl Engine {
                         )))
                         .await;
                 }
-                Op::SetModel { model, mode } => {
+                Op::SetModel { model, mode: _ } => {
                     self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
                     self.session.model = model;
                     self.config.model.clone_from(&self.session.model);
-                    self.refresh_system_prompt(mode);
+                    self.refresh_system_prompt();
                     self.emit_session_updated().await;
                     let _ = self
                         .tx_event
@@ -1304,6 +1303,10 @@ impl Engine {
                     self.session.compaction_summary_prompt =
                         extract_compaction_summary_prompt(system_prompt.clone());
                     self.session.system_prompt = system_prompt;
+                    self.session.last_system_prompt_hash =
+                        Some(system_prompt_hash(self.session.system_prompt.as_ref()));
+                    // Host-supplied prompts are persisted prefixes. Keep them
+                    // byte-stable; mode/runtime state is projected per request.
                     self.session.system_prompt_override =
                         system_prompt_override && self.session.system_prompt.is_some();
                     self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
@@ -1485,6 +1488,18 @@ In {new} mode: {policy}\n\n\
         }
     }
 
+    fn runtime_prompt_message(&self) -> Message {
+        let mode = self.current_mode;
+        let approval_mode = approval_mode_for(mode, self.session.approval_mode);
+        Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: runtime_prompt_text(mode, approval_mode),
+                cache_control: None,
+            }],
+        }
+    }
+
     fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
         self.user_text_message_with_turn_metadata_for_route(
             text,
@@ -1633,6 +1648,14 @@ In {new} mode: {policy}\n\n\
             .observe_user_message(&content, &self.session.workspace);
         let force_update_plan_first = should_force_update_plan_first(mode, &content);
 
+        let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
+        self.session.auto_approve = auto_approve;
+        // Only track the Agent-mode approval — Yolo/Plan have fixed
+        // approval policies that are derived from the mode itself.
+        if mode == AppMode::Agent {
+            self.session.approval_mode = agent_approval_mode;
+        }
+
         // Add user message to session
         let user_msg = self.user_text_message_with_turn_metadata_for_route(
             content,
@@ -1670,15 +1693,10 @@ In {new} mode: {policy}\n\n\
         self.config.trust_mode = trust_mode;
         self.config.translation_enabled = translation_enabled;
         self.config.show_thinking = show_thinking;
-        self.session.auto_approve = auto_approve;
-        self.session.approval_mode = if auto_approve {
-            crate::tui::approval::ApprovalMode::Auto
-        } else {
-            approval_mode
-        };
 
-        // Update system prompt to match current mode and include persisted compaction context.
-        self.refresh_system_prompt(mode);
+        // Refresh stable prompt context. Current mode is carried by the
+        // request-time runtime prompt projection.
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         // Build tool registry and tool list for the current mode
@@ -2430,8 +2448,8 @@ In {new} mode: {policy}\n\n\
             )))
             .await;
     }
-    /// Refresh the system prompt based on current mode and context.
-    fn refresh_system_prompt(&mut self, mode: AppMode) {
+    /// Refresh the stable system prompt based on current non-mode context.
+    fn refresh_system_prompt(&mut self) {
         let user_memory_block =
             crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path);
         let prompt_goal_objective = goal_objective_for_prompt(
@@ -2439,7 +2457,7 @@ In {new} mode: {policy}\n\n\
             &self.config.goal_state,
         );
         let base = prompts::system_prompt_for_mode_with_context_skills_session_and_approval(
-            mode,
+            AppMode::Agent,
             &self.config.workspace,
             None,
             Some(&self.config.skills_dir),
@@ -2454,7 +2472,6 @@ In {new} mode: {policy}\n\n\
                 show_thinking: self.config.show_thinking,
                 allow_shell: self.session.allow_shell,
             },
-            self.session.approval_mode,
         );
         let mut stable_prompt =
             merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone());
@@ -2472,7 +2489,6 @@ In {new} mode: {policy}\n\n\
 
         let stable_hash = system_prompt_hash(stable_prompt.as_ref());
         if self.session.system_prompt_override {
-            self.session.last_system_prompt_hash = Some(stable_hash);
             return;
         }
         if self.session.last_system_prompt_hash != Some(stable_hash) {
@@ -2634,6 +2650,84 @@ fn goal_objective_for_prompt(
     normalized_goal_objective(configured_goal)
 }
 
+// ── Mode & approval prompts as request-time runtime metadata ─────────
+//
+// Mode contracts and approval policies are not persisted in the session
+// history and are not sent as extra system messages. Instead, each API
+// request projects a transient user-role runtime metadata message at the
+// tail. The stable system prompt remains byte-stable, stored history remains
+// byte-stable, and strict chat-template providers never see a system message
+// outside messages[0].
+
+fn approval_mode_for(
+    mode: AppMode,
+    session_approval: crate::tui::approval::ApprovalMode,
+) -> crate::tui::approval::ApprovalMode {
+    match mode {
+        AppMode::Yolo => crate::tui::approval::ApprovalMode::Auto,
+        AppMode::Plan => crate::tui::approval::ApprovalMode::Never,
+        AppMode::Agent => session_approval,
+    }
+}
+
+fn agent_approval_mode_for_turn(
+    auto_approve: bool,
+    approval_mode: crate::tui::approval::ApprovalMode,
+) -> crate::tui::approval::ApprovalMode {
+    if auto_approve {
+        crate::tui::approval::ApprovalMode::Auto
+    } else {
+        approval_mode
+    }
+}
+
+fn mode_prompt_marker(mode: AppMode) -> String {
+    format!(
+        "<mode_prompt mode=\"{}\">",
+        match mode {
+            AppMode::Agent => "agent",
+            AppMode::Plan => "plan",
+            AppMode::Yolo => "yolo",
+        }
+    )
+}
+
+fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String {
+    format!(
+        "<approval_policy policy=\"{}\">",
+        match approval_mode {
+            crate::tui::approval::ApprovalMode::Auto => "auto",
+            crate::tui::approval::ApprovalMode::Suggest => "suggest",
+            crate::tui::approval::ApprovalMode::Never => "never",
+        }
+    )
+}
+
+fn mode_prompt_text(mode: AppMode) -> &'static str {
+    match mode {
+        AppMode::Agent => prompts::AGENT_MODE,
+        AppMode::Plan => prompts::PLAN_MODE,
+        AppMode::Yolo => prompts::YOLO_MODE,
+    }
+}
+
+fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
+    let marker = mode_prompt_marker(mode);
+    let mode_text = mode_prompt_text(mode).trim();
+    let taxonomy = prompts::render_core_tool_taxonomy_block(mode);
+    let approval_marker = approval_prompt_marker(approval_mode);
+    let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim();
+    format!(
+        "<runtime_prompt visibility=\"internal\">\n\
+This is runtime control metadata for the current request, not user input. \
+Apply it to the next assistant response and tool calls. It supersedes any \
+earlier mode or approval metadata in the transcript.\n\n\
+{marker}\n{taxonomy}\n{mode_text}\n</mode_prompt>\n\n\
+{approval_marker}\n{approval_text}\n</approval_policy>\n\
+</runtime_prompt>"
+    )
+}
+
 /// Spawn the engine in a background task
 pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle {
     let (engine, handle) = Engine::new(config, api_config);
diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs
index 50cdeef4..514ad120 100644
--- a/crates/tui/src/core/engine/capacity_flow.rs
+++ b/crates/tui/src/core/engine/capacity_flow.rs
@@ -36,7 +36,7 @@ impl Engine {
     pub(super) async fn run_capacity_post_tool_checkpoint(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
+
         tool_registry: Option<&crate::tools::ToolRegistry>,
         tool_exec_lock: Arc<RwLock<()>>,
         mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
@@ -56,7 +56,6 @@ impl Engine {
                 let _ = self
                     .apply_verify_with_tool_replay(
                         turn,
-                        mode,
                         snapshot.as_ref(),
                         tool_registry,
                         tool_exec_lock,
@@ -66,7 +65,7 @@ impl Engine {
                 false
             }
             GuardrailAction::VerifyAndReplan => {
-                self.apply_verify_and_replan(turn, mode, snapshot.as_ref(), "high_risk_post_tool")
+                self.apply_verify_and_replan(turn, snapshot.as_ref(), "high_risk_post_tool")
                     .await
             }
             GuardrailAction::NoIntervention | GuardrailAction::TargetedContextRefresh => false,
@@ -76,7 +75,7 @@ impl Engine {
     pub(super) async fn run_capacity_error_escalation_checkpoint(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
+
         step_error_count: usize,
         consecutive_tool_error_steps: u32,
         error_categories: &[ErrorCategory],
@@ -136,7 +135,6 @@ impl Engine {
         let category_labels: Vec<String> = error_categories.iter().map(|c| c.to_string()).collect();
         self.apply_verify_and_replan(
             turn,
-            mode,
             Some(&forced),
             &format!(
                 "error_escalation: step_errors={}, consecutive_steps={}, categories={}",
@@ -385,7 +383,7 @@ impl Engine {
         &mut self,
         turn: &TurnContext,
         client: Option<&DeepSeekClient>,
-        mode: AppMode,
+        _mode: AppMode,
         snapshot: Option<&CapacitySnapshot>,
     ) -> bool {
         let before_tokens = self.estimated_input_tokens();
@@ -465,7 +463,7 @@ impl Engine {
             GuardrailAction::TargetedContextRefresh,
             None,
         )));
-        self.refresh_system_prompt(mode);
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         let after_tokens = self.estimated_input_tokens();
@@ -487,7 +485,6 @@ impl Engine {
     pub(super) async fn apply_verify_with_tool_replay(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
         snapshot: Option<&CapacitySnapshot>,
         tool_registry: Option<&crate::tools::ToolRegistry>,
         tool_exec_lock: Arc<RwLock<()>>,
@@ -617,7 +614,7 @@ impl Engine {
             GuardrailAction::VerifyWithToolReplay,
             Some(&verification_note),
         )));
-        self.refresh_system_prompt(mode);
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         let after_tokens = self.estimated_input_tokens();
@@ -638,7 +635,6 @@ impl Engine {
     pub(super) async fn apply_verify_and_replan(
         &mut self,
         turn: &TurnContext,
-        mode: AppMode,
         snapshot: Option<&CapacitySnapshot>,
         reason: &str,
     ) -> bool {
@@ -685,7 +681,7 @@ impl Engine {
             GuardrailAction::VerifyAndReplan,
             Some("Replan now from canonical state. Keep steps minimal and verifiable."),
         )));
-        self.refresh_system_prompt(mode);
+        self.refresh_system_prompt();
         self.emit_session_updated().await;
 
         let _ = self
diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 186ba2d0..7369c17c 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -303,7 +303,7 @@ fn refresh_system_prompt_uses_runtime_goal_state() {
         goal.create("Close the runtime goal loop".to_string(), None);
     }
 
-    engine.refresh_system_prompt(AppMode::Agent);
+    engine.refresh_system_prompt();
     let prompt = match engine.session.system_prompt {
         Some(SystemPrompt::Text(text)) => text,
         Some(SystemPrompt::Blocks(blocks)) => blocks
@@ -505,116 +505,36 @@ fn tool_exec_outcome_tracks_duration() {
 #[test]
 fn core_native_tools_stay_loaded_in_yolo_mode() {
     let always_load = HashSet::new();
-    assert!(!should_default_defer_tool(
-        "exec_shell",
-        AppMode::Yolo,
-        &always_load
-    ));
+    assert!(!should_default_defer_tool("exec_shell", &always_load));
     // git_blame remains deferred (read-only git history beyond log/show/diff).
-    assert!(should_default_defer_tool(
-        "git_blame",
-        AppMode::Yolo,
-        &always_load
-    ));
+    assert!(should_default_defer_tool("git_blame", &always_load));
 }
 
 #[test]
 fn non_yolo_mode_retains_default_defer_policy() {
     let always_load = HashSet::new();
-    assert!(!should_default_defer_tool(
-        "exec_shell",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "edit_file",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "apply_patch",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "fetch_url",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "git_diff",
-        AppMode::Agent,
-        &always_load
-    ));
+    assert!(!should_default_defer_tool("exec_shell", &always_load));
+    assert!(!should_default_defer_tool("edit_file", &always_load));
+    assert!(!should_default_defer_tool("apply_patch", &always_load));
+    assert!(!should_default_defer_tool("fetch_url", &always_load));
+    assert!(!should_default_defer_tool("git_diff", &always_load));
     // #2654: read-only git history joins the active set.
-    assert!(!should_default_defer_tool(
-        "git_log",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "git_show",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "git_status",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "run_tests",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "agent_open",
-        AppMode::Agent,
-        &always_load
-    ));
+    assert!(!should_default_defer_tool("git_log", &always_load));
+    assert!(!should_default_defer_tool("git_show", &always_load));
+    assert!(!should_default_defer_tool("git_status", &always_load));
+    assert!(!should_default_defer_tool("run_tests", &always_load));
+    assert!(!should_default_defer_tool("agent_open", &always_load));
     // #2605: the fetch/close side of the sub-agent surface must also stay
     // active so a first `agent_eval`/`agent_close` executes instead of
     // hydrating its schema and forcing a double-invoke.
-    assert!(!should_default_defer_tool(
-        "agent_eval",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "agent_close",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "read_file",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "web_search",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "write_file",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "task_shell_start",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(!should_default_defer_tool(
-        "task_shell_wait",
-        AppMode::Agent,
-        &always_load
-    ));
-    assert!(should_default_defer_tool(
-        "git_blame",
-        AppMode::Agent,
-        &always_load
-    ));
+    assert!(!should_default_defer_tool("agent_eval", &always_load));
+    assert!(!should_default_defer_tool("agent_close", &always_load));
+    assert!(!should_default_defer_tool("read_file", &always_load));
+    assert!(!should_default_defer_tool("web_search", &always_load));
+    assert!(!should_default_defer_tool("write_file", &always_load));
+    assert!(!should_default_defer_tool("task_shell_start", &always_load));
+    assert!(!should_default_defer_tool("task_shell_wait", &always_load));
+    assert!(should_default_defer_tool("git_blame", &always_load));
 }
 
 #[test]
@@ -815,11 +735,7 @@ fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() {
 #[test]
 fn tools_always_load_overrides_default_native_deferral() {
     let always_load = HashSet::from(["git_blame".to_string()]);
-    assert!(!should_default_defer_tool(
-        "git_blame",
-        AppMode::Agent,
-        &always_load
-    ));
+    assert!(!should_default_defer_tool("git_blame", &always_load));
 }
 
 #[test]
@@ -1795,15 +1711,20 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
         .await
         .expect("send change mode");
 
-    let prompt = {
+    let (_prompt, messages) = {
         let mut rx = handle.rx_event.write().await;
         loop {
             let event = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
                 .await
                 .expect("session update after mode switch")
                 .expect("event");
-            if let Event::SessionUpdated { system_prompt, .. } = event {
-                break match system_prompt.expect("system prompt") {
+            if let Event::SessionUpdated {
+                system_prompt,
+                messages,
+                ..
+            } = event
+            {
+                let prompt = match system_prompt.expect("system prompt") {
                     SystemPrompt::Text(text) => text,
                     SystemPrompt::Blocks(blocks) => blocks
                         .into_iter()
@@ -1811,13 +1732,98 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
                         .collect::<Vec<_>>()
                         .join("\n"),
                 };
+                break (prompt, messages);
             }
         }
     };
     run.abort();
 
-    assert!(prompt.contains("Mode: YOLO"));
-    assert!(prompt.contains("Approval Policy: Auto"));
+    assert!(
+        messages.iter().all(|message| message.role != "system"),
+        "mode switch must not persist appended system messages: {messages:?}"
+    );
+    assert!(
+        messages.iter().all(|message| {
+            message.content.iter().all(|block| {
+                !matches!(
+                    block,
+                    ContentBlock::Text { text, .. }
+                        if text.contains("<mode_prompt") || text.contains("<approval_policy")
+                )
+            })
+        }),
+        "mode/approval prompts should be request-time metadata, not session history"
+    );
+}
+
+#[test]
+fn turn_approval_mode_prefers_auto_approve_flag() {
+    use crate::tui::approval::ApprovalMode;
+
+    assert_eq!(
+        agent_approval_mode_for_turn(true, ApprovalMode::Suggest),
+        ApprovalMode::Auto
+    );
+    assert_eq!(
+        approval_mode_for(
+            AppMode::Agent,
+            agent_approval_mode_for_turn(true, ApprovalMode::Never),
+        ),
+        ApprovalMode::Auto
+    );
+    assert_eq!(
+        approval_mode_for(AppMode::Yolo, ApprovalMode::Suggest),
+        ApprovalMode::Auto
+    );
+    assert_eq!(
+        approval_mode_for(AppMode::Plan, ApprovalMode::Auto),
+        ApprovalMode::Never
+    );
+}
+
+#[test]
+fn runtime_prompt_is_projected_without_persisting_to_session_messages() {
+    use crate::tui::approval::ApprovalMode;
+
+    let tmp = tempdir().expect("tempdir");
+    let config = EngineConfig {
+        workspace: tmp.path().to_path_buf(),
+        ..Default::default()
+    };
+    let (mut engine, _handle) = Engine::new(config, &Config::default());
+    engine.current_mode = AppMode::Plan;
+    engine.session.approval_mode = ApprovalMode::Suggest;
+    engine.session.messages = vec![Message {
+        role: "user".to_string(),
+        content: vec![ContentBlock::Text {
+            text: "summary after compaction".to_string(),
+            cache_control: None,
+        }],
+    }];
+    let stored = engine.session.messages.clone();
+
+    let request_messages = engine.messages_with_turn_metadata();
+
+    assert_eq!(engine.session.messages, stored);
+    assert_eq!(request_messages.len(), stored.len() + 1);
+    assert!(
+        request_messages
+            .iter()
+            .all(|message| message.role != "system"),
+        "runtime prompts must not create appended system messages"
+    );
+    let runtime = request_messages.last().expect("runtime prompt message");
+    assert_eq!(runtime.role, "user");
+    let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
+    else {
+        panic!("expected text runtime prompt");
+    };
+    assert!(text.contains("<runtime_prompt"));
+    assert!(text.contains("<mode_prompt mode=\"plan\">"));
+    assert!(
+        text.contains("<approval_policy policy=\"never\">"),
+        "Plan mode should project its fixed never-approval policy: {text}"
+    );
 }
 
 #[tokio::test]
@@ -2216,7 +2222,7 @@ fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() {
         .working_set
         .observe_user_message("please inspect src/lib.rs", tmp.path());
 
-    engine.refresh_system_prompt(AppMode::Agent);
+    engine.refresh_system_prompt();
 
     let prompt = match &engine.session.system_prompt {
         Some(SystemPrompt::Text(text)) => text.clone(),
@@ -2251,7 +2257,7 @@ fn working_set_reaches_model_as_turn_metadata() {
 
     let messages = engine.messages_with_turn_metadata();
     let last_block = messages
-        .last()
+        .first()
         .and_then(|message| message.content.last())
         .expect("turn metadata block");
     let ContentBlock::Text { text, .. } = last_block else {
@@ -2276,7 +2282,7 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
 
     let messages = engine.messages_with_turn_metadata();
     let last_block = messages
-        .last()
+        .first()
         .and_then(|message| message.content.last())
         .expect("turn metadata block");
     let ContentBlock::Text { text, .. } = last_block else {
@@ -2455,7 +2461,16 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
     let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
     engine.session.add_message(first_user.clone());
     let first_request = engine.messages_with_turn_metadata();
-    assert_eq!(first_request, engine.session.messages);
+    assert_eq!(
+        &first_request[..engine.session.messages.len()],
+        engine.session.messages.as_slice()
+    );
+    assert_eq!(first_request.len(), engine.session.messages.len() + 1);
+    assert_eq!(first_request.first(), Some(&first_user));
+    assert_eq!(
+        first_request.last().map(|message| message.role.as_str()),
+        Some("user")
+    );
 
     engine.session.add_message(Message {
         role: "assistant".to_string(),
@@ -2472,14 +2487,24 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
     engine.session.add_message(second_user);
 
     let second_request = engine.messages_with_turn_metadata();
-    assert_eq!(second_request, engine.session.messages);
+    assert_eq!(
+        &second_request[..engine.session.messages.len()],
+        engine.session.messages.as_slice()
+    );
+    assert_eq!(second_request.len(), engine.session.messages.len() + 1);
     assert_eq!(second_request.first(), Some(&first_user));
+    let runtime = second_request.last().expect("runtime prompt");
+    let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
+    else {
+        panic!("expected runtime prompt text");
+    };
+    assert!(text.contains("<runtime_prompt"));
 }
 
 /// v0.8.11 regression: tool-result messages serialize to role="tool" on
 /// the wire but are stored as role="user" internally. `<turn_meta>` must
-/// be stored only on actual user-text messages, not retroactively added
-/// to tool-result messages at request time.
+/// be stored only on actual user-text messages. Request-time runtime metadata
+/// is appended separately and must not mutate tool-result messages.
 #[test]
 fn turn_metadata_skips_tool_result_messages() {
     let tmp = tempdir().expect("tempdir");
@@ -2522,9 +2547,11 @@ fn turn_metadata_skips_tool_result_messages() {
 
     let messages = engine.messages_with_turn_metadata();
 
-    // The trailing message is the tool result and MUST be untouched —
+    // The stored trailing message is the tool result and MUST be untouched —
     // no Text block sneaking in front of the ToolResult block.
-    let trailing = messages.last().expect("trailing message");
+    let trailing = messages
+        .get(messages.len().saturating_sub(2))
+        .expect("stored trailing message");
     assert_eq!(trailing.role, "user");
     assert_eq!(trailing.content.len(), 1);
     assert!(matches!(
@@ -2546,6 +2573,14 @@ fn turn_metadata_skips_tool_result_messages() {
         panic!("expected Text block for turn_meta at tail");
     };
     assert!(meta.starts_with("<turn_meta>\n"));
+    assert!(meta.contains("src/lib.rs"));
+    assert!(
+        matches!(
+            messages.last().and_then(|message| message.content.first()),
+            Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
+        ),
+        "request projection should append transient runtime metadata"
+    );
 }
 
 /// User text must appear before turn_meta in the content array so that
@@ -2588,8 +2623,8 @@ fn user_message_turn_meta_is_appended_not_prepended() {
 }
 
 /// When the turn is mid-execution and the trailing user message is a
-/// tool result, no turn_meta is injected at request time. The working_set
-/// surfaces again on the next stored user-text message.
+/// tool result, no turn_meta is injected into that tool-result message. The
+/// working_set surfaces again on the next stored user-text message.
 #[test]
 fn turn_metadata_skips_when_only_tool_results_trail() {
     let tmp = tempdir().expect("tempdir");
@@ -2622,14 +2657,21 @@ fn turn_metadata_skips_when_only_tool_results_trail() {
 
     let messages = engine.messages_with_turn_metadata();
 
-    // Returned unchanged: the single tool-result message, no Text
-    // prefix, content length == 1.
-    let only = messages.last().expect("trailing message");
+    // Stored tool-result message is unchanged: no Text prefix, content length == 1.
+    let only = messages.first().expect("stored tool result message");
     assert_eq!(only.content.len(), 1);
     assert!(matches!(
         only.content.first(),
         Some(ContentBlock::ToolResult { .. })
     ));
+    assert_eq!(messages.len(), 2);
+    assert!(
+        matches!(
+            messages.last().and_then(|message| message.content.first()),
+            Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
+        ),
+        "request projection should still append transient runtime metadata"
+    );
 }
 
 #[test]
@@ -2641,10 +2683,10 @@ fn refresh_system_prompt_is_noop_when_unchanged() {
     };
     let (mut engine, _handle) = Engine::new(config, &Config::default());
 
-    engine.refresh_system_prompt(AppMode::Agent);
+    engine.refresh_system_prompt();
     let first_hash = engine.session.last_system_prompt_hash;
     let first_prompt = engine.session.system_prompt.clone();
-    engine.refresh_system_prompt(AppMode::Agent);
+    engine.refresh_system_prompt();
 
     assert_eq!(engine.session.last_system_prompt_hash, first_hash);
     assert_eq!(engine.session.system_prompt, first_prompt);
@@ -2691,7 +2733,7 @@ fn text_system_prompt_override_via_runtime_sync_survives_refresh() {
     let expected = Some(prompt.clone());
 
     sync_runtime_system_prompt_override(&mut engine, prompt);
-    engine.refresh_system_prompt(AppMode::Agent);
+    engine.refresh_system_prompt();
 
     assert_eq!(engine.session.system_prompt, expected);
 }
@@ -2712,7 +2754,7 @@ fn blocks_system_prompt_override_via_runtime_sync_survives_mode_change_refresh()
     let expected = Some(prompt.clone());
 
     sync_runtime_system_prompt_override(&mut engine, prompt);
-    engine.refresh_system_prompt(AppMode::Plan);
+    engine.refresh_system_prompt();
 
     assert_eq!(engine.session.system_prompt, expected);
 }
@@ -2732,7 +2774,7 @@ fn compaction_summary_stays_in_stable_system_prompt() {
         .session
         .working_set
         .observe_user_message("continue in src/main.rs", tmp.path());
-    engine.refresh_system_prompt(AppMode::Agent);
+    engine.refresh_system_prompt();
     engine.merge_compaction_summary(Some(SystemPrompt::Blocks(vec![SystemBlock {
         block_type: "text".to_string(),
         text: format!("{COMPACTION_SUMMARY_MARKER}\nsummary"),
@@ -2885,7 +2927,6 @@ async fn post_tool_replay_invoked_when_high_non_severe_risk() {
     let restarted = engine
         .run_capacity_post_tool_checkpoint(
             &turn,
-            AppMode::Agent,
             Some(&registry),
             Arc::new(RwLock::new(())),
             None,
@@ -2946,7 +2987,7 @@ async fn error_escalation_triggers_replan_when_severe_or_repeated_failures() {
     let before_len = engine.session.messages.len();
     let turn = TurnContext::new(10);
     let restarted = engine
-        .run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[])
+        .run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
         .await;
 
     assert!(restarted);
@@ -3004,7 +3045,7 @@ async fn capacity_disabled_by_default_keeps_messages_intact() {
     let before_len = engine.session.messages.len();
     let turn = TurnContext::new(10);
     let restarted = engine
-        .run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[])
+        .run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
         .await;
 
     // Capacity is disabled → no replan, no message clear.
diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs
index 9ddc0862..ebba11b4 100644
--- a/crates/tui/src/core/engine/tool_catalog.rs
+++ b/crates/tui/src/core/engine/tool_catalog.rs
@@ -67,11 +67,7 @@ pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[
     "write_file",
 ];
 
-pub(super) fn should_default_defer_tool(
-    name: &str,
-    _mode: AppMode,
-    always_load: &HashSet<String>,
-) -> bool {
+pub(super) fn should_default_defer_tool(name: &str, always_load: &HashSet<String>) -> bool {
     if always_load.contains(name) {
         return false;
     }
@@ -85,13 +81,9 @@ pub(super) fn should_default_defer_tool(
         .any(|core_tool| core_tool == &name)
 }
 
-pub(super) fn apply_native_tool_deferral(
-    catalog: &mut [Tool],
-    mode: AppMode,
-    always_load: &HashSet<String>,
-) {
+pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], always_load: &HashSet<String>) {
     for tool in catalog {
-        tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
+        tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load));
     }
 }
 
@@ -185,7 +177,7 @@ pub(super) fn build_model_tool_catalog(
     mode: AppMode,
     always_load: &HashSet<String>,
 ) -> Vec<Tool> {
-    apply_native_tool_deferral(&mut native_tools, mode, always_load);
+    apply_native_tool_deferral(&mut native_tools, always_load);
     apply_mcp_tool_deferral(&mut mcp_tools, mode);
     // Sort each partition by name for prefix-cache stability (#263). The
     // upstream `to_api_tools()` already sorts the registry's HashMap output;
@@ -229,7 +221,6 @@ pub(super) fn ensure_advanced_tooling(
             allowed_callers: Some(vec!["direct".to_string()]),
             defer_loading: Some(should_default_defer_tool(
                 CODE_EXECUTION_TOOL_NAME,
-                mode,
                 always_load,
             )),
             input_examples: None,
@@ -248,7 +239,7 @@ pub(super) fn ensure_advanced_tooling(
         && crate::dependencies::resolve_node().is_some()
     {
         let mut tool = crate::tools::js_execution::js_execution_tool_definition();
-        tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
+        tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load));
         catalog.push(tool);
     }
 
diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs
index bcb3dc3a..892d0d21 100644
--- a/crates/tui/src/core/engine/turn_loop.rs
+++ b/crates/tui/src/core/engine/turn_loop.rs
@@ -105,7 +105,7 @@ impl Engine {
             }
 
             // Ensure system prompt is up to date with latest session states
-            self.refresh_system_prompt(mode);
+            self.refresh_system_prompt();
 
             if turn.at_max_steps() {
                 let _ = self
@@ -2149,7 +2149,6 @@ impl Engine {
             if self
                 .run_capacity_post_tool_checkpoint(
                     turn,
-                    mode,
                     tool_registry,
                     tool_exec_lock.clone(),
                     mcp_pool.clone(),
@@ -2181,7 +2180,6 @@ impl Engine {
             if self
                 .run_capacity_error_escalation_checkpoint(
                     turn,
-                    mode,
                     step_error_count,
                     consecutive_tool_error_steps,
                     &step_error_categories,
@@ -2254,11 +2252,15 @@ impl Engine {
     }
 
     pub(super) fn messages_with_turn_metadata(&self) -> Vec<Message> {
-        // `<turn_meta>` is stored on user-text messages when the message is
-        // appended. Do not rewrite historical messages at request time: doing
-        // so makes the API prefix differ from the bytes sent in earlier turns
-        // and destroys DeepSeek's KV prefix cache reuse.
-        self.session.messages.clone()
+        // Keep stored history byte-stable and provider-compatible: runtime
+        // mode/approval contracts are projected as a transient user message
+        // at request time instead of being persisted as appended system
+        // messages. This preserves the stable prefix through all stored
+        // messages while avoiding strict chat templates that only allow
+        // system messages at messages[0].
+        let mut messages = self.session.messages.clone();
+        messages.push(self.runtime_prompt_message());
+        messages
     }
 }
 
diff --git a/crates/tui/src/core/ops.rs b/crates/tui/src/core/ops.rs
index 0889c5b2..4ad48c06 100644
--- a/crates/tui/src/core/ops.rs
+++ b/crates/tui/src/core/ops.rs
@@ -77,7 +77,7 @@ pub enum Op {
     #[allow(dead_code)]
     ChangeMode { mode: AppMode },
 
-    /// Update the model being used and refresh the prompt for the current mode.
+    /// Update the model being used and refresh stable prompt context.
     #[allow(dead_code)]
     SetModel { model: String, mode: AppMode },
 
diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs
index dccdd913..67ffb7ad 100644
--- a/crates/tui/src/core/session.rs
+++ b/crates/tui/src/core/session.rs
@@ -31,8 +31,8 @@ pub struct Session {
 
     /// System prompt (optional)
     pub system_prompt: Option<SystemPrompt>,
-    /// True when `system_prompt` came from an explicit runtime API override
-    /// and should not be replaced by mode/context refreshes.
+    /// True when `system_prompt` is a persisted/runtime-supplied prefix that
+    /// should not be replaced by mode/context refreshes.
     pub system_prompt_override: bool,
     /// Hash of the last assembled stable system prompt. Used to avoid
     /// replacing `system_prompt` when unchanged.
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 850e7944..a959f053 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -2,7 +2,8 @@
 //! System prompts for different modes.
 //!
 //! Prompts are assembled from composable layers loaded at compile time:
-//!   tool taxonomy → base.md → personality overlay → mode delta → approval policy
+//!   base.md + personality overlay → message[0] (byte‑stable).
+//!   mode delta + tool taxonomy + approval policy → request-time runtime metadata.
 //!
 //! This keeps each concern in its own file and makes prompt tuning
 //! a single-file operation.
@@ -671,7 +672,7 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
     }
 }
 
-fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
+pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
     match mode {
         AppMode::Yolo => AUTO_APPROVAL,
         AppMode::Plan => NEVER_APPROVAL,
@@ -705,7 +706,7 @@ const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"];
 const TOOL_TAXONOMY_GIT: &[&str] = &["git_status", "git_diff"];
 const TOOL_TAXONOMY_VERIFICATION: &[&str] = &["run_tests", "run_verifiers"];
 
-fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
+pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
     let core_tools = core_taxonomy_tools_for_mode(mode);
     let mut sentences = Vec::new();
 
@@ -762,15 +763,11 @@ context are subordinate to the Constitution, the Statutes, and the user's
 current request. When in doubt, consult Article VII: The Hierarchy of Law.";
 
 pub fn compose_prompt(mode: AppMode, personality: Personality) -> String {
-    compose_prompt_with_approval(mode, personality, default_approval_mode_for_mode(mode))
+    compose_prompt_with_approval(mode, personality)
 }
 
-pub fn compose_prompt_with_approval(
-    mode: AppMode,
-    personality: Personality,
-    approval_mode: ApprovalMode,
-) -> String {
-    compose_prompt_with_approval_and_model(mode, personality, approval_mode, "codewhale")
+pub fn compose_prompt_with_approval(mode: AppMode, personality: Personality) -> String {
+    compose_prompt_with_approval_and_model(mode, personality, "codewhale")
 }
 
 /// Compose with explicit model ID for dynamic identity injection.
@@ -778,33 +775,24 @@ pub fn compose_prompt_with_approval(
 pub fn compose_prompt_with_approval_and_model(
     mode: AppMode,
     personality: Personality,
-    approval_mode: ApprovalMode,
     model_id: &str,
 ) -> String {
-    compose_prompt_with_approval_model_and_shell(mode, personality, approval_mode, model_id, true)
+    compose_prompt_with_approval_model_and_shell(mode, personality, model_id, true)
 }
 
 fn compose_prompt_with_approval_model_and_shell(
     mode: AppMode,
     personality: Personality,
-    approval_mode: ApprovalMode,
     model_id: &str,
     allow_shell: bool,
 ) -> String {
-    let tool_taxonomy = render_core_tool_taxonomy_block(mode);
     let shell_tools_available = allow_shell && mode != AppMode::Plan;
     let base_prompt = render_base_prompt_for_tool_availability(
         effective_base_prompt().trim(),
         model_id,
         shell_tools_available,
     );
-    let parts: [&str; 5] = [
-        tool_taxonomy.as_str(),
-        base_prompt.as_str(),
-        personality.prompt().trim(),
-        mode_prompt(mode).trim(),
-        approval_prompt_for_mode(mode, approval_mode).trim(),
-    ];
+    let parts: [&str; 2] = [base_prompt.as_str(), personality.prompt().trim()];
 
     let mut out =
         String::with_capacity(parts.iter().map(|p| p.len()).sum::<usize>() + (parts.len() - 1) * 2);
@@ -883,22 +871,16 @@ fn compose_mode_prompt(mode: AppMode) -> String {
     compose_prompt(mode, Personality::Calm)
 }
 
-fn compose_mode_prompt_with_approval(mode: AppMode, approval_mode: ApprovalMode) -> String {
-    compose_prompt_with_approval(mode, Personality::Calm, approval_mode)
+fn compose_mode_prompt_with_approval(mode: AppMode) -> String {
+    compose_prompt_with_approval(mode, Personality::Calm)
 }
 
 fn compose_mode_prompt_with_approval_and_model(
     mode: AppMode,
-    approval_mode: ApprovalMode,
+    _approval_mode: ApprovalMode,
     model_id: &str,
 ) -> String {
-    compose_prompt_with_approval_model_and_shell(
-        mode,
-        Personality::Calm,
-        approval_mode,
-        model_id,
-        true,
-    )
+    compose_prompt_with_approval_model_and_shell(mode, Personality::Calm, model_id, true)
 }
 
 // ── Public API ────────────────────────────────────────────────────────
@@ -991,7 +973,6 @@ pub fn system_prompt_for_mode_with_context_skills_and_session(
         skills_dir,
         instructions,
         session_context,
-        default_approval_mode_for_mode(mode),
     )
 }
 
@@ -1002,12 +983,10 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
     skills_dir: Option<&Path>,
     instructions: Option<&[InstructionSource]>,
     session_context: PromptSessionContext<'_>,
-    approval_mode: ApprovalMode,
 ) -> SystemPrompt {
     let mode_prompt = compose_prompt_with_approval_model_and_shell(
         mode,
         Personality::Calm,
-        approval_mode,
         session_context.model_id,
         session_context.allow_shell,
     );
@@ -1335,7 +1314,6 @@ mod tests {
         let prompt = compose_prompt_with_approval_and_model(
             AppMode::Agent,
             Personality::Calm,
-            ApprovalMode::Suggest,
             "deepseek-v4-flash",
         );
         assert!(
@@ -1353,7 +1331,6 @@ mod tests {
         let prompt = compose_prompt_with_approval_model_and_shell(
             AppMode::Agent,
             Personality::Calm,
-            ApprovalMode::Suggest,
             "deepseek-v4-pro",
             true,
         );
@@ -1369,7 +1346,6 @@ mod tests {
         let prompt = compose_prompt_with_approval_model_and_shell(
             AppMode::Agent,
             Personality::Calm,
-            ApprovalMode::Suggest,
             "deepseek-v4-pro",
             false,
         );
@@ -1403,47 +1379,36 @@ mod tests {
     }
 
     #[test]
-    fn composed_prompt_starts_with_core_tool_taxonomy() {
+    fn composed_prompt_no_longer_inlines_tool_taxonomy() {
         let prompt = compose_prompt_with_approval_and_model(
             AppMode::Agent,
             Personality::Calm,
-            ApprovalMode::Suggest,
             "deepseek-v4-pro",
         );
-        let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Agent);
-
-        assert!(
-            prompt.starts_with(&expected_taxonomy),
-            "composed prompt should start with the compact generated tool taxonomy"
-        );
+        // The generated "## Core Tool Taxonomy" block now travels in the
+        // request-time <mode_prompt> metadata rather than being prepended here.
+        // (The "## Toolbox" section from the Constitutional preamble remains.)
+        assert!(!prompt.contains("## Core Tool Taxonomy"));
+        assert!(prompt.contains("You are deepseek-v4-pro"));
     }
 
     #[test]
     fn plan_prompt_taxonomy_omits_run_tests() {
-        let prompt = compose_prompt_with_approval_and_model(
-            AppMode::Plan,
-            Personality::Calm,
-            ApprovalMode::Never,
-            "deepseek-v4-pro",
-        );
-        let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Plan);
-
+        let taxonomy = render_core_tool_taxonomy_block(AppMode::Plan);
+        // Plan taxonomy should omit execution tools (verified at the source).
         assert!(
-            prompt.starts_with(&expected_taxonomy),
-            "Plan prompt should start with its mode-specific tool taxonomy"
-        );
-        assert!(
-            expected_taxonomy.contains("for discovery")
-                && expected_taxonomy.contains("for git inspection"),
+            taxonomy.contains("for discovery") && taxonomy.contains("for git inspection"),
             "Plan taxonomy should keep read-only discovery and git guidance"
         );
         assert!(
-            !expected_taxonomy.contains("run_tests")
-                && !expected_taxonomy.contains("run_verifiers")
-                && !expected_taxonomy.contains("for verification")
-                && !expected_taxonomy.contains("Use  "),
-            "Plan taxonomy must not advertise unavailable verification tools: {expected_taxonomy:?}"
+            !taxonomy.contains("run_tests")
+                && !taxonomy.contains("run_verifiers")
+                && !taxonomy.contains("exec_shell"),
+            "Plan taxonomy must not mention run_tests, run_verifiers, or exec_shell"
         );
+        // The taxonomy block is rendered correctly but no longer inlined
+        // into the base system prompt — it travels in request-time
+        // <mode_prompt> metadata instead.
     }
 
     #[test]
@@ -1471,7 +1436,6 @@ mod tests {
             None,
             None,
             PromptSessionContext::default(),
-            ApprovalMode::Suggest,
         ) {
             SystemPrompt::Text(text) => text,
             SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1677,7 +1641,6 @@ mod tests {
                 show_thinking: true,
                 allow_shell: true,
             },
-            ApprovalMode::Suggest,
         ) {
             SystemPrompt::Text(text) => text,
             SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1749,7 +1712,6 @@ mod tests {
                 show_thinking: true,
                 allow_shell: true,
             },
-            ApprovalMode::Suggest,
         ) {
             SystemPrompt::Text(text) => text,
             SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1794,7 +1756,6 @@ mod tests {
                 show_thinking: false,
                 allow_shell: true,
             },
-            ApprovalMode::Suggest,
         ) {
             SystemPrompt::Text(text) => text,
             SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1849,7 +1810,6 @@ mod tests {
                 show_thinking: true,
                 allow_shell: true,
             },
-            ApprovalMode::Suggest,
         ) {
             SystemPrompt::Text(text) => text,
             SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -2175,10 +2135,10 @@ mod tests {
         assert!(prompt.contains("You are codewhale"));
         // Personality layer
         assert!(prompt.contains("Personality: Calm"));
-        // Mode layer
-        assert!(prompt.contains("Mode: Agent"));
-        // Approval layer
-        assert!(prompt.contains("Approval Policy: Suggest"));
+        // Mode and approval are no longer inlined — they travel as
+        // request-time runtime metadata.
+        assert!(!prompt.contains("Mode: Agent"));
+        assert!(!prompt.contains("Approval Policy:"));
     }
 
     /// Gate against shipping a release with a missing CHANGELOG entry — which
@@ -2233,32 +2193,37 @@ mod tests {
         let prompt = compose_prompt(AppMode::Yolo, Personality::Calm);
         let base_pos = prompt.find("You are codewhale").unwrap();
         let personality_pos = prompt.find("Personality: Calm").unwrap();
-        let mode_pos = prompt.find("Mode: YOLO").unwrap();
-        let approval_pos = prompt.find("Approval Policy: Auto").unwrap();
 
         assert!(base_pos < personality_pos);
-        assert!(personality_pos < mode_pos);
-        assert!(mode_pos < approval_pos);
+        // Mode and approval text are no longer inlined — they travel as
+        // request-time runtime metadata.
     }
 
     #[test]
-    fn each_mode_gets_correct_approval() {
-        assert!(
-            compose_prompt(AppMode::Agent, Personality::Calm).contains("Approval Policy: Suggest")
-        );
-        assert!(compose_prompt(AppMode::Yolo, Personality::Calm).contains("Approval Policy: Auto"));
-        assert!(
-            compose_prompt(AppMode::Plan, Personality::Calm).contains("Approval Policy: Never")
-        );
+    fn base_prompt_is_mode_agnostic() {
+        // Mode and approval text are no longer inlined into compose_prompt —
+        // they travel as request-time runtime metadata.
+        let agent_prompt = compose_prompt(AppMode::Agent, Personality::Calm);
+        let yolo_prompt = compose_prompt(AppMode::Yolo, Personality::Calm);
+        let plan_prompt = compose_prompt(AppMode::Plan, Personality::Calm);
+        assert!(!agent_prompt.contains("Mode: Agent"));
+        assert!(!yolo_prompt.contains("Mode: YOLO"));
+        assert!(!plan_prompt.contains("Mode: Plan"));
+        assert!(!agent_prompt.contains("Approval Policy:"));
+        assert!(!yolo_prompt.contains("Approval Policy:"));
+        assert!(!plan_prompt.contains("Approval Policy:"));
+        // Base prompt still contains Constitutional preamble and personality
+        assert!(agent_prompt.contains("You are codewhale"));
+        assert!(agent_prompt.contains("Personality: Calm"));
     }
 
     #[test]
-    fn agent_prompt_can_reflect_never_approval_policy() {
-        let prompt =
-            compose_prompt_with_approval(AppMode::Agent, Personality::Calm, ApprovalMode::Never);
-        assert!(prompt.contains("Mode: Agent"));
-        assert!(prompt.contains("Approval Policy: Never"));
-        assert!(prompt.contains("/config approval_mode suggest"));
+    fn approval_policy_no_longer_inlined_in_base_prompt() {
+        let prompt = compose_prompt_with_approval(AppMode::Agent, Personality::Calm);
+        assert!(!prompt.contains("Mode: Agent"));
+        assert!(!prompt.contains("Approval Policy:"));
+        // Constitutional preamble is still present
+        assert!(prompt.contains("You are codewhale"));
     }
 
     #[test]