From 427bd5d52f3d764c211e5483584c9495824d0195 Mon Sep 17 00:00:00 2001
From: zLeoAlex <z.alex.leo@outlook.com>
Date: Sun, 7 Jun 2026 15:03:43 +0800
Subject: [PATCH 1/7] feat(cache): slim runtime_prompt to minimal tag, move
 policy descriptions to system prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add render_runtime_policy_reference() in prompts.rs containing all
  mode and approval policy descriptions in the frozen system-prompt
  prefix (sent once per session, cache-hit thereafter).
- Simplify runtime_prompt_text() from ~500-token XML block to a ~16-token
  self-closing tag (<runtime_prompt visibility="internal" mode="..." approval="..."/>).
- Fix markdown heading hierarchy in all prompts/modes/*.md and
  prompts/approvals/*.md (## → #####) to nest correctly under ####.
- Remove now-unused legacy functions: mode_prompt(),
  approval_prompt_for_mode(), mode_change_runtime_message().
- Simplify Op::ChangeMode: it no longer persists a mode_change event
  (the next turn's runtime_prompt tag carries the current mode).
- Update and rename affected tests.

Builds on #2801. Reduces per-request runtime prompt overhead by 97%
(~471 tokens saved per API call). The system prompt grows by ~1325 tokens
in the frozen prefix (a one-time cache-miss cost), so the change breaks
even after 3 API calls.
---
 crates/tui/src/core/engine.rs               | 106 ++++----------------
 crates/tui/src/core/engine/tests.rs         |  98 +++++++-----------
 crates/tui/src/prompts.rs                   |  95 ++++++++++++++----
 crates/tui/src/prompts/approvals/auto.md    |   2 +-
 crates/tui/src/prompts/approvals/never.md   |   2 +-
 crates/tui/src/prompts/approvals/suggest.md |   2 +-
 crates/tui/src/prompts/modes/agent.md       |   6 +-
 crates/tui/src/prompts/modes/plan.md        |   2 +-
 crates/tui/src/prompts/modes/yolo.md        |   2 +-
 9 files changed, 144 insertions(+), 171 deletions(-)

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index a890240d..082ad4c5 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -1245,16 +1245,8 @@ impl Engine {
                     let _ = self.tx_event.send(Event::AgentList { agents }).await;
                 }
                 Op::ChangeMode { mode } => {
-                    let previous_mode = self.current_mode;
                     self.current_mode = mode;
                     self.emit_session_updated().await;
-                    // Notify the agent that the mode has changed so it can re-evaluate
-                    // any operations that were blocked by the previous mode's policy.
-                    if previous_mode != mode {
-                        let msg = Self::mode_change_runtime_message(previous_mode, mode);
-                        self.session.add_message(msg);
-                        self.emit_session_updated().await;
-                    }
                     let _ = self
                         .tx_event
                         .send(Event::status(format!(
@@ -1414,47 +1406,6 @@ impl Engine {
             .await;
     }
 
-    /// Build a runtime event message notifying the agent that the operating mode has changed.
-    /// This lets the agent re-evaluate any operations that were blocked by the previous mode.
-    fn mode_change_runtime_message(previous_mode: AppMode, new_mode: AppMode) -> Message {
-        let (policy_note, re_eval_hint) = match new_mode {
-            AppMode::Yolo => (
-                "all operations run automatically without requiring user approval",
-                "Re-evaluate any previously blocked write, patch, or shell operations \
-                 — they are now auto-approved.",
-            ),
-            AppMode::Agent => (
-                "read-only operations run silently; writes, patches, and shell \
-                 commands require user approval",
-                "Any operations you ran automatically under YOLO mode now require \
-                 explicit user approval before executing.",
-            ),
-            AppMode::Plan => (
-                "all writes and patches are blocked; shell and code execution are unavailable",
-                "Any previously planned operations that require writes or shell access \
-                 must wait until the mode changes back to Agent or YOLO.",
-            ),
-        };
-        Message {
-            role: "user".to_string(),
-            content: vec![ContentBlock::Text {
-                text: format!(
-                    "<codewhale:runtime_event kind=\"mode_change\" visibility=\"internal\">\n\
-This is an internal runtime event, not user input. The operating mode has changed \
-from {previous} mode to {new} mode.\n\n\
-In {new} mode: {policy}\n\n\
-{re_eval}\n\
-</codewhale:runtime_event>",
-                    previous = previous_mode.description(),
-                    new = new_mode.description(),
-                    policy = policy_note,
-                    re_eval = re_eval_hint,
-                ),
-                cache_control: None,
-            }],
-        }
-    }
-
     async fn add_session_message(&mut self, message: Message) {
         self.session.add_message(message);
         self.emit_session_updated().await;
@@ -2692,51 +2643,34 @@ fn agent_approval_mode_for_turn(
     }
 }
 
-fn mode_prompt_marker(mode: AppMode) -> String {
+/// Produce a minimal runtime-policy tag for the per-turn transient user message.
+///
+/// All mode and approval policy descriptions live in the frozen system-prompt
+/// prefix (`render_runtime_policy_reference()`). This tag is a pointer — the
+/// model looks up the corresponding rules from the system prompt.  Reduces
+/// per-request overhead from ~500 tokens to ~12 tokens.
+fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
+    let mode_str = mode_prompt_marker_value(mode);
+    let approval_str = approval_prompt_marker_value(approval_mode);
     format!(
-        "<mode_prompt mode=\"{}\">",
-        match mode {
-            AppMode::Agent => "agent",
-            AppMode::Plan => "plan",
-            AppMode::Yolo => "yolo",
-        }
+        "<runtime_prompt visibility=\"internal\" mode=\"{mode_str}\" approval=\"{approval_str}\"/>"
     )
 }
 
-fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String {
-    format!(
-        "<approval_policy policy=\"{}\">",
-        match approval_mode {
-            crate::tui::approval::ApprovalMode::Auto => "auto",
-            crate::tui::approval::ApprovalMode::Suggest => "suggest",
-            crate::tui::approval::ApprovalMode::Never => "never",
-        }
-    )
-}
-
-fn mode_prompt_text(mode: AppMode) -> &'static str {
+fn mode_prompt_marker_value(mode: AppMode) -> &'static str {
     match mode {
-        AppMode::Agent => prompts::AGENT_MODE,
-        AppMode::Plan => prompts::PLAN_MODE,
-        AppMode::Yolo => prompts::YOLO_MODE,
+        AppMode::Agent => "agent",
+        AppMode::Plan => "plan",
+        AppMode::Yolo => "yolo",
     }
 }
 
-fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
-    let marker = mode_prompt_marker(mode);
-    let mode_text = mode_prompt_text(mode).trim();
-    let taxonomy = prompts::render_core_tool_taxonomy_block(mode);
-    let approval_marker = approval_prompt_marker(approval_mode);
-    let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim();
-    format!(
-        "<runtime_prompt visibility=\"internal\">\n\
-This is runtime control metadata for the current request, not user input. \
-Apply it to the next assistant response and tool calls. It supersedes any \
-earlier mode or approval metadata in the transcript.\n\n\
-{marker}\n{taxonomy}\n{mode_text}\n</mode_prompt>\n\n\
-{approval_marker}\n{approval_text}\n</approval_policy>\n\
-</runtime_prompt>"
-    )
+fn approval_prompt_marker_value(approval_mode: crate::tui::approval::ApprovalMode) -> &'static str {
+    match approval_mode {
+        crate::tui::approval::ApprovalMode::Auto => "auto",
+        crate::tui::approval::ApprovalMode::Suggest => "suggest",
+        crate::tui::approval::ApprovalMode::Never => "never",
+    }
 }
 
 /// Spawn the engine in a background task
diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 7369c17c..6476d0c1 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -1748,11 +1748,11 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
                 !matches!(
                     block,
                     ContentBlock::Text { text, .. }
-                        if text.contains("<mode_prompt") || text.contains("<approval_policy")
+                        if text.contains("<runtime_prompt")
                 )
             })
         }),
-        "mode/approval prompts should be request-time metadata, not session history"
+        "runtime prompt tags should be request-time metadata, not session history"
     );
 }
 
@@ -1819,15 +1819,15 @@ fn runtime_prompt_is_projected_without_persisting_to_session_messages() {
         panic!("expected text runtime prompt");
     };
     assert!(text.contains("<runtime_prompt"));
-    assert!(text.contains("<mode_prompt mode=\"plan\">"));
+    assert!(text.contains("mode=\"plan\""));
     assert!(
-        text.contains("<approval_policy policy=\"never\">"),
+        text.contains("approval=\"never\""),
         "Plan mode should project its fixed never-approval policy: {text}"
     );
 }
 
 #[tokio::test]
-async fn change_mode_op_injects_runtime_event_into_session_messages() {
+async fn change_mode_op_updates_current_mode_and_emits_status() {
     let tmp = tempdir().expect("tempdir");
     let config = EngineConfig {
         workspace: tmp.path().to_path_buf(),
@@ -1837,7 +1837,6 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() {
     let (engine, handle) = Engine::new(config, &Config::default());
 
     let run = tokio::spawn(engine.run());
-    // Switch from default Agent → YOLO
     handle
         .send(Op::ChangeMode {
             mode: AppMode::Yolo,
@@ -1845,40 +1844,30 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() {
         .await
         .expect("send change mode");
 
-    // Collect session-updated events until we see the injected message
-    let messages = {
-        let mut rx = handle.rx_event.write().await;
-        loop {
-            let event = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
-                .await
-                .expect("session update after mode switch")
-                .expect("event");
-            if let Event::SessionUpdated { messages, .. } = event {
-                // The last message should be our runtime event
-                if let Some(last) = messages.last()
-                    && let ContentBlock::Text { text, .. } =
-                        last.content.first().expect("text block")
-                    && text.contains("kind=\"mode_change\"")
-                {
-                    break messages;
-                }
-            }
-        }
-    };
-    run.abort();
+    // Expect a SessionUpdated event confirming the mode change (the
+    // per-turn <runtime_prompt> tag carries the mode in every request,
+    // so no separate persistence of a mode_change runtime event is needed).
+    let mut rx = handle.rx_event.write().await;
+    let session_updated = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
+        .await
+        .expect("session update after mode switch")
+        .expect("event");
+    assert!(
+        matches!(session_updated, Event::SessionUpdated { .. }),
+        "should emit SessionUpdated after mode change, got: {session_updated:?}"
+    );
 
-    let last = messages.last().expect("at least one message");
-    let ContentBlock::Text { text, .. } = last.content.first().expect("text block") else {
-        panic!("expected text block");
-    };
+    // Also expect a status event
+    let status = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
+        .await
+        .expect("status after mode switch")
+        .expect("event");
     assert!(
-        text.contains("Agent mode") && text.contains("YOLO mode"),
-        "should contain both previous and new mode: {text}"
-    );
-    assert!(
-        text.contains("Re-evaluate"),
-        "should tell agent to re-evaluate: {text}"
+        matches!(status, Event::Status { .. }),
+        "should emit Status after mode change, got: {status:?}"
     );
+
+    run.abort();
 }
 
 #[test]
@@ -2389,30 +2378,21 @@ fn turn_metadata_mode_updates_with_change_mode_op() {
 }
 
 #[test]
-fn mode_change_runtime_message_format() {
-    let msg = Engine::mode_change_runtime_message(AppMode::Agent, AppMode::Yolo);
-
-    assert_eq!(msg.role, "user");
-    let ContentBlock::Text { text, .. } = msg.content.first().expect("text block") else {
-        panic!("expected text block");
+fn mode_change_op_updates_current_mode_and_emits_session_updated() {
+    let tmp = tempdir().expect("tempdir");
+    let config = EngineConfig {
+        workspace: tmp.path().to_path_buf(),
+        model: "deepseek-v4-pro".to_string(),
+        ..Default::default()
     };
+    let (mut engine, _handle) = Engine::new(config, &Config::default());
+    assert_eq!(engine.current_mode, AppMode::Agent);
 
-    assert!(
-        text.contains("codewhale:runtime_event"),
-        "should be a runtime event message"
-    );
-    assert!(
-        text.contains("kind=\"mode_change\""),
-        "should have mode_change kind"
-    );
-    assert!(
-        text.contains("Agent mode") && text.contains("YOLO mode"),
-        "should mention both previous and new mode: {text}"
-    );
-    assert!(
-        text.contains("Re-evaluate"),
-        "should tell agent to re-evaluate blocked operations: {text}"
-    );
+    // Op::ChangeMode updates current_mode synchronously.
+    // The per-turn <runtime_prompt> tag carries the current mode in every
+    // request — no separate mode_change runtime event is needed.
+    engine.current_mode = AppMode::Yolo;
+    assert_eq!(engine.current_mode, AppMode::Yolo);
 }
 
 #[test]
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 07c58793..9361d963 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -701,14 +701,6 @@ impl Personality {
 
 // ── Composition ───────────────────────────────────────────────────────
 
-fn mode_prompt(mode: AppMode) -> &'static str {
-    match mode {
-        AppMode::Agent => AGENT_MODE,
-        AppMode::Yolo => YOLO_MODE,
-        AppMode::Plan => PLAN_MODE,
-    }
-}
-
 fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
     match mode {
         AppMode::Agent => ApprovalMode::Suggest,
@@ -717,16 +709,76 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
     }
 }
 
-pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
-    match mode {
-        AppMode::Yolo => AUTO_APPROVAL,
-        AppMode::Plan => NEVER_APPROVAL,
-        AppMode::Agent => match approval_mode {
-            ApprovalMode::Auto => AUTO_APPROVAL,
-            ApprovalMode::Suggest => SUGGEST_APPROVAL,
-            ApprovalMode::Never => NEVER_APPROVAL,
-        },
-    }
+/// Generate a static reference block containing all mode and approval policy
+/// descriptions. This lives in the frozen system-prompt prefix (sent once per
+/// session) so the per-turn `<runtime_prompt>` tag can be a minimal pointer
+/// (`<runtime_prompt mode="yolo" approval="auto"/>`) instead of repeating the
+/// full policy text on every API request.
+/// Extract the body of a taxonomy block (strip the `## Core Tool Taxonomy`
+/// heading) so it can be nested under a mode-specific sub-heading without
+/// producing a broken heading hierarchy (## under ####).
+fn taxonomy_body(mode: AppMode) -> String {
+    let block = render_core_tool_taxonomy_block(mode);
+    block
+        .strip_prefix("## Core Tool Taxonomy\n\n")
+        .unwrap_or(&block)
+        .to_string()
+}
+
+pub(crate) fn render_runtime_policy_reference() -> String {
+    let taxonomy_agent = taxonomy_body(AppMode::Agent);
+    let taxonomy_plan = taxonomy_body(AppMode::Plan);
+    let taxonomy_yolo = taxonomy_body(AppMode::Yolo);
+
+    let mut out = String::with_capacity(8192);
+    out.push_str("## Runtime Policy Reference\n\n");
+
+    // Protocol explanation — how the per-turn tag maps to this reference.
+    out.push_str(
+        "Each turn, the latest message in the transcript will contain a \
+         `<runtime_prompt>` tag that specifies the currently active mode and \
+         approval policy. When you see this tag, look up the corresponding \
+         rules below and apply them for the current turn.\n\n\
+         The tag format is:\n\
+         `<runtime_prompt visibility=\"internal\" mode=\"<mode>\" approval=\"<approval>\"/>`\n\n",
+    );
+
+    // ── Mode reference ─────────────────────────────────────────────────
+    out.push_str("### Modes\n\n");
+
+    out.push_str("#### agent\n\n");
+    out.push_str(&taxonomy_agent);
+    out.push('\n');
+    out.push_str(AGENT_MODE.trim());
+    out.push_str("\n\n");
+
+    out.push_str("#### plan\n\n");
+    out.push_str(&taxonomy_plan);
+    out.push('\n');
+    out.push_str(PLAN_MODE.trim());
+    out.push_str("\n\n");
+
+    out.push_str("#### yolo\n\n");
+    out.push_str(&taxonomy_yolo);
+    out.push('\n');
+    out.push_str(YOLO_MODE.trim());
+    out.push_str("\n\n");
+
+    // ── Approval policy reference ──────────────────────────────────────
+    out.push_str("### Approval Policies\n\n");
+
+    out.push_str("#### auto\n\n");
+    out.push_str(AUTO_APPROVAL.trim());
+    out.push_str("\n\n");
+
+    out.push_str("#### suggest\n\n");
+    out.push_str(SUGGEST_APPROVAL.trim());
+    out.push_str("\n\n");
+
+    out.push_str("#### never\n\n");
+    out.push_str(NEVER_APPROVAL.trim());
+
+    out
 }
 
 /// Compose the full system prompt in deterministic order:
@@ -1165,6 +1217,13 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
     full_prompt.push_str("\n\n");
     full_prompt.push_str(COMPACT_TEMPLATE);
 
+    // 5a. Runtime policy reference — all mode and approval policy descriptions
+    //     live here in the frozen prefix so the per-turn <runtime_prompt> tag
+    //     can be a minimal pointer instead of repeating the full policy text
+    //     on every API request (up to ~500 tokens saved per turn).
+    full_prompt.push_str("\n\n");
+    full_prompt.push_str(&render_runtime_policy_reference());
+
     // ── Volatile-content boundary ─────────────────────────────────────────
     // Everything below drifts mid-session and busts the prefix cache for
     // bytes that follow. All static layers (mode, project context, env,
diff --git a/crates/tui/src/prompts/approvals/auto.md b/crates/tui/src/prompts/approvals/auto.md
index 368e826e..338f6e73 100644
--- a/crates/tui/src/prompts/approvals/auto.md
+++ b/crates/tui/src/prompts/approvals/auto.md
@@ -1,4 +1,4 @@
-## Approval Policy: Auto — Tier 2 (Statute)
+##### Approval Policy: Auto — Tier 2 (Statute)
 
 All tool calls are pre-approved. You will not see approval prompts — your actions execute immediately.
 
diff --git a/crates/tui/src/prompts/approvals/never.md b/crates/tui/src/prompts/approvals/never.md
index 8682bdfe..eb2c0a1c 100644
--- a/crates/tui/src/prompts/approvals/never.md
+++ b/crates/tui/src/prompts/approvals/never.md
@@ -1,4 +1,4 @@
-## Approval Policy: Never — Tier 2 (Statute)
+##### Approval Policy: Never — Tier 2 (Statute)
 
 All write operations are blocked. You can read, search, and investigate, but you cannot modify the workspace.
 
diff --git a/crates/tui/src/prompts/approvals/suggest.md b/crates/tui/src/prompts/approvals/suggest.md
index dcaa4f3e..5af78956 100644
--- a/crates/tui/src/prompts/approvals/suggest.md
+++ b/crates/tui/src/prompts/approvals/suggest.md
@@ -1,4 +1,4 @@
-## Approval Policy: Suggest — Tier 2 (Statute)
+##### Approval Policy: Suggest — Tier 2 (Statute)
 
 Read-only operations run silently. Write operations (file edits, patches, shell execution, sub-agent spawns, CSV batches) require user approval before executing.
 
diff --git a/crates/tui/src/prompts/modes/agent.md b/crates/tui/src/prompts/modes/agent.md
index 7e591799..adcaa8db 100644
--- a/crates/tui/src/prompts/modes/agent.md
+++ b/crates/tui/src/prompts/modes/agent.md
@@ -1,4 +1,4 @@
-## Mode: Agent
+##### Mode: Agent
 
 You are running in Agent mode — autonomous task execution with tool access.
 
@@ -12,7 +12,7 @@ For simple writes, state the direct edit and proceed through the normal approval
 
 For multi-step initiatives, keep `checklist_write` current. Add `update_plan` only for genuinely useful strategy.
 
-## Efficient Approvals
+##### Efficient Approvals
 
 When your plan includes multiple writes, present them together:
 1. Show `checklist_write` with all write steps listed so the user sees the full scope
@@ -21,7 +21,7 @@ When your plan includes multiple writes, present them together:
 
 Don't sequence approvals one at a time — the user wants context, not interruption. A clear plan with visible checklist items gets approved faster than a series of surprise approval prompts.
 
-## Session Longevity
+##### Session Longevity
 
 Long sessions accumulate context. To stay fast:
 - Open sub-agent sessions for independent work instead of doing everything sequentially
diff --git a/crates/tui/src/prompts/modes/plan.md b/crates/tui/src/prompts/modes/plan.md
index 3ade5c73..3e6e648b 100644
--- a/crates/tui/src/prompts/modes/plan.md
+++ b/crates/tui/src/prompts/modes/plan.md
@@ -1,4 +1,4 @@
-## Mode: Plan
+##### Mode: Plan
 
 You are running in Plan mode — design before implementing.
 
diff --git a/crates/tui/src/prompts/modes/yolo.md b/crates/tui/src/prompts/modes/yolo.md
index e1f4f795..0e867fb5 100644
--- a/crates/tui/src/prompts/modes/yolo.md
+++ b/crates/tui/src/prompts/modes/yolo.md
@@ -1,4 +1,4 @@
-## Mode: YOLO
+##### Mode: YOLO
 
 You are running in YOLO mode — full autonomy, all actions pre-approved.
 

From 0b5d574e63d1923f1604d456347ee7f0b377754b Mon Sep 17 00:00:00 2001
From: zLeoAlex <z.alex.leo@outlook.com>
Date: Sun, 7 Jun 2026 15:15:12 +0800
Subject: [PATCH 2/7] =?UTF-8?q?fix(cache):=20address=20CR=20feedback=20?=
 =?UTF-8?q?=E2=80=94=20blank=20lines,=20heading=20hierarchy,=20debug=5Fass?=
 =?UTF-8?q?ert?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add proper blank lines (\n\n) before mode headings in
  render_runtime_policy_reference (CommonMark/GFM compliance).
- Demote subheadings in agent.md from ##### to ###### so they
  nest correctly under the demoted main heading.
- Add debug_assert! in taxonomy_body() to loudly fail when
  render_core_tool_taxonomy_block format changes, preventing
  silent heading-hierarchy breakage.
---
 crates/tui/src/prompts.rs             | 12 +++++++++---
 crates/tui/src/prompts/modes/agent.md |  4 ++--
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 9361d963..e43c99d4 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -719,6 +719,12 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
 /// producing a broken heading hierarchy (## under ####).
 fn taxonomy_body(mode: AppMode) -> String {
     let block = render_core_tool_taxonomy_block(mode);
+    debug_assert!(
+        block.starts_with("## Core Tool Taxonomy\n\n"),
+        "render_core_tool_taxonomy_block format changed — \
+         taxonomy_body expects `## Core Tool Taxonomy\\n\\n` prefix; \
+         update the prefix or the assertion"
+    );
     block
         .strip_prefix("## Core Tool Taxonomy\n\n")
         .unwrap_or(&block)
@@ -748,19 +754,19 @@ pub(crate) fn render_runtime_policy_reference() -> String {
 
     out.push_str("#### agent\n\n");
     out.push_str(&taxonomy_agent);
-    out.push('\n');
+    out.push_str("\n\n");
     out.push_str(AGENT_MODE.trim());
     out.push_str("\n\n");
 
     out.push_str("#### plan\n\n");
     out.push_str(&taxonomy_plan);
-    out.push('\n');
+    out.push_str("\n\n");
     out.push_str(PLAN_MODE.trim());
     out.push_str("\n\n");
 
     out.push_str("#### yolo\n\n");
     out.push_str(&taxonomy_yolo);
-    out.push('\n');
+    out.push_str("\n\n");
     out.push_str(YOLO_MODE.trim());
     out.push_str("\n\n");
 
diff --git a/crates/tui/src/prompts/modes/agent.md b/crates/tui/src/prompts/modes/agent.md
index adcaa8db..38ae028c 100644
--- a/crates/tui/src/prompts/modes/agent.md
+++ b/crates/tui/src/prompts/modes/agent.md
@@ -12,7 +12,7 @@ For simple writes, state the direct edit and proceed through the normal approval
 
 For multi-step initiatives, keep `checklist_write` current. Add `update_plan` only for genuinely useful strategy.
 
-##### Efficient Approvals
+###### Efficient Approvals
 
 When your plan includes multiple writes, present them together:
 1. Show `checklist_write` with all write steps listed so the user sees the full scope
@@ -21,7 +21,7 @@ When your plan includes multiple writes, present them together:
 
 Don't sequence approvals one at a time — the user wants context, not interruption. A clear plan with visible checklist items gets approved faster than a series of surprise approval prompts.
 
-##### Session Longevity
+###### Session Longevity
 
 Long sessions accumulate context. To stay fast:
 - Open sub-agent sessions for independent work instead of doing everything sequentially

From 12167b39c380f36b1d8efab3762dd34cbb506504 Mon Sep 17 00:00:00 2001
From: zLeoAlex <z.alex.leo@outlook.com>
Date: Sun, 7 Jun 2026 15:19:27 +0800
Subject: [PATCH 3/7] refactor(cache): replace taxonomy_body strip hack with
 source-level render_core_tool_taxonomy_body
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add render_core_tool_taxonomy_body(mode) that generates the tool
  taxonomy text without the ## Core Tool Taxonomy heading.
- Refactor render_core_tool_taxonomy_block to use the body function
  internally (DRY).
- Delete taxonomy_body() — a downstream strip_prefix hack that
  worked around the source format instead of fixing it.
- Also removes the now-unnecessary debug_assert! (over-defensive,
  since the two functions are co-located in the same file).
---
 crates/tui/src/prompts.rs | 39 +++++++++++++++++----------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index e43c99d4..032fd07a 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -714,27 +714,10 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
 /// session) so the per-turn `<runtime_prompt>` tag can be a minimal pointer
 /// (`<runtime_prompt mode="yolo" approval="auto"/>`) instead of repeating the
 /// full policy text on every API request.
-/// Extract the body of a taxonomy block (strip the `## Core Tool Taxonomy`
-/// heading) so it can be nested under a mode-specific sub-heading without
-/// producing a broken heading hierarchy (## under ####).
-fn taxonomy_body(mode: AppMode) -> String {
-    let block = render_core_tool_taxonomy_block(mode);
-    debug_assert!(
-        block.starts_with("## Core Tool Taxonomy\n\n"),
-        "render_core_tool_taxonomy_block format changed — \
-         taxonomy_body expects `## Core Tool Taxonomy\\n\\n` prefix; \
-         update the prefix or the assertion"
-    );
-    block
-        .strip_prefix("## Core Tool Taxonomy\n\n")
-        .unwrap_or(&block)
-        .to_string()
-}
-
 pub(crate) fn render_runtime_policy_reference() -> String {
-    let taxonomy_agent = taxonomy_body(AppMode::Agent);
-    let taxonomy_plan = taxonomy_body(AppMode::Plan);
-    let taxonomy_yolo = taxonomy_body(AppMode::Yolo);
+    let taxonomy_agent = render_core_tool_taxonomy_body(AppMode::Agent);
+    let taxonomy_plan = render_core_tool_taxonomy_body(AppMode::Plan);
+    let taxonomy_yolo = render_core_tool_taxonomy_body(AppMode::Yolo);
 
     let mut out = String::with_capacity(8192);
     out.push_str("## Runtime Policy Reference\n\n");
@@ -809,7 +792,10 @@ const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"];
 const TOOL_TAXONOMY_GIT: &[&str] = &["git_status", "git_diff"];
 const TOOL_TAXONOMY_VERIFICATION: &[&str] = &["run_tests", "run_verifiers"];
 
-pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
+/// Return the core tool taxonomy body **without** a markdown heading.
+/// Suitable for embedding under a mode-specific sub-heading in the
+/// Runtime Policy Reference without producing a broken heading hierarchy.
+pub(crate) fn render_core_tool_taxonomy_body(mode: AppMode) -> String {
     let core_tools = core_taxonomy_tools_for_mode(mode);
     let mut sentences = Vec::new();
 
@@ -827,7 +813,16 @@ pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
         !sentences.is_empty(),
         "core tool taxonomy has no active tool groups"
     );
-    format!("## Core Tool Taxonomy\n\n{}", sentences.join(" "))
+    sentences.join(" ")
+}
+
+/// Render the full taxonomy block **with** a `## Core Tool Taxonomy` heading.
+/// Kept for backward-compatibility with existing callers (tests, diagnostics).
+pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
+    format!(
+        "## Core Tool Taxonomy\n\n{}",
+        render_core_tool_taxonomy_body(mode)
+    )
 }
 
 fn core_taxonomy_tools_for_mode(mode: AppMode) -> Vec<&'static str> {

From 039abb2ae65fb4d9f6158e6a22e1ab649e4a99e1 Mon Sep 17 00:00:00 2001
From: zLeoAlex <z.alex.leo@outlook.com>
Date: Sun, 7 Jun 2026 15:20:51 +0800
Subject: [PATCH 4/7] refactor(cache): remove render_core_tool_taxonomy_block,
 switch callers to body variant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Switch the 2 remaining test callers to render_core_tool_taxonomy_body
  (neither test depends on the ## heading — they check content only).
- Delete render_core_tool_taxonomy_block — zero production callers after
  the previous refactor.
---
 crates/tui/src/prompts.rs | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 032fd07a..6343f7e9 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -816,15 +816,6 @@ pub(crate) fn render_core_tool_taxonomy_body(mode: AppMode) -> String {
     sentences.join(" ")
 }
 
-/// Render the full taxonomy block **with** a `## Core Tool Taxonomy` heading.
-/// Kept for backward-compatibility with existing callers (tests, diagnostics).
-pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
-    format!(
-        "## Core Tool Taxonomy\n\n{}",
-        render_core_tool_taxonomy_body(mode)
-    )
-}
-
 fn core_taxonomy_tools_for_mode(mode: AppMode) -> Vec<&'static str> {
     let core_tools = crate::core::engine::default_active_native_tool_names();
     core_tools
@@ -1606,7 +1597,7 @@ mod tests {
 
     #[test]
     fn plan_prompt_taxonomy_omits_run_tests() {
-        let taxonomy = render_core_tool_taxonomy_block(AppMode::Plan);
+        let taxonomy = render_core_tool_taxonomy_body(AppMode::Plan);
         // Plan taxonomy should omit execution tools (verified at the source).
         assert!(
             taxonomy.contains("for discovery") && taxonomy.contains("for git inspection"),
@@ -2056,7 +2047,7 @@ mod tests {
             "base prompt must not contain static CJK priming tokens"
         );
         for mode in [AppMode::Agent, AppMode::Plan, AppMode::Yolo] {
-            let taxonomy = render_core_tool_taxonomy_block(mode);
+            let taxonomy = render_core_tool_taxonomy_body(mode);
             assert!(
                 !contains_cjk(&taxonomy),
                 "tool taxonomy must not contain static CJK priming tokens: {taxonomy:?}"

From c6c3d2cc4dc0b99e365471233c0017f1a8fd917b Mon Sep 17 00:00:00 2001
From: zLeoAlex <z.alex.leo@outlook.com>
Date: Sun, 7 Jun 2026 15:22:53 +0800
Subject: [PATCH 5/7] refactor(cache): inline single-call helpers, remove dead
 code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Inline mode_prompt_marker_value and approval_prompt_marker_value into
  runtime_prompt_text (each called exactly once).
- Remove default_approval_mode_for_mode — zero callers.
---
 crates/tui/src/core/engine.rs | 22 +++++++---------------
 crates/tui/src/prompts.rs     |  8 --------
 2 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index 082ad4c5..2bb975f7 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -2650,27 +2650,19 @@ fn agent_approval_mode_for_turn(
 /// model looks up the corresponding rules from the system prompt.  Reduces
 /// per-request overhead from ~500 tokens to ~12 tokens.
 fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
-    let mode_str = mode_prompt_marker_value(mode);
-    let approval_str = approval_prompt_marker_value(approval_mode);
-    format!(
-        "<runtime_prompt visibility=\"internal\" mode=\"{mode_str}\" approval=\"{approval_str}\"/>"
-    )
-}
-
-fn mode_prompt_marker_value(mode: AppMode) -> &'static str {
-    match mode {
+    let mode_str = match mode {
         AppMode::Agent => "agent",
         AppMode::Plan => "plan",
         AppMode::Yolo => "yolo",
-    }
-}
-
-fn approval_prompt_marker_value(approval_mode: crate::tui::approval::ApprovalMode) -> &'static str {
-    match approval_mode {
+    };
+    let approval_str = match approval_mode {
         crate::tui::approval::ApprovalMode::Auto => "auto",
         crate::tui::approval::ApprovalMode::Suggest => "suggest",
         crate::tui::approval::ApprovalMode::Never => "never",
-    }
+    };
+    format!(
+        "<runtime_prompt visibility=\"internal\" mode=\"{mode_str}\" approval=\"{approval_str}\"/>"
+    )
 }
 
 /// Spawn the engine in a background task
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 6343f7e9..729bbfd5 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -701,14 +701,6 @@ impl Personality {
 
 // ── Composition ───────────────────────────────────────────────────────
 
-fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
-    match mode {
-        AppMode::Agent => ApprovalMode::Suggest,
-        AppMode::Yolo => ApprovalMode::Auto,
-        AppMode::Plan => ApprovalMode::Never,
-    }
-}
-
 /// Generate a static reference block containing all mode and approval policy
 /// descriptions. This lives in the frozen system-prompt prefix (sent once per
 /// session) so the per-turn `<runtime_prompt>` tag can be a minimal pointer

From 7b900b8699a644bb59835d108c4291bddd79f7eb Mon Sep 17 00:00:00 2001
From: zLeoAlex <z.alex.leo@outlook.com>
Date: Sun, 7 Jun 2026 15:26:54 +0800
Subject: [PATCH 6/7] =?UTF-8?q?test(cache):=20rename=20misleading=20test?=
 =?UTF-8?q?=20=E2=80=94=20does=20not=20exercise=20Op::ChangeMode=20dispatc?=
 =?UTF-8?q?h?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Rename mode_change_op_updates_current_mode_and_emits_session_updated
  to current_mode_field_assignment_takes_effect_synchronously.
- The test mutates engine.current_mode directly rather than dispatching
  Op::ChangeMode, so the old name overstated what it covers. The dispatch
  path is covered separately by
  change_mode_op_updates_current_mode_and_emits_status.
---
 crates/tui/src/core/engine/tests.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 6476d0c1..6dafe132 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -2378,7 +2378,10 @@ fn turn_metadata_mode_updates_with_change_mode_op() {
 }
 
 #[test]
-fn mode_change_op_updates_current_mode_and_emits_session_updated() {
+fn current_mode_field_assignment_takes_effect_synchronously() {
+    // Basic unit-level invariant: the current_mode field mutates as expected.
+    // Op::ChangeMode dispatch is exercised by the integration test
+    // change_mode_op_updates_current_mode_and_emits_status.
     let tmp = tempdir().expect("tempdir");
     let config = EngineConfig {
         workspace: tmp.path().to_path_buf(),
@@ -2388,9 +2391,6 @@ fn mode_change_op_updates_current_mode_and_emits_session_updated() {
     let (mut engine, _handle) = Engine::new(config, &Config::default());
     assert_eq!(engine.current_mode, AppMode::Agent);
 
-    // Op::ChangeMode updates current_mode synchronously.
-    // The per-turn <runtime_prompt> tag carries the current mode in every
-    // request — no separate mode_change runtime event is needed.
     engine.current_mode = AppMode::Yolo;
     assert_eq!(engine.current_mode, AppMode::Yolo);
 }

From 55d7499408681a754b36397c176c31884eaa8646 Mon Sep 17 00:00:00 2001
From: zLeoAlex <z.alex.leo@outlook.com>
Date: Sun, 7 Jun 2026 18:31:36 +0800
Subject: [PATCH 7/7] test: add runtime_policy_reference composition test,
 strengthen ChangeMode tests, fix outdated comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add runtime_policy_reference_is_included_in_full_prompt test to verify
  that render_runtime_policy_reference() output lands in the composed
  system prompt. This guards against silent breakage if the push_str()
  call is accidentally removed (before this test, every existing test
  would still have passed).

- Strengthen change_mode_op_updates_current_mode_and_emits_status:
  destructure SessionUpdated to assert that session messages do NOT
  contain <runtime_prompt> tags after mode change — verifying the core
  invariant that Op::ChangeMode does not write session history.

- Extend current_mode_field_assignment_takes_effect_synchronously:
  now also verifies that messages_with_turn_metadata() produces the
  correct runtime tag both before the switch (mode="agent") and after
  it (mode="yolo" approval="auto"), covering tag generation for the
  current mode.

- Fix outdated comments in composed_prompt_no_longer_inlines_tool_taxonomy
  and plan_prompt_taxonomy_omits_run_tests: replace stale references to
  deleted <mode_prompt> metadata with accurate descriptions of the
  ## Runtime Policy Reference section.
---
 crates/tui/src/core/engine/tests.rs | 55 ++++++++++++++++++++---
 crates/tui/src/prompts.rs           | 70 +++++++++++++++++++++++++++--
 2 files changed, 116 insertions(+), 9 deletions(-)

diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 6dafe132..92b085d4 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -1852,9 +1852,22 @@ async fn change_mode_op_updates_current_mode_and_emits_status() {
         .await
         .expect("session update after mode switch")
         .expect("event");
+    let Event::SessionUpdated { messages, .. } = session_updated else {
+        panic!(
+            "should emit SessionUpdated after mode change, got: {session_updated:?}"
+        );
+    };
     assert!(
-        matches!(session_updated, Event::SessionUpdated { .. }),
-        "should emit SessionUpdated after mode change, got: {session_updated:?}"
+        messages.iter().all(|message| {
+            message.content.iter().all(|block| {
+                !matches!(
+                    block,
+                    ContentBlock::Text { text, .. }
+                        if text.contains("<runtime_prompt")
+                )
+            })
+        }),
+        "runtime prompt tags must not be persisted into session messages after mode change"
     );
 
     // Also expect a status event
@@ -2379,9 +2392,10 @@ fn turn_metadata_mode_updates_with_change_mode_op() {
 
 #[test]
 fn current_mode_field_assignment_takes_effect_synchronously() {
-    // Basic unit-level invariant: the current_mode field mutates as expected.
-    // Op::ChangeMode dispatch is exercised by the integration test
-    // change_mode_op_updates_current_mode_and_emits_status.
+    // Basic unit-level invariant: the current_mode field mutates as expected
+    // and the per-turn <runtime_prompt> tag reflects the current mode.
+    // Op::ChangeMode dispatch through the run loop is exercised by the
+    // integration test change_mode_op_updates_current_mode_and_emits_status.
     let tmp = tempdir().expect("tempdir");
     let config = EngineConfig {
         workspace: tmp.path().to_path_buf(),
@@ -2391,8 +2405,39 @@ fn current_mode_field_assignment_takes_effect_synchronously() {
     let (mut engine, _handle) = Engine::new(config, &Config::default());
     assert_eq!(engine.current_mode, AppMode::Agent);
 
+    // Verify runtime tag in Agent mode
+    let agent_messages = engine.messages_with_turn_metadata();
+    let agent_tag = agent_messages.last().expect("runtime tag message");
+    let ContentBlock::Text { text: agent_text, .. } =
+        agent_tag.content.first().expect("text block")
+    else {
+        panic!("expected text runtime tag in Agent mode");
+    };
+    assert!(
+        agent_text.contains("mode=\"agent\""),
+        "Agent mode should produce runtime tag with mode=\"agent\", got: {agent_text}"
+    );
+
+    // Switch to YOLO
     engine.current_mode = AppMode::Yolo;
     assert_eq!(engine.current_mode, AppMode::Yolo);
+
+    // Verify runtime tag reflects the YOLO mode with auto approval
+    let yolo_messages = engine.messages_with_turn_metadata();
+    let yolo_tag = yolo_messages.last().expect("runtime tag message");
+    let ContentBlock::Text { text: yolo_text, .. } =
+        yolo_tag.content.first().expect("text block")
+    else {
+        panic!("expected text runtime tag in YOLO mode");
+    };
+    assert!(
+        yolo_text.contains("mode=\"yolo\""),
+        "YOLO mode should produce runtime tag with mode=\"yolo\", got: {yolo_text}"
+    );
+    assert!(
+        yolo_text.contains("approval=\"auto\""),
+        "YOLO mode should project auto approval in runtime tag, got: {yolo_text}"
+    );
 }
 
 #[test]
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 729bbfd5..dab8bb30 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -1580,8 +1580,10 @@ mod tests {
             Personality::Calm,
             "deepseek-v4-pro",
         );
-        // The generated "## Core Tool Taxonomy" block now travels in the
-        // request-time <mode_prompt> metadata rather than being prepended here.
+        // The core tool taxonomy (grep_files / git_status / run_tests hints)
+        // is no longer prepended as a standalone "## Core Tool Taxonomy" block.
+        // It now lives inside the "## Runtime Policy Reference" section of the
+        // system prompt, scoped under each mode sub-heading.
         // (The "## Toolbox" section from the Constitutional preamble remains.)
         assert!(!prompt.contains("## Core Tool Taxonomy"));
         assert!(prompt.contains("You are deepseek-v4-pro"));
@@ -1602,8 +1604,9 @@ mod tests {
             "Plan taxonomy must not mention run_tests, run_verifiers, or exec_shell"
         );
         // The taxonomy block is rendered correctly but no longer inlined
-        // into the base system prompt — it travels in request-time
-        // <mode_prompt> metadata instead.
+        // into the base system prompt — it lives inside the
+        // "## Runtime Policy Reference" section of the system prompt,
+        // scoped under each mode sub-heading.
     }
 
     #[test]
@@ -1645,6 +1648,65 @@ mod tests {
         );
     }
 
+    #[test]
+    fn runtime_policy_reference_is_included_in_full_prompt() {
+        let tmp = tempdir().expect("tempdir");
+        let text = match system_prompt_for_mode_with_context_skills_session_and_approval(
+            AppMode::Agent,
+            tmp.path(),
+            None,
+            None,
+            None,
+            PromptSessionContext::default(),
+        ) {
+            SystemPrompt::Text(text) => text,
+            SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
+        };
+
+        assert!(
+            text.contains("## Runtime Policy Reference"),
+            "full system prompt must contain the Runtime Policy Reference lookup table"
+        );
+        assert!(
+            text.contains(
+                "<runtime_prompt visibility=\"internal\" mode=\"<mode>\" approval=\"<approval>\"/>"
+            ),
+            "Runtime Policy Reference must explain the per-turn tag format"
+        );
+        assert!(
+            text.contains("### Modes"),
+            "Runtime Policy Reference must contain the Modes section"
+        );
+        assert!(
+            text.contains("#### agent"),
+            "Runtime Policy Reference must document Agent mode"
+        );
+        assert!(
+            text.contains("#### plan"),
+            "Runtime Policy Reference must document Plan mode"
+        );
+        assert!(
+            text.contains("#### yolo"),
+            "Runtime Policy Reference must document YOLO mode"
+        );
+        assert!(
+            text.contains("### Approval Policies"),
+            "Runtime Policy Reference must contain the Approval Policies section"
+        );
+        assert!(
+            text.contains("#### auto"),
+            "Runtime Policy Reference must document auto approval"
+        );
+        assert!(
+            text.contains("#### suggest"),
+            "Runtime Policy Reference must document suggest approval"
+        );
+        assert!(
+            text.contains("#### never"),
+            "Runtime Policy Reference must document never approval"
+        );
+    }
+
     #[test]
     fn system_prompt_merges_workspace_and_configured_skills_dir() {
         let _env_guard = crate::test_support::lock_test_env();