From 427bd5d52f3d764c211e5483584c9495824d0195 Mon Sep 17 00:00:00 2001 From: zLeoAlex Date: Sun, 7 Jun 2026 15:03:43 +0800 Subject: [PATCH] feat(cache): slim runtime_prompt to minimal tag, move policy descriptions to system prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add render_runtime_policy_reference() in prompts.rs containing all mode and approval policy descriptions in the frozen system-prompt prefix (sent once per session, cache-hit thereafter). - Simplify runtime_prompt_text() from a ~500-token XML block to a ~16-token self-closing tag that carries only the `mode` and `approval` attributes. - Fix markdown heading hierarchy in all prompts/modes/*.md and prompts/approvals/*.md (## → #####) to nest correctly under ####. - Remove now-unused legacy functions: mode_prompt(), approval_prompt_for_mode(), mode_change_runtime_message(). - Simplify Op::ChangeMode: no longer persists a mode_change event (the next turn's tag carries the current mode). - Update and rename affected tests. Builds on #2801. Reduces per-request runtime prompt overhead by 97% (~471 tokens saved per API call). System prompt grows by ~1325 tokens in the frozen prefix (one-time miss cost); break-even at 3 API calls.
--- crates/tui/src/core/engine.rs | 106 ++++---------------- crates/tui/src/core/engine/tests.rs | 98 +++++++----------- crates/tui/src/prompts.rs | 95 ++++++++++++++---- crates/tui/src/prompts/approvals/auto.md | 2 +- crates/tui/src/prompts/approvals/never.md | 2 +- crates/tui/src/prompts/approvals/suggest.md | 2 +- crates/tui/src/prompts/modes/agent.md | 6 +- crates/tui/src/prompts/modes/plan.md | 2 +- crates/tui/src/prompts/modes/yolo.md | 2 +- 9 files changed, 144 insertions(+), 171 deletions(-) diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index a890240d..082ad4c5 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1245,16 +1245,8 @@ impl Engine { let _ = self.tx_event.send(Event::AgentList { agents }).await; } Op::ChangeMode { mode } => { - let previous_mode = self.current_mode; self.current_mode = mode; self.emit_session_updated().await; - // Notify the agent that the mode has changed so it can re-evaluate - // any operations that were blocked by the previous mode's policy. - if previous_mode != mode { - let msg = Self::mode_change_runtime_message(previous_mode, mode); - self.session.add_message(msg); - self.emit_session_updated().await; - } let _ = self .tx_event .send(Event::status(format!( @@ -1414,47 +1406,6 @@ impl Engine { .await; } - /// Build a runtime event message notifying the agent that the operating mode has changed. - /// This lets the agent re-evaluate any operations that were blocked by the previous mode. 
- fn mode_change_runtime_message(previous_mode: AppMode, new_mode: AppMode) -> Message { - let (policy_note, re_eval_hint) = match new_mode { - AppMode::Yolo => ( - "all operations run automatically without requiring user approval", - "Re-evaluate any previously blocked write, patch, or shell operations \ - — they are now auto-approved.", - ), - AppMode::Agent => ( - "read-only operations run silently; writes, patches, and shell \ - commands require user approval", - "Any operations you ran automatically under YOLO mode now require \ - explicit user approval before executing.", - ), - AppMode::Plan => ( - "all writes and patches are blocked; shell and code execution are unavailable", - "Any previously planned operations that require writes or shell access \ - must wait until the mode changes back to Agent or YOLO.", - ), - }; - Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: format!( - "\n\ -This is an internal runtime event, not user input. The operating mode has changed \ -from {previous} mode to {new} mode.\n\n\ -In {new} mode: {policy}\n\n\ -{re_eval}\n\ -", - previous = previous_mode.description(), - new = new_mode.description(), - policy = policy_note, - re_eval = re_eval_hint, - ), - cache_control: None, - }], - } - } - async fn add_session_message(&mut self, message: Message) { self.session.add_message(message); self.emit_session_updated().await; @@ -2692,51 +2643,34 @@ fn agent_approval_mode_for_turn( } } -fn mode_prompt_marker(mode: AppMode) -> String { +/// Produce a minimal runtime-policy tag for the per-turn transient user message. +/// +/// All mode and approval policy descriptions live in the frozen system-prompt +/// prefix (`render_runtime_policy_reference()`). This tag is a pointer — the +/// model looks up the corresponding rules from the system prompt. Reduces +/// per-request overhead from ~500 tokens to ~16 tokens.
+fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String { + let mode_str = mode_prompt_marker_value(mode); + let approval_str = approval_prompt_marker_value(approval_mode); format!( - "", - match mode { - AppMode::Agent => "agent", - AppMode::Plan => "plan", - AppMode::Yolo => "yolo", - } + "" ) } -fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String { - format!( - "", - match approval_mode { - crate::tui::approval::ApprovalMode::Auto => "auto", - crate::tui::approval::ApprovalMode::Suggest => "suggest", - crate::tui::approval::ApprovalMode::Never => "never", - } - ) -} - -fn mode_prompt_text(mode: AppMode) -> &'static str { +fn mode_prompt_marker_value(mode: AppMode) -> &'static str { match mode { - AppMode::Agent => prompts::AGENT_MODE, - AppMode::Plan => prompts::PLAN_MODE, - AppMode::Yolo => prompts::YOLO_MODE, + AppMode::Agent => "agent", + AppMode::Plan => "plan", + AppMode::Yolo => "yolo", } } -fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String { - let marker = mode_prompt_marker(mode); - let mode_text = mode_prompt_text(mode).trim(); - let taxonomy = prompts::render_core_tool_taxonomy_block(mode); - let approval_marker = approval_prompt_marker(approval_mode); - let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim(); - format!( - "\n\ -This is runtime control metadata for the current request, not user input. \ -Apply it to the next assistant response and tool calls. 
It supersedes any \ -earlier mode or approval metadata in the transcript.\n\n\ -{marker}\n{taxonomy}\n{mode_text}\n\n\n\ -{approval_marker}\n{approval_text}\n\n\ -" - ) +fn approval_prompt_marker_value(approval_mode: crate::tui::approval::ApprovalMode) -> &'static str { + match approval_mode { + crate::tui::approval::ApprovalMode::Auto => "auto", + crate::tui::approval::ApprovalMode::Suggest => "suggest", + crate::tui::approval::ApprovalMode::Never => "never", + } } /// Spawn the engine in a background task diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 7369c17c..6476d0c1 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -1748,11 +1748,11 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() { !matches!( block, ContentBlock::Text { text, .. } - if text.contains("")); + assert!(text.contains("mode=\"plan\"")); assert!( - text.contains(""), + text.contains("approval=\"never\""), "Plan mode should project its fixed never-approval policy: {text}" ); } #[tokio::test] -async fn change_mode_op_injects_runtime_event_into_session_messages() { +async fn change_mode_op_updates_current_mode_and_emits_status() { let tmp = tempdir().expect("tempdir"); let config = EngineConfig { workspace: tmp.path().to_path_buf(), @@ -1837,7 +1837,6 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() { let (engine, handle) = Engine::new(config, &Config::default()); let run = tokio::spawn(engine.run()); - // Switch from default Agent → YOLO handle .send(Op::ChangeMode { mode: AppMode::Yolo, @@ -1845,40 +1844,30 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() { .await .expect("send change mode"); - // Collect session-updated events until we see the injected message - let messages = { - let mut rx = handle.rx_event.write().await; - loop { - let event = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) - .await - .expect("session 
update after mode switch") - .expect("event"); - if let Event::SessionUpdated { messages, .. } = event { - // The last message should be our runtime event - if let Some(last) = messages.last() - && let ContentBlock::Text { text, .. } = - last.content.first().expect("text block") - && text.contains("kind=\"mode_change\"") - { - break messages; - } - } - } - }; - run.abort(); + // Expect a SessionUpdated event confirming the mode change (the + // per-turn tag carries the mode in every request, + // so no separate persistence of a mode_change runtime event is needed). + let mut rx = handle.rx_event.write().await; + let session_updated = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) + .await + .expect("session update after mode switch") + .expect("event"); + assert!( + matches!(session_updated, Event::SessionUpdated { .. }), + "should emit SessionUpdated after mode change, got: {session_updated:?}" + ); - let last = messages.last().expect("at least one message"); - let ContentBlock::Text { text, .. } = last.content.first().expect("text block") else { - panic!("expected text block"); - }; + // Also expect a status event + let status = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) + .await + .expect("status after mode switch") + .expect("event"); assert!( - text.contains("Agent mode") && text.contains("YOLO mode"), - "should contain both previous and new mode: {text}" - ); - assert!( - text.contains("Re-evaluate"), - "should tell agent to re-evaluate: {text}" + matches!(status, Event::Status { .. }), + "should emit Status after mode change, got: {status:?}" ); + + run.abort(); } #[test] @@ -2389,30 +2378,21 @@ fn turn_metadata_mode_updates_with_change_mode_op() { } #[test] -fn mode_change_runtime_message_format() { - let msg = Engine::mode_change_runtime_message(AppMode::Agent, AppMode::Yolo); - - assert_eq!(msg.role, "user"); - let ContentBlock::Text { text, .. 
} = msg.content.first().expect("text block") else { - panic!("expected text block"); +fn mode_change_op_updates_current_mode_and_emits_session_updated() { + let tmp = tempdir().expect("tempdir"); + let config = EngineConfig { + workspace: tmp.path().to_path_buf(), + model: "deepseek-v4-pro".to_string(), + ..Default::default() }; + let (mut engine, _handle) = Engine::new(config, &Config::default()); + assert_eq!(engine.current_mode, AppMode::Agent); - assert!( - text.contains("codewhale:runtime_event"), - "should be a runtime event message" - ); - assert!( - text.contains("kind=\"mode_change\""), - "should have mode_change kind" - ); - assert!( - text.contains("Agent mode") && text.contains("YOLO mode"), - "should mention both previous and new mode: {text}" - ); - assert!( - text.contains("Re-evaluate"), - "should tell agent to re-evaluate blocked operations: {text}" - ); + // Op::ChangeMode updates current_mode synchronously. + // The per-turn tag carries the current mode in every + // request — no separate mode_change runtime event is needed. 
+ engine.current_mode = AppMode::Yolo; + assert_eq!(engine.current_mode, AppMode::Yolo); } #[test] diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 07c58793..9361d963 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -701,14 +701,6 @@ impl Personality { // ── Composition ─────────────────────────────────────────────────────── -fn mode_prompt(mode: AppMode) -> &'static str { - match mode { - AppMode::Agent => AGENT_MODE, - AppMode::Yolo => YOLO_MODE, - AppMode::Plan => PLAN_MODE, - } -} - fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode { match mode { AppMode::Agent => ApprovalMode::Suggest, @@ -717,16 +709,76 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode { } } -pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str { - match mode { - AppMode::Yolo => AUTO_APPROVAL, - AppMode::Plan => NEVER_APPROVAL, - AppMode::Agent => match approval_mode { - ApprovalMode::Auto => AUTO_APPROVAL, - ApprovalMode::Suggest => SUGGEST_APPROVAL, - ApprovalMode::Never => NEVER_APPROVAL, - }, - } +/// Generate a static reference block containing all mode and approval policy +/// descriptions. This lives in the frozen system-prompt prefix (sent once per +/// session) so the per-turn `` tag can be a minimal pointer +/// (``) instead of repeating the +/// full policy text on every API request. +/// Extract the body of a taxonomy block (strip the `## Core Tool Taxonomy` +/// heading) so it can be nested under a mode-specific sub-heading without +/// producing a broken heading hierarchy (## under ####). 
+fn taxonomy_body(mode: AppMode) -> String { + let block = render_core_tool_taxonomy_block(mode); + block + .strip_prefix("## Core Tool Taxonomy\n\n") + .unwrap_or(&block) + .to_string() +} + +pub(crate) fn render_runtime_policy_reference() -> String { + let taxonomy_agent = taxonomy_body(AppMode::Agent); + let taxonomy_plan = taxonomy_body(AppMode::Plan); + let taxonomy_yolo = taxonomy_body(AppMode::Yolo); + + let mut out = String::with_capacity(8192); + out.push_str("## Runtime Policy Reference\n\n"); + + // Protocol explanation — how the per-turn tag maps to this reference. + out.push_str( + "Each turn, the latest message in the transcript will contain a \ + `` tag that specifies the currently active mode and \ + approval policy. When you see this tag, look up the corresponding \ + rules below and apply them for the current turn.\n\n\ + The tag format is:\n\ + `\" approval=\"\"/>`\n\n", + ); + + // ── Mode reference ───────────────────────────────────────────────── + out.push_str("### Modes\n\n"); + + out.push_str("#### agent\n\n"); + out.push_str(&taxonomy_agent); + out.push('\n'); + out.push_str(AGENT_MODE.trim()); + out.push_str("\n\n"); + + out.push_str("#### plan\n\n"); + out.push_str(&taxonomy_plan); + out.push('\n'); + out.push_str(PLAN_MODE.trim()); + out.push_str("\n\n"); + + out.push_str("#### yolo\n\n"); + out.push_str(&taxonomy_yolo); + out.push('\n'); + out.push_str(YOLO_MODE.trim()); + out.push_str("\n\n"); + + // ── Approval policy reference ────────────────────────────────────── + out.push_str("### Approval Policies\n\n"); + + out.push_str("#### auto\n\n"); + out.push_str(AUTO_APPROVAL.trim()); + out.push_str("\n\n"); + + out.push_str("#### suggest\n\n"); + out.push_str(SUGGEST_APPROVAL.trim()); + out.push_str("\n\n"); + + out.push_str("#### never\n\n"); + out.push_str(NEVER_APPROVAL.trim()); + + out } /// Compose the full system prompt in deterministic order: @@ -1165,6 +1217,13 @@ pub fn 
system_prompt_for_mode_with_context_skills_session_and_approval( full_prompt.push_str("\n\n"); full_prompt.push_str(COMPACT_TEMPLATE); + // 5a. Runtime policy reference — all mode and approval policy descriptions + // live here in the frozen prefix so the per-turn tag + // can be a minimal pointer instead of repeating the full policy text + // on every API request (up to ~500 tokens saved per turn). + full_prompt.push_str("\n\n"); + full_prompt.push_str(&render_runtime_policy_reference()); + // ── Volatile-content boundary ───────────────────────────────────────── // Everything below drifts mid-session and busts the prefix cache for // bytes that follow. All static layers (mode, project context, env, diff --git a/crates/tui/src/prompts/approvals/auto.md b/crates/tui/src/prompts/approvals/auto.md index 368e826e..338f6e73 100644 --- a/crates/tui/src/prompts/approvals/auto.md +++ b/crates/tui/src/prompts/approvals/auto.md @@ -1,4 +1,4 @@ -## Approval Policy: Auto — Tier 2 (Statute) +##### Approval Policy: Auto — Tier 2 (Statute) All tool calls are pre-approved. You will not see approval prompts — your actions execute immediately. diff --git a/crates/tui/src/prompts/approvals/never.md b/crates/tui/src/prompts/approvals/never.md index 8682bdfe..eb2c0a1c 100644 --- a/crates/tui/src/prompts/approvals/never.md +++ b/crates/tui/src/prompts/approvals/never.md @@ -1,4 +1,4 @@ -## Approval Policy: Never — Tier 2 (Statute) +##### Approval Policy: Never — Tier 2 (Statute) All write operations are blocked. You can read, search, and investigate, but you cannot modify the workspace. diff --git a/crates/tui/src/prompts/approvals/suggest.md b/crates/tui/src/prompts/approvals/suggest.md index dcaa4f3e..5af78956 100644 --- a/crates/tui/src/prompts/approvals/suggest.md +++ b/crates/tui/src/prompts/approvals/suggest.md @@ -1,4 +1,4 @@ -## Approval Policy: Suggest — Tier 2 (Statute) +##### Approval Policy: Suggest — Tier 2 (Statute) Read-only operations run silently. 
Write operations (file edits, patches, shell execution, sub-agent spawns, CSV batches) require user approval before executing. diff --git a/crates/tui/src/prompts/modes/agent.md b/crates/tui/src/prompts/modes/agent.md index 7e591799..adcaa8db 100644 --- a/crates/tui/src/prompts/modes/agent.md +++ b/crates/tui/src/prompts/modes/agent.md @@ -1,4 +1,4 @@ -## Mode: Agent +##### Mode: Agent You are running in Agent mode — autonomous task execution with tool access. @@ -12,7 +12,7 @@ For simple writes, state the direct edit and proceed through the normal approval For multi-step initiatives, keep `checklist_write` current. Add `update_plan` only for genuinely useful strategy. -## Efficient Approvals +##### Efficient Approvals When your plan includes multiple writes, present them together: 1. Show `checklist_write` with all write steps listed so the user sees the full scope @@ -21,7 +21,7 @@ When your plan includes multiple writes, present them together: Don't sequence approvals one at a time — the user wants context, not interruption. A clear plan with visible checklist items gets approved faster than a series of surprise approval prompts. -## Session Longevity +##### Session Longevity Long sessions accumulate context. To stay fast: - Open sub-agent sessions for independent work instead of doing everything sequentially diff --git a/crates/tui/src/prompts/modes/plan.md b/crates/tui/src/prompts/modes/plan.md index 3ade5c73..3e6e648b 100644 --- a/crates/tui/src/prompts/modes/plan.md +++ b/crates/tui/src/prompts/modes/plan.md @@ -1,4 +1,4 @@ -## Mode: Plan +##### Mode: Plan You are running in Plan mode — design before implementing. diff --git a/crates/tui/src/prompts/modes/yolo.md b/crates/tui/src/prompts/modes/yolo.md index e1f4f795..0e867fb5 100644 --- a/crates/tui/src/prompts/modes/yolo.md +++ b/crates/tui/src/prompts/modes/yolo.md @@ -1,4 +1,4 @@ -## Mode: YOLO +##### Mode: YOLO You are running in YOLO mode — full autonomy, all actions pre-approved.