diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index a890240d..2bb975f7 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1245,16 +1245,8 @@ impl Engine { let _ = self.tx_event.send(Event::AgentList { agents }).await; } Op::ChangeMode { mode } => { - let previous_mode = self.current_mode; self.current_mode = mode; self.emit_session_updated().await; - // Notify the agent that the mode has changed so it can re-evaluate - // any operations that were blocked by the previous mode's policy. - if previous_mode != mode { - let msg = Self::mode_change_runtime_message(previous_mode, mode); - self.session.add_message(msg); - self.emit_session_updated().await; - } let _ = self .tx_event .send(Event::status(format!( @@ -1414,47 +1406,6 @@ impl Engine { .await; } - /// Build a runtime event message notifying the agent that the operating mode has changed. - /// This lets the agent re-evaluate any operations that were blocked by the previous mode. - fn mode_change_runtime_message(previous_mode: AppMode, new_mode: AppMode) -> Message { - let (policy_note, re_eval_hint) = match new_mode { - AppMode::Yolo => ( - "all operations run automatically without requiring user approval", - "Re-evaluate any previously blocked write, patch, or shell operations \ - — they are now auto-approved.", - ), - AppMode::Agent => ( - "read-only operations run silently; writes, patches, and shell \ - commands require user approval", - "Any operations you ran automatically under YOLO mode now require \ - explicit user approval before executing.", - ), - AppMode::Plan => ( - "all writes and patches are blocked; shell and code execution are unavailable", - "Any previously planned operations that require writes or shell access \ - must wait until the mode changes back to Agent or YOLO.", - ), - }; - Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: format!( - "\n\ -This is an internal runtime event, not user input. The operating mode has changed \ -from {previous} mode to {new} mode.\n\n\ -In {new} mode: {policy}\n\n\ -{re_eval}\n\ -", - previous = previous_mode.description(), - new = new_mode.description(), - policy = policy_note, - re_eval = re_eval_hint, - ), - cache_control: None, - }], - } - } - async fn add_session_message(&mut self, message: Message) { self.session.add_message(message); self.emit_session_updated().await; @@ -2692,50 +2643,25 @@ fn agent_approval_mode_for_turn( } } -fn mode_prompt_marker(mode: AppMode) -> String { - format!( - "", - match mode { - AppMode::Agent => "agent", - AppMode::Plan => "plan", - AppMode::Yolo => "yolo", - } - ) -} - -fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String { - format!( - "", - match approval_mode { - crate::tui::approval::ApprovalMode::Auto => "auto", - crate::tui::approval::ApprovalMode::Suggest => "suggest", - crate::tui::approval::ApprovalMode::Never => "never", - } - ) -} - -fn mode_prompt_text(mode: AppMode) -> &'static str { - match mode { - AppMode::Agent => prompts::AGENT_MODE, - AppMode::Plan => prompts::PLAN_MODE, - AppMode::Yolo => prompts::YOLO_MODE, - } -} - +/// Produce a minimal runtime-policy tag for the per-turn transient user message. +/// +/// All mode and approval policy descriptions live in the frozen system-prompt +/// prefix (`render_runtime_policy_reference()`). This tag is a pointer — the +/// model looks up the corresponding rules from the system prompt. Reduces +/// per-request overhead from ~500 tokens to ~12 tokens. fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String { - let marker = mode_prompt_marker(mode); - let mode_text = mode_prompt_text(mode).trim(); - let taxonomy = prompts::render_core_tool_taxonomy_block(mode); - let approval_marker = approval_prompt_marker(approval_mode); - let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim(); + let mode_str = match mode { + AppMode::Agent => "agent", + AppMode::Plan => "plan", + AppMode::Yolo => "yolo", + }; + let approval_str = match approval_mode { + crate::tui::approval::ApprovalMode::Auto => "auto", + crate::tui::approval::ApprovalMode::Suggest => "suggest", + crate::tui::approval::ApprovalMode::Never => "never", + }; format!( - "\n\ -This is runtime control metadata for the current request, not user input. \ -Apply it to the next assistant response and tool calls. It supersedes any \ -earlier mode or approval metadata in the transcript.\n\n\ -{marker}\n{taxonomy}\n{mode_text}\n\n\n\ -{approval_marker}\n{approval_text}\n\n\ -" + "" ) } diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 7369c17c..92b085d4 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -1748,11 +1748,11 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() { !matches!( block, ContentBlock::Text { text, .. } - if text.contains("")); + assert!(text.contains("mode=\"plan\"")); assert!( - text.contains(""), + text.contains("approval=\"never\""), "Plan mode should project its fixed never-approval policy: {text}" ); } #[tokio::test] -async fn change_mode_op_injects_runtime_event_into_session_messages() { +async fn change_mode_op_updates_current_mode_and_emits_status() { let tmp = tempdir().expect("tempdir"); let config = EngineConfig { workspace: tmp.path().to_path_buf(), @@ -1837,7 +1837,6 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() { let (engine, handle) = Engine::new(config, &Config::default()); let run = tokio::spawn(engine.run()); - // Switch from default Agent → YOLO handle .send(Op::ChangeMode { mode: AppMode::Yolo, @@ -1845,40 +1844,43 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() { .await .expect("send change mode"); - // Collect session-updated events until we see the injected message - let messages = { - let mut rx = handle.rx_event.write().await; - loop { - let event = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) - .await - .expect("session update after mode switch") - .expect("event"); - if let Event::SessionUpdated { messages, .. } = event { - // The last message should be our runtime event - if let Some(last) = messages.last() - && let ContentBlock::Text { text, .. } = - last.content.first().expect("text block") - && text.contains("kind=\"mode_change\"") - { - break messages; - } - } - } + // Expect a SessionUpdated event confirming the mode change (the + // per-turn tag carries the mode in every request, + // so no separate persistence of a mode_change runtime event is needed). + let mut rx = handle.rx_event.write().await; + let session_updated = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv()) + .await + .expect("session update after mode switch") + .expect("event"); + let Event::SessionUpdated { messages, .. } = session_updated else { + panic!( + "should emit SessionUpdated after mode change, got: {session_updated:?}" + ); }; - run.abort(); + assert!( + messages.iter().all(|message| { + message.content.iter().all(|block| { + !matches!( + block, + ContentBlock::Text { text, .. } + if text.contains(" tag reflects the current mode. + // Op::ChangeMode dispatch through the run loop is exercised by the + // integration test change_mode_op_updates_current_mode_and_emits_status. + let tmp = tempdir().expect("tempdir"); + let config = EngineConfig { + workspace: tmp.path().to_path_buf(), + model: "deepseek-v4-pro".to_string(), + ..Default::default() }; + let (mut engine, _handle) = Engine::new(config, &Config::default()); + assert_eq!(engine.current_mode, AppMode::Agent); + // Verify runtime tag in Agent mode + let agent_messages = engine.messages_with_turn_metadata(); + let agent_tag = agent_messages.last().expect("runtime tag message"); + let ContentBlock::Text { text: agent_text, .. } = + agent_tag.content.first().expect("text block") + else { + panic!("expected text runtime tag in Agent mode"); + }; assert!( - text.contains("codewhale:runtime_event"), - "should be a runtime event message" + agent_text.contains("mode=\"agent\""), + "Agent mode should produce runtime tag with mode=\"agent\", got: {agent_text}" + ); + + // Switch to YOLO + engine.current_mode = AppMode::Yolo; + assert_eq!(engine.current_mode, AppMode::Yolo); + + // Verify runtime tag reflects the YOLO mode with auto approval + let yolo_messages = engine.messages_with_turn_metadata(); + let yolo_tag = yolo_messages.last().expect("runtime tag message"); + let ContentBlock::Text { text: yolo_text, .. } = + yolo_tag.content.first().expect("text block") + else { + panic!("expected text runtime tag in YOLO mode"); + }; + assert!( + yolo_text.contains("mode=\"yolo\""), + "YOLO mode should produce runtime tag with mode=\"yolo\", got: {yolo_text}" ); assert!( - text.contains("kind=\"mode_change\""), - "should have mode_change kind" - ); - assert!( - text.contains("Agent mode") && text.contains("YOLO mode"), - "should mention both previous and new mode: {text}" - ); - assert!( - text.contains("Re-evaluate"), - "should tell agent to re-evaluate blocked operations: {text}" + yolo_text.contains("approval=\"auto\""), + "YOLO mode should project auto approval in runtime tag, got: {yolo_text}" ); } diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 07c58793..dab8bb30 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -701,32 +701,65 @@ impl Personality { // ── Composition ─────────────────────────────────────────────────────── -fn mode_prompt(mode: AppMode) -> &'static str { - match mode { - AppMode::Agent => AGENT_MODE, - AppMode::Yolo => YOLO_MODE, - AppMode::Plan => PLAN_MODE, - } -} +/// Generate a static reference block containing all mode and approval policy +/// descriptions. This lives in the frozen system-prompt prefix (sent once per +/// session) so the per-turn `` tag can be a minimal pointer +/// (``) instead of repeating the +/// full policy text on every API request. +pub(crate) fn render_runtime_policy_reference() -> String { + let taxonomy_agent = render_core_tool_taxonomy_body(AppMode::Agent); + let taxonomy_plan = render_core_tool_taxonomy_body(AppMode::Plan); + let taxonomy_yolo = render_core_tool_taxonomy_body(AppMode::Yolo); -fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode { - match mode { - AppMode::Agent => ApprovalMode::Suggest, - AppMode::Yolo => ApprovalMode::Auto, - AppMode::Plan => ApprovalMode::Never, - } -} + let mut out = String::with_capacity(8192); + out.push_str("## Runtime Policy Reference\n\n"); -pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str { - match mode { - AppMode::Yolo => AUTO_APPROVAL, - AppMode::Plan => NEVER_APPROVAL, - AppMode::Agent => match approval_mode { - ApprovalMode::Auto => AUTO_APPROVAL, - ApprovalMode::Suggest => SUGGEST_APPROVAL, - ApprovalMode::Never => NEVER_APPROVAL, - }, - } + // Protocol explanation — how the per-turn tag maps to this reference. + out.push_str( + "Each turn, the latest message in the transcript will contain a \ + `` tag that specifies the currently active mode and \ + approval policy. When you see this tag, look up the corresponding \ + rules below and apply them for the current turn.\n\n\ + The tag format is:\n\ + `\" approval=\"\"/>`\n\n", + ); + + // ── Mode reference ───────────────────────────────────────────────── + out.push_str("### Modes\n\n"); + + out.push_str("#### agent\n\n"); + out.push_str(&taxonomy_agent); + out.push_str("\n\n"); + out.push_str(AGENT_MODE.trim()); + out.push_str("\n\n"); + + out.push_str("#### plan\n\n"); + out.push_str(&taxonomy_plan); + out.push_str("\n\n"); + out.push_str(PLAN_MODE.trim()); + out.push_str("\n\n"); + + out.push_str("#### yolo\n\n"); + out.push_str(&taxonomy_yolo); + out.push_str("\n\n"); + out.push_str(YOLO_MODE.trim()); + out.push_str("\n\n"); + + // ── Approval policy reference ────────────────────────────────────── + out.push_str("### Approval Policies\n\n"); + + out.push_str("#### auto\n\n"); + out.push_str(AUTO_APPROVAL.trim()); + out.push_str("\n\n"); + + out.push_str("#### suggest\n\n"); + out.push_str(SUGGEST_APPROVAL.trim()); + out.push_str("\n\n"); + + out.push_str("#### never\n\n"); + out.push_str(NEVER_APPROVAL.trim()); + + out } /// Compose the full system prompt in deterministic order: @@ -751,7 +784,10 @@ const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"]; const TOOL_TAXONOMY_GIT: &[&str] = &["git_status", "git_diff"]; const TOOL_TAXONOMY_VERIFICATION: &[&str] = &["run_tests", "run_verifiers"]; -pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String { +/// Return the core tool taxonomy body **without** a markdown heading. +/// Suitable for embedding under a mode-specific sub-heading in the +/// Runtime Policy Reference without producing a broken heading hierarchy. +pub(crate) fn render_core_tool_taxonomy_body(mode: AppMode) -> String { let core_tools = core_taxonomy_tools_for_mode(mode); let mut sentences = Vec::new(); @@ -769,7 +805,7 @@ pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String { !sentences.is_empty(), "core tool taxonomy has no active tool groups" ); - format!("## Core Tool Taxonomy\n\n{}", sentences.join(" ")) + sentences.join(" ") } fn core_taxonomy_tools_for_mode(mode: AppMode) -> Vec<&'static str> { @@ -1165,6 +1201,13 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( full_prompt.push_str("\n\n"); full_prompt.push_str(COMPACT_TEMPLATE); + // 5a. Runtime policy reference — all mode and approval policy descriptions + // live here in the frozen prefix so the per-turn tag + // can be a minimal pointer instead of repeating the full policy text + // on every API request (up to ~500 tokens saved per turn). + full_prompt.push_str("\n\n"); + full_prompt.push_str(&render_runtime_policy_reference()); + // ── Volatile-content boundary ───────────────────────────────────────── // Everything below drifts mid-session and busts the prefix cache for // bytes that follow. All static layers (mode, project context, env, @@ -1537,8 +1580,10 @@ mod tests { Personality::Calm, "deepseek-v4-pro", ); - // The generated "## Core Tool Taxonomy" block now travels in the - // request-time metadata rather than being prepended here. + // The core tool taxonomy (grep_files / git_status / run_tests hints) + // is no longer prepended as a standalone "## Core Tool Taxonomy" block. + // It now lives inside the "## Runtime Policy Reference" section of the + // system prompt, scoped under each mode sub-heading. // (The "## Toolbox" section from the Constitutional preamble remains.) assert!(!prompt.contains("## Core Tool Taxonomy")); assert!(prompt.contains("You are deepseek-v4-pro")); @@ -1546,7 +1591,7 @@ mod tests { #[test] fn plan_prompt_taxonomy_omits_run_tests() { - let taxonomy = render_core_tool_taxonomy_block(AppMode::Plan); + let taxonomy = render_core_tool_taxonomy_body(AppMode::Plan); // Plan taxonomy should omit execution tools (verified at the source). assert!( taxonomy.contains("for discovery") && taxonomy.contains("for git inspection"), @@ -1559,8 +1604,9 @@ mod tests { "Plan taxonomy must not mention run_tests, run_verifiers, or exec_shell" ); // The taxonomy block is rendered correctly but no longer inlined - // into the base system prompt — it travels in request-time - // metadata instead. + // into the base system prompt — it lives inside the + // "## Runtime Policy Reference" section of the system prompt, + // scoped under each mode sub-heading. } #[test] @@ -1602,6 +1648,65 @@ mod tests { ); } + #[test] + fn runtime_policy_reference_is_included_in_full_prompt() { + let tmp = tempdir().expect("tempdir"); + let text = match system_prompt_for_mode_with_context_skills_session_and_approval( + AppMode::Agent, + tmp.path(), + None, + None, + None, + PromptSessionContext::default(), + ) { + SystemPrompt::Text(text) => text, + SystemPrompt::Blocks(_) => panic!("expected text system prompt"), + }; + + assert!( + text.contains("## Runtime Policy Reference"), + "full system prompt must contain the Runtime Policy Reference lookup table" + ); + assert!( + text.contains( + "\" approval=\"\"/>" + ), + "Runtime Policy Reference must explain the per-turn tag format" + ); + assert!( + text.contains("### Modes"), + "Runtime Policy Reference must contain the Modes section" + ); + assert!( + text.contains("#### agent"), + "Runtime Policy Reference must document Agent mode" + ); + assert!( + text.contains("#### plan"), + "Runtime Policy Reference must document Plan mode" + ); + assert!( + text.contains("#### yolo"), + "Runtime Policy Reference must document YOLO mode" + ); + assert!( + text.contains("### Approval Policies"), + "Runtime Policy Reference must contain the Approval Policies section" + ); + assert!( + text.contains("#### auto"), + "Runtime Policy Reference must document auto approval" + ); + assert!( + text.contains("#### suggest"), + "Runtime Policy Reference must document suggest approval" + ); + assert!( + text.contains("#### never"), + "Runtime Policy Reference must document never approval" + ); + } + #[test] fn system_prompt_merges_workspace_and_configured_skills_dir() { let _env_guard = crate::test_support::lock_test_env(); @@ -1996,7 +2101,7 @@ mod tests { "base prompt must not contain static CJK priming tokens" ); for mode in [AppMode::Agent, AppMode::Plan, AppMode::Yolo] { - let taxonomy = render_core_tool_taxonomy_block(mode); + let taxonomy = render_core_tool_taxonomy_body(mode); assert!( !contains_cjk(&taxonomy), "tool taxonomy must not contain static CJK priming tokens: {taxonomy:?}" diff --git a/crates/tui/src/prompts/approvals/auto.md b/crates/tui/src/prompts/approvals/auto.md index 368e826e..338f6e73 100644 --- a/crates/tui/src/prompts/approvals/auto.md +++ b/crates/tui/src/prompts/approvals/auto.md @@ -1,4 +1,4 @@ -## Approval Policy: Auto — Tier 2 (Statute) +##### Approval Policy: Auto — Tier 2 (Statute) All tool calls are pre-approved. You will not see approval prompts — your actions execute immediately. diff --git a/crates/tui/src/prompts/approvals/never.md b/crates/tui/src/prompts/approvals/never.md index 8682bdfe..eb2c0a1c 100644 --- a/crates/tui/src/prompts/approvals/never.md +++ b/crates/tui/src/prompts/approvals/never.md @@ -1,4 +1,4 @@ -## Approval Policy: Never — Tier 2 (Statute) +##### Approval Policy: Never — Tier 2 (Statute) All write operations are blocked. You can read, search, and investigate, but you cannot modify the workspace. diff --git a/crates/tui/src/prompts/approvals/suggest.md b/crates/tui/src/prompts/approvals/suggest.md index dcaa4f3e..5af78956 100644 --- a/crates/tui/src/prompts/approvals/suggest.md +++ b/crates/tui/src/prompts/approvals/suggest.md @@ -1,4 +1,4 @@ -## Approval Policy: Suggest — Tier 2 (Statute) +##### Approval Policy: Suggest — Tier 2 (Statute) Read-only operations run silently. Write operations (file edits, patches, shell execution, sub-agent spawns, CSV batches) require user approval before executing. diff --git a/crates/tui/src/prompts/modes/agent.md b/crates/tui/src/prompts/modes/agent.md index 7e591799..38ae028c 100644 --- a/crates/tui/src/prompts/modes/agent.md +++ b/crates/tui/src/prompts/modes/agent.md @@ -1,4 +1,4 @@ -## Mode: Agent +##### Mode: Agent You are running in Agent mode — autonomous task execution with tool access. @@ -12,7 +12,7 @@ For simple writes, state the direct edit and proceed through the normal approval For multi-step initiatives, keep `checklist_write` current. Add `update_plan` only for genuinely useful strategy. -## Efficient Approvals +###### Efficient Approvals When your plan includes multiple writes, present them together: 1. Show `checklist_write` with all write steps listed so the user sees the full scope @@ -21,7 +21,7 @@ When your plan includes multiple writes, present them together: Don't sequence approvals one at a time — the user wants context, not interruption. A clear plan with visible checklist items gets approved faster than a series of surprise approval prompts. -## Session Longevity +###### Session Longevity Long sessions accumulate context. To stay fast: - Open sub-agent sessions for independent work instead of doing everything sequentially diff --git a/crates/tui/src/prompts/modes/plan.md b/crates/tui/src/prompts/modes/plan.md index 3ade5c73..3e6e648b 100644 --- a/crates/tui/src/prompts/modes/plan.md +++ b/crates/tui/src/prompts/modes/plan.md @@ -1,4 +1,4 @@ -## Mode: Plan +##### Mode: Plan You are running in Plan mode — design before implementing. diff --git a/crates/tui/src/prompts/modes/yolo.md b/crates/tui/src/prompts/modes/yolo.md index e1f4f795..0e867fb5 100644 --- a/crates/tui/src/prompts/modes/yolo.md +++ b/crates/tui/src/prompts/modes/yolo.md @@ -1,4 +1,4 @@ -## Mode: YOLO +##### Mode: YOLO You are running in YOLO mode — full autonomy, all actions pre-approved.