From 1f9f860a3e8a8009dc05cbcc2bc5e5b17b9c8af0 Mon Sep 17 00:00:00 2001 From: LeoAlex0 <31839998+LeoAlex0@users.noreply.github.com> Date: Thu, 4 Jun 2026 02:52:27 +0800 Subject: [PATCH] feat(cache): project mode prompts per request Keep the stable system prompt mode-agnostic and project mode, approval policy, and tool taxonomy as request-time runtime metadata. This avoids mutating stored history while preserving provider chat-template compatibility. Harvested from PR #2687 with stewardship turn-metadata cache tests preserved. The replan replay guard remains <= 2, and cache inspect now asserts tool-result budget metadata for both deduplicated=false and deduplicated=true. (cherry picked from commit 77943304e637545b441ac135f06977065c4b350f) --- CHANGELOG.md | 4 + crates/tui/CHANGELOG.md | 4 + crates/tui/src/commands/debug.rs | 23 +- crates/tui/src/core/engine.rs | 138 +++++++-- crates/tui/src/core/engine/capacity_flow.rs | 18 +- crates/tui/src/core/engine/tests.rs | 307 +++++++++++--------- crates/tui/src/core/engine/tool_catalog.rs | 19 +- crates/tui/src/core/engine/turn_loop.rs | 18 +- crates/tui/src/core/ops.rs | 2 +- crates/tui/src/core/session.rs | 4 +- crates/tui/src/prompts.rs | 147 ++++------ 11 files changed, 398 insertions(+), 286 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 929cdbd5..201e648b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Appended volatile `` blocks after user text in outgoing user message content arrays so provider prefix caches can keep matching the stable user-input prefix across date, route, and working-set changes. +- Projected mode, approval, and tool-taxonomy prompt metadata per request + instead of mutating stored system prompts, keeping provider prefix-cache + inputs byte-stable while preserving mode-specific instructions (#2687). + Thanks @LeoAlex0 for the implementation. - Softened contribution intake automation: external issues now receive a warm triage note and are never auto-closed by the contribution gate, while the PR gate copy makes clear that dry-run observations are about maintainer safety, diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index 929cdbd5..201e648b 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Appended volatile `` blocks after user text in outgoing user message content arrays so provider prefix caches can keep matching the stable user-input prefix across date, route, and working-set changes. +- Projected mode, approval, and tool-taxonomy prompt metadata per request + instead of mutating stored system prompts, keeping provider prefix-cache + inputs byte-stable while preserving mode-specific instructions (#2687). + Thanks @LeoAlex0 for the implementation. - Softened contribution intake automation: external issues now receive a warm triage note and are never auto-closed by the contribution gate, while the PR gate copy makes clear that dry-run observations are about maintainer safety, diff --git a/crates/tui/src/commands/debug.rs b/crates/tui/src/commands/debug.rs index eee41bb5..13c18ecb 100644 --- a/crates/tui/src/commands/debug.rs +++ b/crates/tui/src/commands/debug.rs @@ -1225,10 +1225,25 @@ mod tests { let result = cache(&mut app, Some("inspect")); let msg = result.message.expect("inspect output"); - assert!(msg.contains("original_chars=14000"), "got: {msg}"); - assert!(msg.contains("truncated=true"), "got: {msg}"); - assert!(msg.contains("deduplicated=false"), "got: {msg}"); - assert!(msg.contains("deduplicated=true"), "got: {msg}"); + let tool_budget_lines: Vec<_> = msg + .lines() + .filter(|line| line.contains("original_chars=14000")) + .collect(); + assert_eq!(tool_budget_lines.len(), 2, "got: {msg}"); + + let first_sighting = tool_budget_lines + .iter() + .find(|line| line.contains("deduplicated=false")) + .expect("first tool-result sighting should report non-dedup metadata"); + assert!(first_sighting.contains("sent_chars="), "got: {msg}"); + assert!(first_sighting.contains("truncated=true"), "got: {msg}"); + + let repeat_sighting = tool_budget_lines + .iter() + .find(|line| line.contains("deduplicated=true")) + .expect("repeat tool-result sighting should report dedup metadata"); + assert!(repeat_sighting.contains("sent_chars="), "got: {msg}"); + assert!(repeat_sighting.contains("truncated=false"), "got: {msg}"); } #[test] diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index ef92332a..96af3169 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -687,7 +687,6 @@ impl Engine { show_thinking: config.show_thinking, allow_shell: config.allow_shell, }, - session.approval_mode, ); let stable_prompt = Some(system_prompt); session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref())); @@ -853,11 +852,12 @@ impl Engine { self.session.trust_mode = trust_mode; self.config.trust_mode = trust_mode; self.session.auto_approve = auto_approve; - self.session.approval_mode = if auto_approve { - crate::tui::approval::ApprovalMode::Auto - } else { - approval_mode - }; + let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode); + // Only track the Agent-mode approval — Yolo/Plan have fixed + // approval policies that are derived from the mode itself. + if mode == AppMode::Agent { + self.session.approval_mode = agent_approval_mode; + } let _ = self .tx_event @@ -1236,7 +1236,6 @@ impl Engine { Op::ChangeMode { mode } => { let previous_mode = self.current_mode; self.current_mode = mode; - self.refresh_system_prompt(mode); self.emit_session_updated().await; // Notify the agent that the mode has changed so it can re-evaluate // any operations that were blocked by the previous mode's policy. @@ -1253,11 +1252,11 @@ impl Engine { ))) .await; } - Op::SetModel { model, mode } => { + Op::SetModel { model, mode: _ } => { self.session.auto_model = model.trim().eq_ignore_ascii_case("auto"); self.session.model = model; self.config.model.clone_from(&self.session.model); - self.refresh_system_prompt(mode); + self.refresh_system_prompt(); self.emit_session_updated().await; let _ = self .tx_event @@ -1304,6 +1303,10 @@ impl Engine { self.session.compaction_summary_prompt = extract_compaction_summary_prompt(system_prompt.clone()); self.session.system_prompt = system_prompt; + self.session.last_system_prompt_hash = + Some(system_prompt_hash(self.session.system_prompt.as_ref())); + // Host-supplied prompts are persisted prefixes. Keep them + // byte-stable; mode/runtime state is projected per request. self.session.system_prompt_override = system_prompt_override && self.session.system_prompt.is_some(); self.session.auto_model = model.trim().eq_ignore_ascii_case("auto"); @@ -1485,6 +1488,18 @@ In {new} mode: {policy}\n\n\ } } + fn runtime_prompt_message(&self) -> Message { + let mode = self.current_mode; + let approval_mode = approval_mode_for(mode, self.session.approval_mode); + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: runtime_prompt_text(mode, approval_mode), + cache_control: None, + }], + } + } + fn user_text_message_with_turn_metadata(&self, text: String) -> Message { self.user_text_message_with_turn_metadata_for_route( text, @@ -1633,6 +1648,14 @@ In {new} mode: {policy}\n\n\ .observe_user_message(&content, &self.session.workspace); let force_update_plan_first = should_force_update_plan_first(mode, &content); + let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode); + self.session.auto_approve = auto_approve; + // Only track the Agent-mode approval — Yolo/Plan have fixed + // approval policies that are derived from the mode itself. + if mode == AppMode::Agent { + self.session.approval_mode = agent_approval_mode; + } + // Add user message to session let user_msg = self.user_text_message_with_turn_metadata_for_route( content, @@ -1670,15 +1693,10 @@ In {new} mode: {policy}\n\n\ self.config.trust_mode = trust_mode; self.config.translation_enabled = translation_enabled; self.config.show_thinking = show_thinking; - self.session.auto_approve = auto_approve; - self.session.approval_mode = if auto_approve { - crate::tui::approval::ApprovalMode::Auto - } else { - approval_mode - }; - // Update system prompt to match current mode and include persisted compaction context. - self.refresh_system_prompt(mode); + // Refresh stable prompt context. Current mode is carried by the + // request-time runtime prompt projection. + self.refresh_system_prompt(); self.emit_session_updated().await; // Build tool registry and tool list for the current mode @@ -2430,8 +2448,8 @@ In {new} mode: {policy}\n\n\ ))) .await; } - /// Refresh the system prompt based on current mode and context. - fn refresh_system_prompt(&mut self, mode: AppMode) { + /// Refresh the stable system prompt based on current non-mode context. + fn refresh_system_prompt(&mut self) { let user_memory_block = crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path); let prompt_goal_objective = goal_objective_for_prompt( @@ -2439,7 +2457,7 @@ In {new} mode: {policy}\n\n\ &self.config.goal_state, ); let base = prompts::system_prompt_for_mode_with_context_skills_session_and_approval( - mode, + AppMode::Agent, &self.config.workspace, None, Some(&self.config.skills_dir), @@ -2454,7 +2472,6 @@ In {new} mode: {policy}\n\n\ show_thinking: self.config.show_thinking, allow_shell: self.session.allow_shell, }, - self.session.approval_mode, ); let mut stable_prompt = merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone()); @@ -2472,7 +2489,6 @@ In {new} mode: {policy}\n\n\ let stable_hash = system_prompt_hash(stable_prompt.as_ref()); if self.session.system_prompt_override { - self.session.last_system_prompt_hash = Some(stable_hash); return; } if self.session.last_system_prompt_hash != Some(stable_hash) { @@ -2634,6 +2650,84 @@ fn goal_objective_for_prompt( normalized_goal_objective(configured_goal) } +// ── Mode & approval prompts as request-time runtime metadata ───────── +// +// Mode contracts and approval policies are not persisted in the session +// history and are not sent as extra system messages. Instead, each API +// request projects a transient user-role runtime metadata message at the +// tail. The stable system prompt remains byte-stable, stored history remains +// byte-stable, and strict chat-template providers never see a system message +// outside messages[0]. + +fn approval_mode_for( + mode: AppMode, + session_approval: crate::tui::approval::ApprovalMode, +) -> crate::tui::approval::ApprovalMode { + match mode { + AppMode::Yolo => crate::tui::approval::ApprovalMode::Auto, + AppMode::Plan => crate::tui::approval::ApprovalMode::Never, + AppMode::Agent => session_approval, + } +} + +fn agent_approval_mode_for_turn( + auto_approve: bool, + approval_mode: crate::tui::approval::ApprovalMode, +) -> crate::tui::approval::ApprovalMode { + if auto_approve { + crate::tui::approval::ApprovalMode::Auto + } else { + approval_mode + } +} + +fn mode_prompt_marker(mode: AppMode) -> String { + format!( + "", + match mode { + AppMode::Agent => "agent", + AppMode::Plan => "plan", + AppMode::Yolo => "yolo", + } + ) +} + +fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String { + format!( + "", + match approval_mode { + crate::tui::approval::ApprovalMode::Auto => "auto", + crate::tui::approval::ApprovalMode::Suggest => "suggest", + crate::tui::approval::ApprovalMode::Never => "never", + } + ) +} + +fn mode_prompt_text(mode: AppMode) -> &'static str { + match mode { + AppMode::Agent => prompts::AGENT_MODE, + AppMode::Plan => prompts::PLAN_MODE, + AppMode::Yolo => prompts::YOLO_MODE, + } +} + +fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String { + let marker = mode_prompt_marker(mode); + let mode_text = mode_prompt_text(mode).trim(); + let taxonomy = prompts::render_core_tool_taxonomy_block(mode); + let approval_marker = approval_prompt_marker(approval_mode); + let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim(); + format!( + "\n\ +This is runtime control metadata for the current request, not user input. \ +Apply it to the next assistant response and tool calls. It supersedes any \ +earlier mode or approval metadata in the transcript.\n\n\ +{marker}\n{taxonomy}\n{mode_text}\n\n\n\ +{approval_marker}\n{approval_text}\n\n\ +" + ) +} + /// Spawn the engine in a background task pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle { let (engine, handle) = Engine::new(config, api_config); diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs index 50cdeef4..514ad120 100644 --- a/crates/tui/src/core/engine/capacity_flow.rs +++ b/crates/tui/src/core/engine/capacity_flow.rs @@ -36,7 +36,7 @@ impl Engine { pub(super) async fn run_capacity_post_tool_checkpoint( &mut self, turn: &TurnContext, - mode: AppMode, + tool_registry: Option<&crate::tools::ToolRegistry>, tool_exec_lock: Arc>, mcp_pool: Option>>, @@ -56,7 +56,6 @@ impl Engine { let _ = self .apply_verify_with_tool_replay( turn, - mode, snapshot.as_ref(), tool_registry, tool_exec_lock, @@ -66,7 +65,7 @@ impl Engine { false } GuardrailAction::VerifyAndReplan => { - self.apply_verify_and_replan(turn, mode, snapshot.as_ref(), "high_risk_post_tool") + self.apply_verify_and_replan(turn, snapshot.as_ref(), "high_risk_post_tool") .await } GuardrailAction::NoIntervention | GuardrailAction::TargetedContextRefresh => false, @@ -76,7 +75,7 @@ impl Engine { pub(super) async fn run_capacity_error_escalation_checkpoint( &mut self, turn: &TurnContext, - mode: AppMode, + step_error_count: usize, consecutive_tool_error_steps: u32, error_categories: &[ErrorCategory], @@ -136,7 +135,6 @@ impl Engine { let category_labels: Vec = error_categories.iter().map(|c| c.to_string()).collect(); self.apply_verify_and_replan( turn, - mode, Some(&forced), &format!( "error_escalation: step_errors={}, consecutive_steps={}, categories={}", @@ -385,7 +383,7 @@ impl Engine { &mut self, turn: &TurnContext, client: Option<&DeepSeekClient>, - mode: AppMode, + _mode: AppMode, snapshot: Option<&CapacitySnapshot>, ) -> bool { let before_tokens = self.estimated_input_tokens(); @@ -465,7 +463,7 @@ impl Engine { GuardrailAction::TargetedContextRefresh, None, ))); - self.refresh_system_prompt(mode); + self.refresh_system_prompt(); self.emit_session_updated().await; let after_tokens = self.estimated_input_tokens(); @@ -487,7 +485,6 @@ impl Engine { pub(super) async fn apply_verify_with_tool_replay( &mut self, turn: &TurnContext, - mode: AppMode, snapshot: Option<&CapacitySnapshot>, tool_registry: Option<&crate::tools::ToolRegistry>, tool_exec_lock: Arc>, @@ -617,7 +614,7 @@ impl Engine { GuardrailAction::VerifyWithToolReplay, Some(&verification_note), ))); - self.refresh_system_prompt(mode); + self.refresh_system_prompt(); self.emit_session_updated().await; let after_tokens = self.estimated_input_tokens(); @@ -638,7 +635,6 @@ impl Engine { pub(super) async fn apply_verify_and_replan( &mut self, turn: &TurnContext, - mode: AppMode, snapshot: Option<&CapacitySnapshot>, reason: &str, ) -> bool { @@ -685,7 +681,7 @@ impl Engine { GuardrailAction::VerifyAndReplan, Some("Replan now from canonical state. Keep steps minimal and verifiable."), ))); - self.refresh_system_prompt(mode); + self.refresh_system_prompt(); self.emit_session_updated().await; let _ = self diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 186ba2d0..7369c17c 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -303,7 +303,7 @@ fn refresh_system_prompt_uses_runtime_goal_state() { goal.create("Close the runtime goal loop".to_string(), None); } - engine.refresh_system_prompt(AppMode::Agent); + engine.refresh_system_prompt(); let prompt = match engine.session.system_prompt { Some(SystemPrompt::Text(text)) => text, Some(SystemPrompt::Blocks(blocks)) => blocks @@ -505,116 +505,36 @@ fn tool_exec_outcome_tracks_duration() { #[test] fn core_native_tools_stay_loaded_in_yolo_mode() { let always_load = HashSet::new(); - assert!(!should_default_defer_tool( - "exec_shell", - AppMode::Yolo, - &always_load - )); + assert!(!should_default_defer_tool("exec_shell", &always_load)); // git_blame remains deferred (read-only git history beyond log/show/diff). - assert!(should_default_defer_tool( - "git_blame", - AppMode::Yolo, - &always_load - )); + assert!(should_default_defer_tool("git_blame", &always_load)); } #[test] fn non_yolo_mode_retains_default_defer_policy() { let always_load = HashSet::new(); - assert!(!should_default_defer_tool( - "exec_shell", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "edit_file", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "apply_patch", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "fetch_url", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "git_diff", - AppMode::Agent, - &always_load - )); + assert!(!should_default_defer_tool("exec_shell", &always_load)); + assert!(!should_default_defer_tool("edit_file", &always_load)); + assert!(!should_default_defer_tool("apply_patch", &always_load)); + assert!(!should_default_defer_tool("fetch_url", &always_load)); + assert!(!should_default_defer_tool("git_diff", &always_load)); // #2654: read-only git history joins the active set. - assert!(!should_default_defer_tool( - "git_log", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "git_show", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "git_status", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "run_tests", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "agent_open", - AppMode::Agent, - &always_load - )); + assert!(!should_default_defer_tool("git_log", &always_load)); + assert!(!should_default_defer_tool("git_show", &always_load)); + assert!(!should_default_defer_tool("git_status", &always_load)); + assert!(!should_default_defer_tool("run_tests", &always_load)); + assert!(!should_default_defer_tool("agent_open", &always_load)); // #2605: the fetch/close side of the sub-agent surface must also stay // active so a first `agent_eval`/`agent_close` executes instead of // hydrating its schema and forcing a double-invoke. - assert!(!should_default_defer_tool( - "agent_eval", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "agent_close", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "read_file", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "web_search", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "write_file", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "task_shell_start", - AppMode::Agent, - &always_load - )); - assert!(!should_default_defer_tool( - "task_shell_wait", - AppMode::Agent, - &always_load - )); - assert!(should_default_defer_tool( - "git_blame", - AppMode::Agent, - &always_load - )); + assert!(!should_default_defer_tool("agent_eval", &always_load)); + assert!(!should_default_defer_tool("agent_close", &always_load)); + assert!(!should_default_defer_tool("read_file", &always_load)); + assert!(!should_default_defer_tool("web_search", &always_load)); + assert!(!should_default_defer_tool("write_file", &always_load)); + assert!(!should_default_defer_tool("task_shell_start", &always_load)); + assert!(!should_default_defer_tool("task_shell_wait", &always_load)); + assert!(should_default_defer_tool("git_blame", &always_load)); } #[test] @@ -815,11 +735,7 @@ fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() { #[test] fn tools_always_load_overrides_default_native_deferral() { let always_load = HashSet::from(["git_blame".to_string()]); - assert!(!should_default_defer_tool( - "git_blame", - AppMode::Agent, - &always_load - )); + assert!(!should_default_defer_tool("git_blame", &always_load)); } #[test] @@ -1795,15 +1711,20 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() { .await .expect("send change mode"); - let prompt = { + let (_prompt, messages) = { let mut rx = handle.rx_event.write().await; loop { let event = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv()) .await .expect("session update after mode switch") .expect("event"); - if let Event::SessionUpdated { system_prompt, .. } = event { - break match system_prompt.expect("system prompt") { + if let Event::SessionUpdated { + system_prompt, + messages, + .. + } = event + { + let prompt = match system_prompt.expect("system prompt") { SystemPrompt::Text(text) => text, SystemPrompt::Blocks(blocks) => blocks .into_iter() @@ -1811,13 +1732,98 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() { .collect::>() .join("\n"), }; + break (prompt, messages); } } }; run.abort(); - assert!(prompt.contains("Mode: YOLO")); - assert!(prompt.contains("Approval Policy: Auto")); + assert!( + messages.iter().all(|message| message.role != "system"), + "mode switch must not persist appended system messages: {messages:?}" + ); + assert!( + messages.iter().all(|message| { + message.content.iter().all(|block| { + !matches!( + block, + ContentBlock::Text { text, .. } + if text.contains("")); + assert!( + text.contains(""), + "Plan mode should project its fixed never-approval policy: {text}" + ); } #[tokio::test] @@ -2216,7 +2222,7 @@ fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() { .working_set .observe_user_message("please inspect src/lib.rs", tmp.path()); - engine.refresh_system_prompt(AppMode::Agent); + engine.refresh_system_prompt(); let prompt = match &engine.session.system_prompt { Some(SystemPrompt::Text(text)) => text.clone(), @@ -2251,7 +2257,7 @@ fn working_set_reaches_model_as_turn_metadata() { let messages = engine.messages_with_turn_metadata(); let last_block = messages - .last() + .first() .and_then(|message| message.content.last()) .expect("turn metadata block"); let ContentBlock::Text { text, .. } = last_block else { @@ -2276,7 +2282,7 @@ fn turn_metadata_includes_current_local_date_without_working_set() { let messages = engine.messages_with_turn_metadata(); let last_block = messages - .last() + .first() .and_then(|message| message.content.last()) .expect("turn metadata block"); let ContentBlock::Text { text, .. } = last_block else { @@ -2455,7 +2461,16 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() { let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string()); engine.session.add_message(first_user.clone()); let first_request = engine.messages_with_turn_metadata(); - assert_eq!(first_request, engine.session.messages); + assert_eq!( + &first_request[..engine.session.messages.len()], + engine.session.messages.as_slice() + ); + assert_eq!(first_request.len(), engine.session.messages.len() + 1); + assert_eq!(first_request.first(), Some(&first_user)); + assert_eq!( + first_request.last().map(|message| message.role.as_str()), + Some("user") + ); engine.session.add_message(Message { role: "assistant".to_string(), @@ -2472,14 +2487,24 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() { engine.session.add_message(second_user); let second_request = engine.messages_with_turn_metadata(); - assert_eq!(second_request, engine.session.messages); + assert_eq!( + &second_request[..engine.session.messages.len()], + engine.session.messages.as_slice() + ); + assert_eq!(second_request.len(), engine.session.messages.len() + 1); assert_eq!(second_request.first(), Some(&first_user)); + let runtime = second_request.last().expect("runtime prompt"); + let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text") + else { + panic!("expected runtime prompt text"); + }; + assert!(text.contains("` must -/// be stored only on actual user-text messages, not retroactively added -/// to tool-result messages at request time. +/// be stored only on actual user-text messages. Request-time runtime metadata +/// is appended separately and must not mutate tool-result messages. #[test] fn turn_metadata_skips_tool_result_messages() { let tmp = tempdir().expect("tempdir"); @@ -2522,9 +2547,11 @@ fn turn_metadata_skips_tool_result_messages() { let messages = engine.messages_with_turn_metadata(); - // The trailing message is the tool result and MUST be untouched — + // The stored trailing message is the tool result and MUST be untouched — // no Text block sneaking in front of the ToolResult block. - let trailing = messages.last().expect("trailing message"); + let trailing = messages + .get(messages.len().saturating_sub(2)) + .expect("stored trailing message"); assert_eq!(trailing.role, "user"); assert_eq!(trailing.content.len(), 1); assert!(matches!( @@ -2546,6 +2573,14 @@ fn turn_metadata_skips_tool_result_messages() { panic!("expected Text block for turn_meta at tail"); }; assert!(meta.starts_with("\n")); + assert!(meta.contains("src/lib.rs")); + assert!( + matches!( + messages.last().and_then(|message| message.content.first()), + Some(ContentBlock::Text { text, .. }) if text.contains(", -) -> bool { +pub(super) fn should_default_defer_tool(name: &str, always_load: &HashSet) -> bool { if always_load.contains(name) { return false; } @@ -85,13 +81,9 @@ pub(super) fn should_default_defer_tool( .any(|core_tool| core_tool == &name) } -pub(super) fn apply_native_tool_deferral( - catalog: &mut [Tool], - mode: AppMode, - always_load: &HashSet, -) { +pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], always_load: &HashSet) { for tool in catalog { - tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load)); + tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load)); } } @@ -185,7 +177,7 @@ pub(super) fn build_model_tool_catalog( mode: AppMode, always_load: &HashSet, ) -> Vec { - apply_native_tool_deferral(&mut native_tools, mode, always_load); + apply_native_tool_deferral(&mut native_tools, always_load); apply_mcp_tool_deferral(&mut mcp_tools, mode); // Sort each partition by name for prefix-cache stability (#263). The // upstream `to_api_tools()` already sorts the registry's HashMap output; @@ -229,7 +221,6 @@ pub(super) fn ensure_advanced_tooling( allowed_callers: Some(vec!["direct".to_string()]), defer_loading: Some(should_default_defer_tool( CODE_EXECUTION_TOOL_NAME, - mode, always_load, )), input_examples: None, @@ -248,7 +239,7 @@ pub(super) fn ensure_advanced_tooling( && crate::dependencies::resolve_node().is_some() { let mut tool = crate::tools::js_execution::js_execution_tool_definition(); - tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load)); + tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load)); catalog.push(tool); } diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index bcb3dc3a..892d0d21 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -105,7 +105,7 @@ impl Engine { } // Ensure system prompt is up to date with latest session states - self.refresh_system_prompt(mode); + self.refresh_system_prompt(); if turn.at_max_steps() { let _ = self @@ -2149,7 +2149,6 @@ impl Engine { if self .run_capacity_post_tool_checkpoint( turn, - mode, tool_registry, tool_exec_lock.clone(), mcp_pool.clone(), @@ -2181,7 +2180,6 @@ impl Engine { if self .run_capacity_error_escalation_checkpoint( turn, - mode, step_error_count, consecutive_tool_error_steps, &step_error_categories, @@ -2254,11 +2252,15 @@ impl Engine { } pub(super) fn messages_with_turn_metadata(&self) -> Vec { - // `` is stored on user-text messages when the message is - // appended. Do not rewrite historical messages at request time: doing - // so makes the API prefix differ from the bytes sent in earlier turns - // and destroys DeepSeek's KV prefix cache reuse. - self.session.messages.clone() + // Keep stored history byte-stable and provider-compatible: runtime + // mode/approval contracts are projected as a transient user message + // at request time instead of being persisted as appended system + // messages. This preserves the stable prefix through all stored + // messages while avoiding strict chat templates that only allow + // system messages at messages[0]. + let mut messages = self.session.messages.clone(); + messages.push(self.runtime_prompt_message()); + messages } } diff --git a/crates/tui/src/core/ops.rs b/crates/tui/src/core/ops.rs index 0889c5b2..4ad48c06 100644 --- a/crates/tui/src/core/ops.rs +++ b/crates/tui/src/core/ops.rs @@ -77,7 +77,7 @@ pub enum Op { #[allow(dead_code)] ChangeMode { mode: AppMode }, - /// Update the model being used and refresh the prompt for the current mode. + /// Update the model being used and refresh stable prompt context. #[allow(dead_code)] SetModel { model: String, mode: AppMode }, diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs index dccdd913..67ffb7ad 100644 --- a/crates/tui/src/core/session.rs +++ b/crates/tui/src/core/session.rs @@ -31,8 +31,8 @@ pub struct Session { /// System prompt (optional) pub system_prompt: Option, - /// True when `system_prompt` came from an explicit runtime API override - /// and should not be replaced by mode/context refreshes. + /// True when `system_prompt` is a persisted/runtime-supplied prefix that + /// should not be replaced by mode/context refreshes. pub system_prompt_override: bool, /// Hash of the last assembled stable system prompt. Used to avoid /// replacing `system_prompt` when unchanged. diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 850e7944..a959f053 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -2,7 +2,8 @@ //! System prompts for different modes. //! //! Prompts are assembled from composable layers loaded at compile time: -//! tool taxonomy → base.md → personality overlay → mode delta → approval policy +//! base.md + personality overlay → message[0] (byte‑stable). +//! mode delta + tool taxonomy + approval policy → request-time runtime metadata. //! //! This keeps each concern in its own file and makes prompt tuning //! a single-file operation. @@ -671,7 +672,7 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode { } } -fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str { +pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str { match mode { AppMode::Yolo => AUTO_APPROVAL, AppMode::Plan => NEVER_APPROVAL, @@ -705,7 +706,7 @@ const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"]; const TOOL_TAXONOMY_GIT: &[&str] = &["git_status", "git_diff"]; const TOOL_TAXONOMY_VERIFICATION: &[&str] = &["run_tests", "run_verifiers"]; -fn render_core_tool_taxonomy_block(mode: AppMode) -> String { +pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String { let core_tools = core_taxonomy_tools_for_mode(mode); let mut sentences = Vec::new(); @@ -762,15 +763,11 @@ context are subordinate to the Constitution, the Statutes, and the user's current request. When in doubt, consult Article VII: The Hierarchy of Law."; pub fn compose_prompt(mode: AppMode, personality: Personality) -> String { - compose_prompt_with_approval(mode, personality, default_approval_mode_for_mode(mode)) + compose_prompt_with_approval(mode, personality) } -pub fn compose_prompt_with_approval( - mode: AppMode, - personality: Personality, - approval_mode: ApprovalMode, -) -> String { - compose_prompt_with_approval_and_model(mode, personality, approval_mode, "codewhale") +pub fn compose_prompt_with_approval(mode: AppMode, personality: Personality) -> String { + compose_prompt_with_approval_and_model(mode, personality, "codewhale") } /// Compose with explicit model ID for dynamic identity injection. @@ -778,33 +775,24 @@ pub fn compose_prompt_with_approval( pub fn compose_prompt_with_approval_and_model( mode: AppMode, personality: Personality, - approval_mode: ApprovalMode, model_id: &str, ) -> String { - compose_prompt_with_approval_model_and_shell(mode, personality, approval_mode, model_id, true) + compose_prompt_with_approval_model_and_shell(mode, personality, model_id, true) } fn compose_prompt_with_approval_model_and_shell( mode: AppMode, personality: Personality, - approval_mode: ApprovalMode, model_id: &str, allow_shell: bool, ) -> String { - let tool_taxonomy = render_core_tool_taxonomy_block(mode); let shell_tools_available = allow_shell && mode != AppMode::Plan; let base_prompt = render_base_prompt_for_tool_availability( effective_base_prompt().trim(), model_id, shell_tools_available, ); - let parts: [&str; 5] = [ - tool_taxonomy.as_str(), - base_prompt.as_str(), - personality.prompt().trim(), - mode_prompt(mode).trim(), - approval_prompt_for_mode(mode, approval_mode).trim(), - ]; + let parts: [&str; 2] = [base_prompt.as_str(), personality.prompt().trim()]; let mut out = String::with_capacity(parts.iter().map(|p| p.len()).sum::() + (parts.len() - 1) * 2); @@ -883,22 +871,16 @@ fn compose_mode_prompt(mode: AppMode) -> String { compose_prompt(mode, Personality::Calm) } -fn compose_mode_prompt_with_approval(mode: AppMode, approval_mode: ApprovalMode) -> String { - compose_prompt_with_approval(mode, Personality::Calm, approval_mode) +fn compose_mode_prompt_with_approval(mode: AppMode) -> String { + compose_prompt_with_approval(mode, Personality::Calm) } fn compose_mode_prompt_with_approval_and_model( mode: AppMode, - approval_mode: ApprovalMode, + _approval_mode: ApprovalMode, model_id: &str, ) -> String { - compose_prompt_with_approval_model_and_shell( - mode, - Personality::Calm, - approval_mode, - model_id, - true, - ) + compose_prompt_with_approval_model_and_shell(mode, Personality::Calm, model_id, true) } // ── Public API ──────────────────────────────────────────────────────── @@ -991,7 +973,6 @@ pub fn system_prompt_for_mode_with_context_skills_and_session( skills_dir, instructions, session_context, - default_approval_mode_for_mode(mode), ) } @@ -1002,12 +983,10 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( skills_dir: Option<&Path>, instructions: Option<&[InstructionSource]>, session_context: PromptSessionContext<'_>, - approval_mode: ApprovalMode, ) -> SystemPrompt { let mode_prompt = compose_prompt_with_approval_model_and_shell( mode, Personality::Calm, - approval_mode, session_context.model_id, session_context.allow_shell, ); @@ -1335,7 +1314,6 @@ mod tests { let prompt = compose_prompt_with_approval_and_model( AppMode::Agent, Personality::Calm, - ApprovalMode::Suggest, "deepseek-v4-flash", ); assert!( @@ -1353,7 +1331,6 @@ mod tests { let prompt = compose_prompt_with_approval_model_and_shell( AppMode::Agent, Personality::Calm, - ApprovalMode::Suggest, "deepseek-v4-pro", true, ); @@ -1369,7 +1346,6 @@ mod tests { let prompt = compose_prompt_with_approval_model_and_shell( AppMode::Agent, Personality::Calm, - ApprovalMode::Suggest, "deepseek-v4-pro", false, ); @@ -1403,47 +1379,36 @@ mod tests { } #[test] - fn composed_prompt_starts_with_core_tool_taxonomy() { + fn composed_prompt_no_longer_inlines_tool_taxonomy() { let prompt = compose_prompt_with_approval_and_model( AppMode::Agent, Personality::Calm, - ApprovalMode::Suggest, "deepseek-v4-pro", ); - let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Agent); - - assert!( - prompt.starts_with(&expected_taxonomy), - "composed prompt should start with the compact generated tool taxonomy" - ); + // The generated "## Core Tool Taxonomy" block now travels in the + // request-time metadata rather than being prepended here. + // (The "## Toolbox" section from the Constitutional preamble remains.) + assert!(!prompt.contains("## Core Tool Taxonomy")); + assert!(prompt.contains("You are deepseek-v4-pro")); } #[test] fn plan_prompt_taxonomy_omits_run_tests() { - let prompt = compose_prompt_with_approval_and_model( - AppMode::Plan, - Personality::Calm, - ApprovalMode::Never, - "deepseek-v4-pro", - ); - let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Plan); - + let taxonomy = render_core_tool_taxonomy_block(AppMode::Plan); + // Plan taxonomy should omit execution tools (verified at the source). assert!( - prompt.starts_with(&expected_taxonomy), - "Plan prompt should start with its mode-specific tool taxonomy" - ); - assert!( - expected_taxonomy.contains("for discovery") - && expected_taxonomy.contains("for git inspection"), + taxonomy.contains("for discovery") && taxonomy.contains("for git inspection"), "Plan taxonomy should keep read-only discovery and git guidance" ); assert!( - !expected_taxonomy.contains("run_tests") - && !expected_taxonomy.contains("run_verifiers") - && !expected_taxonomy.contains("for verification") - && !expected_taxonomy.contains("Use "), - "Plan taxonomy must not advertise unavailable verification tools: {expected_taxonomy:?}" + !taxonomy.contains("run_tests") + && !taxonomy.contains("run_verifiers") + && !taxonomy.contains("exec_shell"), + "Plan taxonomy must not mention run_tests, run_verifiers, or exec_shell" ); + // The taxonomy block is rendered correctly but no longer inlined + // into the base system prompt — it travels in request-time + // metadata instead. } #[test] @@ -1471,7 +1436,6 @@ mod tests { None, None, PromptSessionContext::default(), - ApprovalMode::Suggest, ) { SystemPrompt::Text(text) => text, SystemPrompt::Blocks(_) => panic!("expected text system prompt"), @@ -1677,7 +1641,6 @@ mod tests { show_thinking: true, allow_shell: true, }, - ApprovalMode::Suggest, ) { SystemPrompt::Text(text) => text, SystemPrompt::Blocks(_) => panic!("expected text system prompt"), @@ -1749,7 +1712,6 @@ mod tests { show_thinking: true, allow_shell: true, }, - ApprovalMode::Suggest, ) { SystemPrompt::Text(text) => text, SystemPrompt::Blocks(_) => panic!("expected text system prompt"), @@ -1794,7 +1756,6 @@ mod tests { show_thinking: false, allow_shell: true, }, - ApprovalMode::Suggest, ) { SystemPrompt::Text(text) => text, SystemPrompt::Blocks(_) => panic!("expected text system prompt"), @@ -1849,7 +1810,6 @@ mod tests { show_thinking: true, allow_shell: true, }, - ApprovalMode::Suggest, ) { SystemPrompt::Text(text) => text, SystemPrompt::Blocks(_) => panic!("expected text system prompt"), @@ -2175,10 +2135,10 @@ mod tests { assert!(prompt.contains("You are codewhale")); // Personality layer assert!(prompt.contains("Personality: Calm")); - // Mode layer - assert!(prompt.contains("Mode: Agent")); - // Approval layer - assert!(prompt.contains("Approval Policy: Suggest")); + // Mode and approval are no longer inlined — they travel as + // request-time runtime metadata. + assert!(!prompt.contains("Mode: Agent")); + assert!(!prompt.contains("Approval Policy:")); } /// Gate against shipping a release with a missing CHANGELOG entry — which @@ -2233,32 +2193,37 @@ mod tests { let prompt = compose_prompt(AppMode::Yolo, Personality::Calm); let base_pos = prompt.find("You are codewhale").unwrap(); let personality_pos = prompt.find("Personality: Calm").unwrap(); - let mode_pos = prompt.find("Mode: YOLO").unwrap(); - let approval_pos = prompt.find("Approval Policy: Auto").unwrap(); assert!(base_pos < personality_pos); - assert!(personality_pos < mode_pos); - assert!(mode_pos < approval_pos); + // Mode and approval text are no longer inlined — they travel as + // request-time runtime metadata. } #[test] - fn each_mode_gets_correct_approval() { - assert!( - compose_prompt(AppMode::Agent, Personality::Calm).contains("Approval Policy: Suggest") - ); - assert!(compose_prompt(AppMode::Yolo, Personality::Calm).contains("Approval Policy: Auto")); - assert!( - compose_prompt(AppMode::Plan, Personality::Calm).contains("Approval Policy: Never") - ); + fn base_prompt_is_mode_agnostic() { + // Mode and approval text are no longer inlined into compose_prompt — + // they travel as request-time runtime metadata. + let agent_prompt = compose_prompt(AppMode::Agent, Personality::Calm); + let yolo_prompt = compose_prompt(AppMode::Yolo, Personality::Calm); + let plan_prompt = compose_prompt(AppMode::Plan, Personality::Calm); + assert!(!agent_prompt.contains("Mode: Agent")); + assert!(!yolo_prompt.contains("Mode: YOLO")); + assert!(!plan_prompt.contains("Mode: Plan")); + assert!(!agent_prompt.contains("Approval Policy:")); + assert!(!yolo_prompt.contains("Approval Policy:")); + assert!(!plan_prompt.contains("Approval Policy:")); + // Base prompt still contains Constitutional preamble and personality + assert!(agent_prompt.contains("You are codewhale")); + assert!(agent_prompt.contains("Personality: Calm")); } #[test] - fn agent_prompt_can_reflect_never_approval_policy() { - let prompt = - compose_prompt_with_approval(AppMode::Agent, Personality::Calm, ApprovalMode::Never); - assert!(prompt.contains("Mode: Agent")); - assert!(prompt.contains("Approval Policy: Never")); - assert!(prompt.contains("/config approval_mode suggest")); + fn approval_policy_no_longer_inlined_in_base_prompt() { + let prompt = compose_prompt_with_approval(AppMode::Agent, Personality::Calm); + assert!(!prompt.contains("Mode: Agent")); + assert!(!prompt.contains("Approval Policy:")); + // Constitutional preamble is still present + assert!(prompt.contains("You are codewhale")); } #[test]