Merge pull request #2801 from Hmbown/codex/harvest-2687-runtime-prompt-metadata

feat(cache): project mode prompts per request
This commit is contained in:
Hunter Bown
2026-06-05 09:24:30 -07:00
committed by GitHub
11 changed files with 398 additions and 286 deletions
+4
View File
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Appended volatile `<turn_meta>` blocks after user text in outgoing user
message content arrays so provider prefix caches can keep matching the stable
user-input prefix across date, route, and working-set changes.
- Projected mode, approval, and tool-taxonomy prompt metadata per request
instead of mutating stored system prompts, keeping provider prefix-cache
inputs byte-stable while preserving mode-specific instructions (#2687).
Thanks @LeoAlex0 for the implementation.
- Softened contribution intake automation: external issues now receive a warm
triage note and are never auto-closed by the contribution gate, while the PR
gate copy makes clear that dry-run observations are about maintainer safety,
+4
View File
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Appended volatile `<turn_meta>` blocks after user text in outgoing user
message content arrays so provider prefix caches can keep matching the stable
user-input prefix across date, route, and working-set changes.
- Projected mode, approval, and tool-taxonomy prompt metadata per request
instead of mutating stored system prompts, keeping provider prefix-cache
inputs byte-stable while preserving mode-specific instructions (#2687).
Thanks @LeoAlex0 for the implementation.
- Softened contribution intake automation: external issues now receive a warm
triage note and are never auto-closed by the contribution gate, while the PR
gate copy makes clear that dry-run observations are about maintainer safety,
+19 -4
View File
@@ -1225,10 +1225,25 @@ mod tests {
let result = cache(&mut app, Some("inspect"));
let msg = result.message.expect("inspect output");
assert!(msg.contains("original_chars=14000"), "got: {msg}");
assert!(msg.contains("truncated=true"), "got: {msg}");
assert!(msg.contains("deduplicated=false"), "got: {msg}");
assert!(msg.contains("deduplicated=true"), "got: {msg}");
let tool_budget_lines: Vec<_> = msg
.lines()
.filter(|line| line.contains("original_chars=14000"))
.collect();
assert_eq!(tool_budget_lines.len(), 2, "got: {msg}");
let first_sighting = tool_budget_lines
.iter()
.find(|line| line.contains("deduplicated=false"))
.expect("first tool-result sighting should report non-dedup metadata");
assert!(first_sighting.contains("sent_chars="), "got: {msg}");
assert!(first_sighting.contains("truncated=true"), "got: {msg}");
let repeat_sighting = tool_budget_lines
.iter()
.find(|line| line.contains("deduplicated=true"))
.expect("repeat tool-result sighting should report dedup metadata");
assert!(repeat_sighting.contains("sent_chars="), "got: {msg}");
assert!(repeat_sighting.contains("truncated=false"), "got: {msg}");
}
#[test]
+116 -22
View File
@@ -687,7 +687,6 @@ impl Engine {
show_thinking: config.show_thinking,
allow_shell: config.allow_shell,
},
session.approval_mode,
);
let stable_prompt = Some(system_prompt);
session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref()));
@@ -853,11 +852,12 @@ impl Engine {
self.session.trust_mode = trust_mode;
self.config.trust_mode = trust_mode;
self.session.auto_approve = auto_approve;
self.session.approval_mode = if auto_approve {
crate::tui::approval::ApprovalMode::Auto
} else {
approval_mode
};
let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
// Only track the Agent-mode approval — Yolo/Plan have fixed
// approval policies that are derived from the mode itself.
if mode == AppMode::Agent {
self.session.approval_mode = agent_approval_mode;
}
let _ = self
.tx_event
@@ -1236,7 +1236,6 @@ impl Engine {
Op::ChangeMode { mode } => {
let previous_mode = self.current_mode;
self.current_mode = mode;
self.refresh_system_prompt(mode);
self.emit_session_updated().await;
// Notify the agent that the mode has changed so it can re-evaluate
// any operations that were blocked by the previous mode's policy.
@@ -1253,11 +1252,11 @@ impl Engine {
)))
.await;
}
Op::SetModel { model, mode } => {
Op::SetModel { model, mode: _ } => {
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
self.session.model = model;
self.config.model.clone_from(&self.session.model);
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;
let _ = self
.tx_event
@@ -1304,6 +1303,10 @@ impl Engine {
self.session.compaction_summary_prompt =
extract_compaction_summary_prompt(system_prompt.clone());
self.session.system_prompt = system_prompt;
self.session.last_system_prompt_hash =
Some(system_prompt_hash(self.session.system_prompt.as_ref()));
// Host-supplied prompts are persisted prefixes. Keep them
// byte-stable; mode/runtime state is projected per request.
self.session.system_prompt_override =
system_prompt_override && self.session.system_prompt.is_some();
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
@@ -1485,6 +1488,18 @@ In {new} mode: {policy}\n\n\
}
}
fn runtime_prompt_message(&self) -> Message {
let mode = self.current_mode;
let approval_mode = approval_mode_for(mode, self.session.approval_mode);
Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: runtime_prompt_text(mode, approval_mode),
cache_control: None,
}],
}
}
fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
self.user_text_message_with_turn_metadata_for_route(
text,
@@ -1633,6 +1648,14 @@ In {new} mode: {policy}\n\n\
.observe_user_message(&content, &self.session.workspace);
let force_update_plan_first = should_force_update_plan_first(mode, &content);
let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
self.session.auto_approve = auto_approve;
// Only track the Agent-mode approval — Yolo/Plan have fixed
// approval policies that are derived from the mode itself.
if mode == AppMode::Agent {
self.session.approval_mode = agent_approval_mode;
}
// Add user message to session
let user_msg = self.user_text_message_with_turn_metadata_for_route(
content,
@@ -1670,15 +1693,10 @@ In {new} mode: {policy}\n\n\
self.config.trust_mode = trust_mode;
self.config.translation_enabled = translation_enabled;
self.config.show_thinking = show_thinking;
self.session.auto_approve = auto_approve;
self.session.approval_mode = if auto_approve {
crate::tui::approval::ApprovalMode::Auto
} else {
approval_mode
};
// Update system prompt to match current mode and include persisted compaction context.
self.refresh_system_prompt(mode);
// Refresh stable prompt context. Current mode is carried by the
// request-time runtime prompt projection.
self.refresh_system_prompt();
self.emit_session_updated().await;
// Build tool registry and tool list for the current mode
@@ -2430,8 +2448,8 @@ In {new} mode: {policy}\n\n\
)))
.await;
}
/// Refresh the system prompt based on current mode and context.
fn refresh_system_prompt(&mut self, mode: AppMode) {
/// Refresh the stable system prompt based on current non-mode context.
fn refresh_system_prompt(&mut self) {
let user_memory_block =
crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path);
let prompt_goal_objective = goal_objective_for_prompt(
@@ -2439,7 +2457,7 @@ In {new} mode: {policy}\n\n\
&self.config.goal_state,
);
let base = prompts::system_prompt_for_mode_with_context_skills_session_and_approval(
mode,
AppMode::Agent,
&self.config.workspace,
None,
Some(&self.config.skills_dir),
@@ -2454,7 +2472,6 @@ In {new} mode: {policy}\n\n\
show_thinking: self.config.show_thinking,
allow_shell: self.session.allow_shell,
},
self.session.approval_mode,
);
let mut stable_prompt =
merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone());
@@ -2472,7 +2489,6 @@ In {new} mode: {policy}\n\n\
let stable_hash = system_prompt_hash(stable_prompt.as_ref());
if self.session.system_prompt_override {
self.session.last_system_prompt_hash = Some(stable_hash);
return;
}
if self.session.last_system_prompt_hash != Some(stable_hash) {
@@ -2634,6 +2650,84 @@ fn goal_objective_for_prompt(
normalized_goal_objective(configured_goal)
}
// ── Mode & approval prompts as request-time runtime metadata ─────────
//
// Mode contracts and approval policies are not persisted in the session
// history and are not sent as extra system messages. Instead, each API
// request projects a transient user-role runtime metadata message at the
// tail. The stable system prompt remains byte-stable, stored history remains
// byte-stable, and strict chat-template providers never see a system message
// outside messages[0].
fn approval_mode_for(
mode: AppMode,
session_approval: crate::tui::approval::ApprovalMode,
) -> crate::tui::approval::ApprovalMode {
match mode {
AppMode::Yolo => crate::tui::approval::ApprovalMode::Auto,
AppMode::Plan => crate::tui::approval::ApprovalMode::Never,
AppMode::Agent => session_approval,
}
}
fn agent_approval_mode_for_turn(
auto_approve: bool,
approval_mode: crate::tui::approval::ApprovalMode,
) -> crate::tui::approval::ApprovalMode {
if auto_approve {
crate::tui::approval::ApprovalMode::Auto
} else {
approval_mode
}
}
fn mode_prompt_marker(mode: AppMode) -> String {
format!(
"<mode_prompt mode=\"{}\">",
match mode {
AppMode::Agent => "agent",
AppMode::Plan => "plan",
AppMode::Yolo => "yolo",
}
)
}
fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String {
format!(
"<approval_policy policy=\"{}\">",
match approval_mode {
crate::tui::approval::ApprovalMode::Auto => "auto",
crate::tui::approval::ApprovalMode::Suggest => "suggest",
crate::tui::approval::ApprovalMode::Never => "never",
}
)
}
fn mode_prompt_text(mode: AppMode) -> &'static str {
match mode {
AppMode::Agent => prompts::AGENT_MODE,
AppMode::Plan => prompts::PLAN_MODE,
AppMode::Yolo => prompts::YOLO_MODE,
}
}
fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
let marker = mode_prompt_marker(mode);
let mode_text = mode_prompt_text(mode).trim();
let taxonomy = prompts::render_core_tool_taxonomy_block(mode);
let approval_marker = approval_prompt_marker(approval_mode);
let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim();
format!(
"<runtime_prompt visibility=\"internal\">\n\
This is runtime control metadata for the current request, not user input. \
Apply it to the next assistant response and tool calls. It supersedes any \
earlier mode or approval metadata in the transcript.\n\n\
{marker}\n{taxonomy}\n{mode_text}\n</mode_prompt>\n\n\
{approval_marker}\n{approval_text}\n</approval_policy>\n\
</runtime_prompt>"
)
}
/// Spawn the engine in a background task
pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle {
let (engine, handle) = Engine::new(config, api_config);
+7 -11
View File
@@ -36,7 +36,7 @@ impl Engine {
pub(super) async fn run_capacity_post_tool_checkpoint(
&mut self,
turn: &TurnContext,
mode: AppMode,
tool_registry: Option<&crate::tools::ToolRegistry>,
tool_exec_lock: Arc<RwLock<()>>,
mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
@@ -56,7 +56,6 @@ impl Engine {
let _ = self
.apply_verify_with_tool_replay(
turn,
mode,
snapshot.as_ref(),
tool_registry,
tool_exec_lock,
@@ -66,7 +65,7 @@ impl Engine {
false
}
GuardrailAction::VerifyAndReplan => {
self.apply_verify_and_replan(turn, mode, snapshot.as_ref(), "high_risk_post_tool")
self.apply_verify_and_replan(turn, snapshot.as_ref(), "high_risk_post_tool")
.await
}
GuardrailAction::NoIntervention | GuardrailAction::TargetedContextRefresh => false,
@@ -76,7 +75,7 @@ impl Engine {
pub(super) async fn run_capacity_error_escalation_checkpoint(
&mut self,
turn: &TurnContext,
mode: AppMode,
step_error_count: usize,
consecutive_tool_error_steps: u32,
error_categories: &[ErrorCategory],
@@ -136,7 +135,6 @@ impl Engine {
let category_labels: Vec<String> = error_categories.iter().map(|c| c.to_string()).collect();
self.apply_verify_and_replan(
turn,
mode,
Some(&forced),
&format!(
"error_escalation: step_errors={}, consecutive_steps={}, categories={}",
@@ -385,7 +383,7 @@ impl Engine {
&mut self,
turn: &TurnContext,
client: Option<&DeepSeekClient>,
mode: AppMode,
_mode: AppMode,
snapshot: Option<&CapacitySnapshot>,
) -> bool {
let before_tokens = self.estimated_input_tokens();
@@ -465,7 +463,7 @@ impl Engine {
GuardrailAction::TargetedContextRefresh,
None,
)));
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;
let after_tokens = self.estimated_input_tokens();
@@ -487,7 +485,6 @@ impl Engine {
pub(super) async fn apply_verify_with_tool_replay(
&mut self,
turn: &TurnContext,
mode: AppMode,
snapshot: Option<&CapacitySnapshot>,
tool_registry: Option<&crate::tools::ToolRegistry>,
tool_exec_lock: Arc<RwLock<()>>,
@@ -617,7 +614,7 @@ impl Engine {
GuardrailAction::VerifyWithToolReplay,
Some(&verification_note),
)));
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;
let after_tokens = self.estimated_input_tokens();
@@ -638,7 +635,6 @@ impl Engine {
pub(super) async fn apply_verify_and_replan(
&mut self,
turn: &TurnContext,
mode: AppMode,
snapshot: Option<&CapacitySnapshot>,
reason: &str,
) -> bool {
@@ -685,7 +681,7 @@ impl Engine {
GuardrailAction::VerifyAndReplan,
Some("Replan now from canonical state. Keep steps minimal and verifiable."),
)));
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
self.emit_session_updated().await;
let _ = self
+174 -133
View File
@@ -303,7 +303,7 @@ fn refresh_system_prompt_uses_runtime_goal_state() {
goal.create("Close the runtime goal loop".to_string(), None);
}
engine.refresh_system_prompt(AppMode::Agent);
engine.refresh_system_prompt();
let prompt = match engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text,
Some(SystemPrompt::Blocks(blocks)) => blocks
@@ -505,116 +505,36 @@ fn tool_exec_outcome_tracks_duration() {
#[test]
fn core_native_tools_stay_loaded_in_yolo_mode() {
let always_load = HashSet::new();
assert!(!should_default_defer_tool(
"exec_shell",
AppMode::Yolo,
&always_load
));
assert!(!should_default_defer_tool("exec_shell", &always_load));
// git_blame remains deferred (read-only git history beyond log/show/diff).
assert!(should_default_defer_tool(
"git_blame",
AppMode::Yolo,
&always_load
));
assert!(should_default_defer_tool("git_blame", &always_load));
}
#[test]
fn non_yolo_mode_retains_default_defer_policy() {
let always_load = HashSet::new();
assert!(!should_default_defer_tool(
"exec_shell",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"edit_file",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"apply_patch",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"fetch_url",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"git_diff",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool("exec_shell", &always_load));
assert!(!should_default_defer_tool("edit_file", &always_load));
assert!(!should_default_defer_tool("apply_patch", &always_load));
assert!(!should_default_defer_tool("fetch_url", &always_load));
assert!(!should_default_defer_tool("git_diff", &always_load));
// #2654: read-only git history joins the active set.
assert!(!should_default_defer_tool(
"git_log",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"git_show",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"git_status",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"run_tests",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"agent_open",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool("git_log", &always_load));
assert!(!should_default_defer_tool("git_show", &always_load));
assert!(!should_default_defer_tool("git_status", &always_load));
assert!(!should_default_defer_tool("run_tests", &always_load));
assert!(!should_default_defer_tool("agent_open", &always_load));
// #2605: the fetch/close side of the sub-agent surface must also stay
// active so a first `agent_eval`/`agent_close` executes instead of
// hydrating its schema and forcing a double-invoke.
assert!(!should_default_defer_tool(
"agent_eval",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"agent_close",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"read_file",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"web_search",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"write_file",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"task_shell_start",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"task_shell_wait",
AppMode::Agent,
&always_load
));
assert!(should_default_defer_tool(
"git_blame",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool("agent_eval", &always_load));
assert!(!should_default_defer_tool("agent_close", &always_load));
assert!(!should_default_defer_tool("read_file", &always_load));
assert!(!should_default_defer_tool("web_search", &always_load));
assert!(!should_default_defer_tool("write_file", &always_load));
assert!(!should_default_defer_tool("task_shell_start", &always_load));
assert!(!should_default_defer_tool("task_shell_wait", &always_load));
assert!(should_default_defer_tool("git_blame", &always_load));
}
#[test]
@@ -815,11 +735,7 @@ fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() {
#[test]
fn tools_always_load_overrides_default_native_deferral() {
let always_load = HashSet::from(["git_blame".to_string()]);
assert!(!should_default_defer_tool(
"git_blame",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool("git_blame", &always_load));
}
#[test]
@@ -1795,15 +1711,20 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
.await
.expect("send change mode");
let prompt = {
let (_prompt, messages) = {
let mut rx = handle.rx_event.write().await;
loop {
let event = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
.await
.expect("session update after mode switch")
.expect("event");
if let Event::SessionUpdated { system_prompt, .. } = event {
break match system_prompt.expect("system prompt") {
if let Event::SessionUpdated {
system_prompt,
messages,
..
} = event
{
let prompt = match system_prompt.expect("system prompt") {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(blocks) => blocks
.into_iter()
@@ -1811,13 +1732,98 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
.collect::<Vec<_>>()
.join("\n"),
};
break (prompt, messages);
}
}
};
run.abort();
assert!(prompt.contains("Mode: YOLO"));
assert!(prompt.contains("Approval Policy: Auto"));
assert!(
messages.iter().all(|message| message.role != "system"),
"mode switch must not persist appended system messages: {messages:?}"
);
assert!(
messages.iter().all(|message| {
message.content.iter().all(|block| {
!matches!(
block,
ContentBlock::Text { text, .. }
if text.contains("<mode_prompt") || text.contains("<approval_policy")
)
})
}),
"mode/approval prompts should be request-time metadata, not session history"
);
}
#[test]
fn turn_approval_mode_prefers_auto_approve_flag() {
use crate::tui::approval::ApprovalMode;
assert_eq!(
agent_approval_mode_for_turn(true, ApprovalMode::Suggest),
ApprovalMode::Auto
);
assert_eq!(
approval_mode_for(
AppMode::Agent,
agent_approval_mode_for_turn(true, ApprovalMode::Never),
),
ApprovalMode::Auto
);
assert_eq!(
approval_mode_for(AppMode::Yolo, ApprovalMode::Suggest),
ApprovalMode::Auto
);
assert_eq!(
approval_mode_for(AppMode::Plan, ApprovalMode::Auto),
ApprovalMode::Never
);
}
#[test]
fn runtime_prompt_is_projected_without_persisting_to_session_messages() {
use crate::tui::approval::ApprovalMode;
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine.current_mode = AppMode::Plan;
engine.session.approval_mode = ApprovalMode::Suggest;
engine.session.messages = vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "summary after compaction".to_string(),
cache_control: None,
}],
}];
let stored = engine.session.messages.clone();
let request_messages = engine.messages_with_turn_metadata();
assert_eq!(engine.session.messages, stored);
assert_eq!(request_messages.len(), stored.len() + 1);
assert!(
request_messages
.iter()
.all(|message| message.role != "system"),
"runtime prompts must not create appended system messages"
);
let runtime = request_messages.last().expect("runtime prompt message");
assert_eq!(runtime.role, "user");
let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
else {
panic!("expected text runtime prompt");
};
assert!(text.contains("<runtime_prompt"));
assert!(text.contains("<mode_prompt mode=\"plan\">"));
assert!(
text.contains("<approval_policy policy=\"never\">"),
"Plan mode should project its fixed never-approval policy: {text}"
);
}
#[tokio::test]
@@ -2216,7 +2222,7 @@ fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() {
.working_set
.observe_user_message("please inspect src/lib.rs", tmp.path());
engine.refresh_system_prompt(AppMode::Agent);
engine.refresh_system_prompt();
let prompt = match &engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text.clone(),
@@ -2251,7 +2257,7 @@ fn working_set_reaches_model_as_turn_metadata() {
let messages = engine.messages_with_turn_metadata();
let last_block = messages
.last()
.first()
.and_then(|message| message.content.last())
.expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
@@ -2276,7 +2282,7 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
let messages = engine.messages_with_turn_metadata();
let last_block = messages
.last()
.first()
.and_then(|message| message.content.last())
.expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
@@ -2455,7 +2461,16 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
engine.session.add_message(first_user.clone());
let first_request = engine.messages_with_turn_metadata();
assert_eq!(first_request, engine.session.messages);
assert_eq!(
&first_request[..engine.session.messages.len()],
engine.session.messages.as_slice()
);
assert_eq!(first_request.len(), engine.session.messages.len() + 1);
assert_eq!(first_request.first(), Some(&first_user));
assert_eq!(
first_request.last().map(|message| message.role.as_str()),
Some("user")
);
engine.session.add_message(Message {
role: "assistant".to_string(),
@@ -2472,14 +2487,24 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
engine.session.add_message(second_user);
let second_request = engine.messages_with_turn_metadata();
assert_eq!(second_request, engine.session.messages);
assert_eq!(
&second_request[..engine.session.messages.len()],
engine.session.messages.as_slice()
);
assert_eq!(second_request.len(), engine.session.messages.len() + 1);
assert_eq!(second_request.first(), Some(&first_user));
let runtime = second_request.last().expect("runtime prompt");
let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
else {
panic!("expected runtime prompt text");
};
assert!(text.contains("<runtime_prompt"));
}
/// v0.8.11 regression: tool-result messages serialize to role="tool" on
/// the wire but are stored as role="user" internally. `<turn_meta>` must
/// be stored only on actual user-text messages, not retroactively added
/// to tool-result messages at request time.
/// be stored only on actual user-text messages. Request-time runtime metadata
/// is appended separately and must not mutate tool-result messages.
#[test]
fn turn_metadata_skips_tool_result_messages() {
let tmp = tempdir().expect("tempdir");
@@ -2522,9 +2547,11 @@ fn turn_metadata_skips_tool_result_messages() {
let messages = engine.messages_with_turn_metadata();
// The trailing message is the tool result and MUST be untouched —
// The stored trailing message is the tool result and MUST be untouched —
// no Text block sneaking in front of the ToolResult block.
let trailing = messages.last().expect("trailing message");
let trailing = messages
.get(messages.len().saturating_sub(2))
.expect("stored trailing message");
assert_eq!(trailing.role, "user");
assert_eq!(trailing.content.len(), 1);
assert!(matches!(
@@ -2546,6 +2573,14 @@ fn turn_metadata_skips_tool_result_messages() {
panic!("expected Text block for turn_meta at tail");
};
assert!(meta.starts_with("<turn_meta>\n"));
assert!(meta.contains("src/lib.rs"));
assert!(
matches!(
messages.last().and_then(|message| message.content.first()),
Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
),
"request projection should append transient runtime metadata"
);
}
/// User text must appear before turn_meta in the content array so that
@@ -2588,8 +2623,8 @@ fn user_message_turn_meta_is_appended_not_prepended() {
}
/// When the turn is mid-execution and the trailing user message is a
/// tool result, no turn_meta is injected at request time. The working_set
/// surfaces again on the next stored user-text message.
/// tool result, no turn_meta is injected into that tool-result message. The
/// working_set surfaces again on the next stored user-text message.
#[test]
fn turn_metadata_skips_when_only_tool_results_trail() {
let tmp = tempdir().expect("tempdir");
@@ -2622,14 +2657,21 @@ fn turn_metadata_skips_when_only_tool_results_trail() {
let messages = engine.messages_with_turn_metadata();
// Returned unchanged: the single tool-result message, no Text
// prefix, content length == 1.
let only = messages.last().expect("trailing message");
// Stored tool-result message is unchanged: no Text prefix, content length == 1.
let only = messages.first().expect("stored tool result message");
assert_eq!(only.content.len(), 1);
assert!(matches!(
only.content.first(),
Some(ContentBlock::ToolResult { .. })
));
assert_eq!(messages.len(), 2);
assert!(
matches!(
messages.last().and_then(|message| message.content.first()),
Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
),
"request projection should still append transient runtime metadata"
);
}
#[test]
@@ -2641,10 +2683,10 @@ fn refresh_system_prompt_is_noop_when_unchanged() {
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine.refresh_system_prompt(AppMode::Agent);
engine.refresh_system_prompt();
let first_hash = engine.session.last_system_prompt_hash;
let first_prompt = engine.session.system_prompt.clone();
engine.refresh_system_prompt(AppMode::Agent);
engine.refresh_system_prompt();
assert_eq!(engine.session.last_system_prompt_hash, first_hash);
assert_eq!(engine.session.system_prompt, first_prompt);
@@ -2691,7 +2733,7 @@ fn text_system_prompt_override_via_runtime_sync_survives_refresh() {
let expected = Some(prompt.clone());
sync_runtime_system_prompt_override(&mut engine, prompt);
engine.refresh_system_prompt(AppMode::Agent);
engine.refresh_system_prompt();
assert_eq!(engine.session.system_prompt, expected);
}
@@ -2712,7 +2754,7 @@ fn blocks_system_prompt_override_via_runtime_sync_survives_mode_change_refresh()
let expected = Some(prompt.clone());
sync_runtime_system_prompt_override(&mut engine, prompt);
engine.refresh_system_prompt(AppMode::Plan);
engine.refresh_system_prompt();
assert_eq!(engine.session.system_prompt, expected);
}
@@ -2732,7 +2774,7 @@ fn compaction_summary_stays_in_stable_system_prompt() {
.session
.working_set
.observe_user_message("continue in src/main.rs", tmp.path());
engine.refresh_system_prompt(AppMode::Agent);
engine.refresh_system_prompt();
engine.merge_compaction_summary(Some(SystemPrompt::Blocks(vec![SystemBlock {
block_type: "text".to_string(),
text: format!("{COMPACTION_SUMMARY_MARKER}\nsummary"),
@@ -2885,7 +2927,6 @@ async fn post_tool_replay_invoked_when_high_non_severe_risk() {
let restarted = engine
.run_capacity_post_tool_checkpoint(
&turn,
AppMode::Agent,
Some(&registry),
Arc::new(RwLock::new(())),
None,
@@ -2946,7 +2987,7 @@ async fn error_escalation_triggers_replan_when_severe_or_repeated_failures() {
let before_len = engine.session.messages.len();
let turn = TurnContext::new(10);
let restarted = engine
.run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[])
.run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
.await;
assert!(restarted);
@@ -3004,7 +3045,7 @@ async fn capacity_disabled_by_default_keeps_messages_intact() {
let before_len = engine.session.messages.len();
let turn = TurnContext::new(10);
let restarted = engine
.run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[])
.run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
.await;
// Capacity is disabled → no replan, no message clear.
+5 -14
View File
@@ -67,11 +67,7 @@ pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[
"write_file",
];
pub(super) fn should_default_defer_tool(
name: &str,
_mode: AppMode,
always_load: &HashSet<String>,
) -> bool {
pub(super) fn should_default_defer_tool(name: &str, always_load: &HashSet<String>) -> bool {
if always_load.contains(name) {
return false;
}
@@ -85,13 +81,9 @@ pub(super) fn should_default_defer_tool(
.any(|core_tool| core_tool == &name)
}
pub(super) fn apply_native_tool_deferral(
catalog: &mut [Tool],
mode: AppMode,
always_load: &HashSet<String>,
) {
pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], always_load: &HashSet<String>) {
for tool in catalog {
tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load));
}
}
@@ -185,7 +177,7 @@ pub(super) fn build_model_tool_catalog(
mode: AppMode,
always_load: &HashSet<String>,
) -> Vec<Tool> {
apply_native_tool_deferral(&mut native_tools, mode, always_load);
apply_native_tool_deferral(&mut native_tools, always_load);
apply_mcp_tool_deferral(&mut mcp_tools, mode);
// Sort each partition by name for prefix-cache stability (#263). The
// upstream `to_api_tools()` already sorts the registry's HashMap output;
@@ -229,7 +221,6 @@ pub(super) fn ensure_advanced_tooling(
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(should_default_defer_tool(
CODE_EXECUTION_TOOL_NAME,
mode,
always_load,
)),
input_examples: None,
@@ -248,7 +239,7 @@ pub(super) fn ensure_advanced_tooling(
&& crate::dependencies::resolve_node().is_some()
{
let mut tool = crate::tools::js_execution::js_execution_tool_definition();
tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load));
catalog.push(tool);
}
+10 -8
View File
@@ -105,7 +105,7 @@ impl Engine {
}
// Ensure system prompt is up to date with latest session states
self.refresh_system_prompt(mode);
self.refresh_system_prompt();
if turn.at_max_steps() {
let _ = self
@@ -2149,7 +2149,6 @@ impl Engine {
if self
.run_capacity_post_tool_checkpoint(
turn,
mode,
tool_registry,
tool_exec_lock.clone(),
mcp_pool.clone(),
@@ -2181,7 +2180,6 @@ impl Engine {
if self
.run_capacity_error_escalation_checkpoint(
turn,
mode,
step_error_count,
consecutive_tool_error_steps,
&step_error_categories,
@@ -2254,11 +2252,15 @@ impl Engine {
}
pub(super) fn messages_with_turn_metadata(&self) -> Vec<Message> {
// `<turn_meta>` is stored on user-text messages when the message is
// appended. Do not rewrite historical messages at request time: doing
// so makes the API prefix differ from the bytes sent in earlier turns
// and destroys DeepSeek's KV prefix cache reuse.
self.session.messages.clone()
// Keep stored history byte-stable and provider-compatible: runtime
// mode/approval contracts are projected as a transient user message
// at request time instead of being persisted as appended system
// messages. This preserves the stable prefix through all stored
// messages while avoiding strict chat templates that only allow
// system messages at messages[0].
let mut messages = self.session.messages.clone();
messages.push(self.runtime_prompt_message());
messages
}
}
+1 -1
View File
@@ -77,7 +77,7 @@ pub enum Op {
#[allow(dead_code)]
ChangeMode { mode: AppMode },
/// Update the model being used and refresh the prompt for the current mode.
/// Update the model being used and refresh stable prompt context.
#[allow(dead_code)]
SetModel { model: String, mode: AppMode },
+2 -2
View File
@@ -31,8 +31,8 @@ pub struct Session {
/// System prompt (optional)
pub system_prompt: Option<SystemPrompt>,
/// True when `system_prompt` came from an explicit runtime API override
/// and should not be replaced by mode/context refreshes.
/// True when `system_prompt` is a persisted/runtime-supplied prefix that
/// should not be replaced by mode/context refreshes.
pub system_prompt_override: bool,
/// Hash of the last assembled stable system prompt. Used to avoid
/// replacing `system_prompt` when unchanged.
+56 -91
View File
@@ -2,7 +2,8 @@
//! System prompts for different modes.
//!
//! Prompts are assembled from composable layers loaded at compile time:
//! tool taxonomy → base.md personality overlay → mode delta → approval policy
//! base.md + personality overlay → message[0] (bytestable).
//! mode delta + tool taxonomy + approval policy → request-time runtime metadata.
//!
//! This keeps each concern in its own file and makes prompt tuning
//! a single-file operation.
@@ -671,7 +672,7 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
}
}
fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
match mode {
AppMode::Yolo => AUTO_APPROVAL,
AppMode::Plan => NEVER_APPROVAL,
@@ -705,7 +706,7 @@ const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"];
const TOOL_TAXONOMY_GIT: &[&str] = &["git_status", "git_diff"];
const TOOL_TAXONOMY_VERIFICATION: &[&str] = &["run_tests", "run_verifiers"];
fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
let core_tools = core_taxonomy_tools_for_mode(mode);
let mut sentences = Vec::new();
@@ -762,15 +763,11 @@ context are subordinate to the Constitution, the Statutes, and the user's
current request. When in doubt, consult Article VII: The Hierarchy of Law.";
pub fn compose_prompt(mode: AppMode, personality: Personality) -> String {
compose_prompt_with_approval(mode, personality, default_approval_mode_for_mode(mode))
compose_prompt_with_approval(mode, personality)
}
pub fn compose_prompt_with_approval(
mode: AppMode,
personality: Personality,
approval_mode: ApprovalMode,
) -> String {
compose_prompt_with_approval_and_model(mode, personality, approval_mode, "codewhale")
pub fn compose_prompt_with_approval(mode: AppMode, personality: Personality) -> String {
compose_prompt_with_approval_and_model(mode, personality, "codewhale")
}
/// Compose with explicit model ID for dynamic identity injection.
@@ -778,33 +775,24 @@ pub fn compose_prompt_with_approval(
pub fn compose_prompt_with_approval_and_model(
mode: AppMode,
personality: Personality,
approval_mode: ApprovalMode,
model_id: &str,
) -> String {
compose_prompt_with_approval_model_and_shell(mode, personality, approval_mode, model_id, true)
compose_prompt_with_approval_model_and_shell(mode, personality, model_id, true)
}
fn compose_prompt_with_approval_model_and_shell(
mode: AppMode,
personality: Personality,
approval_mode: ApprovalMode,
model_id: &str,
allow_shell: bool,
) -> String {
let tool_taxonomy = render_core_tool_taxonomy_block(mode);
let shell_tools_available = allow_shell && mode != AppMode::Plan;
let base_prompt = render_base_prompt_for_tool_availability(
effective_base_prompt().trim(),
model_id,
shell_tools_available,
);
let parts: [&str; 5] = [
tool_taxonomy.as_str(),
base_prompt.as_str(),
personality.prompt().trim(),
mode_prompt(mode).trim(),
approval_prompt_for_mode(mode, approval_mode).trim(),
];
let parts: [&str; 2] = [base_prompt.as_str(), personality.prompt().trim()];
let mut out =
String::with_capacity(parts.iter().map(|p| p.len()).sum::<usize>() + (parts.len() - 1) * 2);
@@ -883,22 +871,16 @@ fn compose_mode_prompt(mode: AppMode) -> String {
compose_prompt(mode, Personality::Calm)
}
fn compose_mode_prompt_with_approval(mode: AppMode, approval_mode: ApprovalMode) -> String {
compose_prompt_with_approval(mode, Personality::Calm, approval_mode)
fn compose_mode_prompt_with_approval(mode: AppMode) -> String {
compose_prompt_with_approval(mode, Personality::Calm)
}
fn compose_mode_prompt_with_approval_and_model(
mode: AppMode,
approval_mode: ApprovalMode,
_approval_mode: ApprovalMode,
model_id: &str,
) -> String {
compose_prompt_with_approval_model_and_shell(
mode,
Personality::Calm,
approval_mode,
model_id,
true,
)
compose_prompt_with_approval_model_and_shell(mode, Personality::Calm, model_id, true)
}
// ── Public API ────────────────────────────────────────────────────────
@@ -991,7 +973,6 @@ pub fn system_prompt_for_mode_with_context_skills_and_session(
skills_dir,
instructions,
session_context,
default_approval_mode_for_mode(mode),
)
}
@@ -1002,12 +983,10 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
skills_dir: Option<&Path>,
instructions: Option<&[InstructionSource]>,
session_context: PromptSessionContext<'_>,
approval_mode: ApprovalMode,
) -> SystemPrompt {
let mode_prompt = compose_prompt_with_approval_model_and_shell(
mode,
Personality::Calm,
approval_mode,
session_context.model_id,
session_context.allow_shell,
);
@@ -1335,7 +1314,6 @@ mod tests {
let prompt = compose_prompt_with_approval_and_model(
AppMode::Agent,
Personality::Calm,
ApprovalMode::Suggest,
"deepseek-v4-flash",
);
assert!(
@@ -1353,7 +1331,6 @@ mod tests {
let prompt = compose_prompt_with_approval_model_and_shell(
AppMode::Agent,
Personality::Calm,
ApprovalMode::Suggest,
"deepseek-v4-pro",
true,
);
@@ -1369,7 +1346,6 @@ mod tests {
let prompt = compose_prompt_with_approval_model_and_shell(
AppMode::Agent,
Personality::Calm,
ApprovalMode::Suggest,
"deepseek-v4-pro",
false,
);
@@ -1403,47 +1379,36 @@ mod tests {
}
#[test]
fn composed_prompt_starts_with_core_tool_taxonomy() {
fn composed_prompt_no_longer_inlines_tool_taxonomy() {
let prompt = compose_prompt_with_approval_and_model(
AppMode::Agent,
Personality::Calm,
ApprovalMode::Suggest,
"deepseek-v4-pro",
);
let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Agent);
assert!(
prompt.starts_with(&expected_taxonomy),
"composed prompt should start with the compact generated tool taxonomy"
);
// The generated "## Core Tool Taxonomy" block now travels in the
// request-time <mode_prompt> metadata rather than being prepended here.
// (The "## Toolbox" section from the Constitutional preamble remains.)
assert!(!prompt.contains("## Core Tool Taxonomy"));
assert!(prompt.contains("You are deepseek-v4-pro"));
}
#[test]
fn plan_prompt_taxonomy_omits_run_tests() {
let prompt = compose_prompt_with_approval_and_model(
AppMode::Plan,
Personality::Calm,
ApprovalMode::Never,
"deepseek-v4-pro",
);
let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Plan);
let taxonomy = render_core_tool_taxonomy_block(AppMode::Plan);
// Plan taxonomy should omit execution tools (verified at the source).
assert!(
prompt.starts_with(&expected_taxonomy),
"Plan prompt should start with its mode-specific tool taxonomy"
);
assert!(
expected_taxonomy.contains("for discovery")
&& expected_taxonomy.contains("for git inspection"),
taxonomy.contains("for discovery") && taxonomy.contains("for git inspection"),
"Plan taxonomy should keep read-only discovery and git guidance"
);
assert!(
!expected_taxonomy.contains("run_tests")
&& !expected_taxonomy.contains("run_verifiers")
&& !expected_taxonomy.contains("for verification")
&& !expected_taxonomy.contains("Use "),
"Plan taxonomy must not advertise unavailable verification tools: {expected_taxonomy:?}"
!taxonomy.contains("run_tests")
&& !taxonomy.contains("run_verifiers")
&& !taxonomy.contains("exec_shell"),
"Plan taxonomy must not mention run_tests, run_verifiers, or exec_shell"
);
// The taxonomy block is rendered correctly but no longer inlined
// into the base system prompt — it travels in request-time
// <mode_prompt> metadata instead.
}
#[test]
@@ -1471,7 +1436,6 @@ mod tests {
None,
None,
PromptSessionContext::default(),
ApprovalMode::Suggest,
) {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1677,7 +1641,6 @@ mod tests {
show_thinking: true,
allow_shell: true,
},
ApprovalMode::Suggest,
) {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1749,7 +1712,6 @@ mod tests {
show_thinking: true,
allow_shell: true,
},
ApprovalMode::Suggest,
) {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1794,7 +1756,6 @@ mod tests {
show_thinking: false,
allow_shell: true,
},
ApprovalMode::Suggest,
) {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -1849,7 +1810,6 @@ mod tests {
show_thinking: true,
allow_shell: true,
},
ApprovalMode::Suggest,
) {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
@@ -2175,10 +2135,10 @@ mod tests {
assert!(prompt.contains("You are codewhale"));
// Personality layer
assert!(prompt.contains("Personality: Calm"));
// Mode layer
assert!(prompt.contains("Mode: Agent"));
// Approval layer
assert!(prompt.contains("Approval Policy: Suggest"));
// Mode and approval are no longer inlined — they travel as
// request-time runtime metadata.
assert!(!prompt.contains("Mode: Agent"));
assert!(!prompt.contains("Approval Policy:"));
}
/// Gate against shipping a release with a missing CHANGELOG entry — which
@@ -2233,32 +2193,37 @@ mod tests {
let prompt = compose_prompt(AppMode::Yolo, Personality::Calm);
let base_pos = prompt.find("You are codewhale").unwrap();
let personality_pos = prompt.find("Personality: Calm").unwrap();
let mode_pos = prompt.find("Mode: YOLO").unwrap();
let approval_pos = prompt.find("Approval Policy: Auto").unwrap();
assert!(base_pos < personality_pos);
assert!(personality_pos < mode_pos);
assert!(mode_pos < approval_pos);
// Mode and approval text are no longer inlined — they travel as
// request-time runtime metadata.
}
#[test]
fn each_mode_gets_correct_approval() {
assert!(
compose_prompt(AppMode::Agent, Personality::Calm).contains("Approval Policy: Suggest")
);
assert!(compose_prompt(AppMode::Yolo, Personality::Calm).contains("Approval Policy: Auto"));
assert!(
compose_prompt(AppMode::Plan, Personality::Calm).contains("Approval Policy: Never")
);
fn base_prompt_is_mode_agnostic() {
// Mode and approval text are no longer inlined into compose_prompt —
// they travel as request-time runtime metadata.
let agent_prompt = compose_prompt(AppMode::Agent, Personality::Calm);
let yolo_prompt = compose_prompt(AppMode::Yolo, Personality::Calm);
let plan_prompt = compose_prompt(AppMode::Plan, Personality::Calm);
assert!(!agent_prompt.contains("Mode: Agent"));
assert!(!yolo_prompt.contains("Mode: YOLO"));
assert!(!plan_prompt.contains("Mode: Plan"));
assert!(!agent_prompt.contains("Approval Policy:"));
assert!(!yolo_prompt.contains("Approval Policy:"));
assert!(!plan_prompt.contains("Approval Policy:"));
// Base prompt still contains Constitutional preamble and personality
assert!(agent_prompt.contains("You are codewhale"));
assert!(agent_prompt.contains("Personality: Calm"));
}
#[test]
fn agent_prompt_can_reflect_never_approval_policy() {
let prompt =
compose_prompt_with_approval(AppMode::Agent, Personality::Calm, ApprovalMode::Never);
assert!(prompt.contains("Mode: Agent"));
assert!(prompt.contains("Approval Policy: Never"));
assert!(prompt.contains("/config approval_mode suggest"));
fn approval_policy_no_longer_inlined_in_base_prompt() {
let prompt = compose_prompt_with_approval(AppMode::Agent, Personality::Calm);
assert!(!prompt.contains("Mode: Agent"));
assert!(!prompt.contains("Approval Policy:"));
// Constitutional preamble is still present
assert!(prompt.contains("You are codewhale"));
}
#[test]