Merge pull request #2801 from Hmbown/codex/harvest-2687-runtime-prompt-metadata
feat(cache): project mode prompts per request
This commit is contained in:
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Appended volatile `<turn_meta>` blocks after user text in outgoing user
|
||||
message content arrays so provider prefix caches can keep matching the stable
|
||||
user-input prefix across date, route, and working-set changes.
|
||||
- Projected mode, approval, and tool-taxonomy prompt metadata per request
|
||||
instead of mutating stored system prompts, keeping provider prefix-cache
|
||||
inputs byte-stable while preserving mode-specific instructions (#2687).
|
||||
Thanks @LeoAlex0 for the implementation.
|
||||
- Softened contribution intake automation: external issues now receive a warm
|
||||
triage note and are never auto-closed by the contribution gate, while the PR
|
||||
gate copy makes clear that dry-run observations are about maintainer safety,
|
||||
|
||||
@@ -91,6 +91,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Appended volatile `<turn_meta>` blocks after user text in outgoing user
|
||||
message content arrays so provider prefix caches can keep matching the stable
|
||||
user-input prefix across date, route, and working-set changes.
|
||||
- Projected mode, approval, and tool-taxonomy prompt metadata per request
|
||||
instead of mutating stored system prompts, keeping provider prefix-cache
|
||||
inputs byte-stable while preserving mode-specific instructions (#2687).
|
||||
Thanks @LeoAlex0 for the implementation.
|
||||
- Softened contribution intake automation: external issues now receive a warm
|
||||
triage note and are never auto-closed by the contribution gate, while the PR
|
||||
gate copy makes clear that dry-run observations are about maintainer safety,
|
||||
|
||||
@@ -1225,10 +1225,25 @@ mod tests {
|
||||
let result = cache(&mut app, Some("inspect"));
|
||||
let msg = result.message.expect("inspect output");
|
||||
|
||||
assert!(msg.contains("original_chars=14000"), "got: {msg}");
|
||||
assert!(msg.contains("truncated=true"), "got: {msg}");
|
||||
assert!(msg.contains("deduplicated=false"), "got: {msg}");
|
||||
assert!(msg.contains("deduplicated=true"), "got: {msg}");
|
||||
let tool_budget_lines: Vec<_> = msg
|
||||
.lines()
|
||||
.filter(|line| line.contains("original_chars=14000"))
|
||||
.collect();
|
||||
assert_eq!(tool_budget_lines.len(), 2, "got: {msg}");
|
||||
|
||||
let first_sighting = tool_budget_lines
|
||||
.iter()
|
||||
.find(|line| line.contains("deduplicated=false"))
|
||||
.expect("first tool-result sighting should report non-dedup metadata");
|
||||
assert!(first_sighting.contains("sent_chars="), "got: {msg}");
|
||||
assert!(first_sighting.contains("truncated=true"), "got: {msg}");
|
||||
|
||||
let repeat_sighting = tool_budget_lines
|
||||
.iter()
|
||||
.find(|line| line.contains("deduplicated=true"))
|
||||
.expect("repeat tool-result sighting should report dedup metadata");
|
||||
assert!(repeat_sighting.contains("sent_chars="), "got: {msg}");
|
||||
assert!(repeat_sighting.contains("truncated=false"), "got: {msg}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
+116
-22
@@ -687,7 +687,6 @@ impl Engine {
|
||||
show_thinking: config.show_thinking,
|
||||
allow_shell: config.allow_shell,
|
||||
},
|
||||
session.approval_mode,
|
||||
);
|
||||
let stable_prompt = Some(system_prompt);
|
||||
session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref()));
|
||||
@@ -853,11 +852,12 @@ impl Engine {
|
||||
self.session.trust_mode = trust_mode;
|
||||
self.config.trust_mode = trust_mode;
|
||||
self.session.auto_approve = auto_approve;
|
||||
self.session.approval_mode = if auto_approve {
|
||||
crate::tui::approval::ApprovalMode::Auto
|
||||
} else {
|
||||
approval_mode
|
||||
};
|
||||
let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
|
||||
// Only track the Agent-mode approval — Yolo/Plan have fixed
|
||||
// approval policies that are derived from the mode itself.
|
||||
if mode == AppMode::Agent {
|
||||
self.session.approval_mode = agent_approval_mode;
|
||||
}
|
||||
|
||||
let _ = self
|
||||
.tx_event
|
||||
@@ -1236,7 +1236,6 @@ impl Engine {
|
||||
Op::ChangeMode { mode } => {
|
||||
let previous_mode = self.current_mode;
|
||||
self.current_mode = mode;
|
||||
self.refresh_system_prompt(mode);
|
||||
self.emit_session_updated().await;
|
||||
// Notify the agent that the mode has changed so it can re-evaluate
|
||||
// any operations that were blocked by the previous mode's policy.
|
||||
@@ -1253,11 +1252,11 @@ impl Engine {
|
||||
)))
|
||||
.await;
|
||||
}
|
||||
Op::SetModel { model, mode } => {
|
||||
Op::SetModel { model, mode: _ } => {
|
||||
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
|
||||
self.session.model = model;
|
||||
self.config.model.clone_from(&self.session.model);
|
||||
self.refresh_system_prompt(mode);
|
||||
self.refresh_system_prompt();
|
||||
self.emit_session_updated().await;
|
||||
let _ = self
|
||||
.tx_event
|
||||
@@ -1304,6 +1303,10 @@ impl Engine {
|
||||
self.session.compaction_summary_prompt =
|
||||
extract_compaction_summary_prompt(system_prompt.clone());
|
||||
self.session.system_prompt = system_prompt;
|
||||
self.session.last_system_prompt_hash =
|
||||
Some(system_prompt_hash(self.session.system_prompt.as_ref()));
|
||||
// Host-supplied prompts are persisted prefixes. Keep them
|
||||
// byte-stable; mode/runtime state is projected per request.
|
||||
self.session.system_prompt_override =
|
||||
system_prompt_override && self.session.system_prompt.is_some();
|
||||
self.session.auto_model = model.trim().eq_ignore_ascii_case("auto");
|
||||
@@ -1485,6 +1488,18 @@ In {new} mode: {policy}\n\n\
|
||||
}
|
||||
}
|
||||
|
||||
fn runtime_prompt_message(&self) -> Message {
|
||||
let mode = self.current_mode;
|
||||
let approval_mode = approval_mode_for(mode, self.session.approval_mode);
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: runtime_prompt_text(mode, approval_mode),
|
||||
cache_control: None,
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
||||
fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
|
||||
self.user_text_message_with_turn_metadata_for_route(
|
||||
text,
|
||||
@@ -1633,6 +1648,14 @@ In {new} mode: {policy}\n\n\
|
||||
.observe_user_message(&content, &self.session.workspace);
|
||||
let force_update_plan_first = should_force_update_plan_first(mode, &content);
|
||||
|
||||
let agent_approval_mode = agent_approval_mode_for_turn(auto_approve, approval_mode);
|
||||
self.session.auto_approve = auto_approve;
|
||||
// Only track the Agent-mode approval — Yolo/Plan have fixed
|
||||
// approval policies that are derived from the mode itself.
|
||||
if mode == AppMode::Agent {
|
||||
self.session.approval_mode = agent_approval_mode;
|
||||
}
|
||||
|
||||
// Add user message to session
|
||||
let user_msg = self.user_text_message_with_turn_metadata_for_route(
|
||||
content,
|
||||
@@ -1670,15 +1693,10 @@ In {new} mode: {policy}\n\n\
|
||||
self.config.trust_mode = trust_mode;
|
||||
self.config.translation_enabled = translation_enabled;
|
||||
self.config.show_thinking = show_thinking;
|
||||
self.session.auto_approve = auto_approve;
|
||||
self.session.approval_mode = if auto_approve {
|
||||
crate::tui::approval::ApprovalMode::Auto
|
||||
} else {
|
||||
approval_mode
|
||||
};
|
||||
|
||||
// Update system prompt to match current mode and include persisted compaction context.
|
||||
self.refresh_system_prompt(mode);
|
||||
// Refresh stable prompt context. Current mode is carried by the
|
||||
// request-time runtime prompt projection.
|
||||
self.refresh_system_prompt();
|
||||
self.emit_session_updated().await;
|
||||
|
||||
// Build tool registry and tool list for the current mode
|
||||
@@ -2430,8 +2448,8 @@ In {new} mode: {policy}\n\n\
|
||||
)))
|
||||
.await;
|
||||
}
|
||||
/// Refresh the system prompt based on current mode and context.
|
||||
fn refresh_system_prompt(&mut self, mode: AppMode) {
|
||||
/// Refresh the stable system prompt based on current non-mode context.
|
||||
fn refresh_system_prompt(&mut self) {
|
||||
let user_memory_block =
|
||||
crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path);
|
||||
let prompt_goal_objective = goal_objective_for_prompt(
|
||||
@@ -2439,7 +2457,7 @@ In {new} mode: {policy}\n\n\
|
||||
&self.config.goal_state,
|
||||
);
|
||||
let base = prompts::system_prompt_for_mode_with_context_skills_session_and_approval(
|
||||
mode,
|
||||
AppMode::Agent,
|
||||
&self.config.workspace,
|
||||
None,
|
||||
Some(&self.config.skills_dir),
|
||||
@@ -2454,7 +2472,6 @@ In {new} mode: {policy}\n\n\
|
||||
show_thinking: self.config.show_thinking,
|
||||
allow_shell: self.session.allow_shell,
|
||||
},
|
||||
self.session.approval_mode,
|
||||
);
|
||||
let mut stable_prompt =
|
||||
merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone());
|
||||
@@ -2472,7 +2489,6 @@ In {new} mode: {policy}\n\n\
|
||||
|
||||
let stable_hash = system_prompt_hash(stable_prompt.as_ref());
|
||||
if self.session.system_prompt_override {
|
||||
self.session.last_system_prompt_hash = Some(stable_hash);
|
||||
return;
|
||||
}
|
||||
if self.session.last_system_prompt_hash != Some(stable_hash) {
|
||||
@@ -2634,6 +2650,84 @@ fn goal_objective_for_prompt(
|
||||
normalized_goal_objective(configured_goal)
|
||||
}
|
||||
|
||||
// ── Mode & approval prompts as request-time runtime metadata ─────────
|
||||
//
|
||||
// Mode contracts and approval policies are not persisted in the session
|
||||
// history and are not sent as extra system messages. Instead, each API
|
||||
// request projects a transient user-role runtime metadata message at the
|
||||
// tail. The stable system prompt remains byte-stable, stored history remains
|
||||
// byte-stable, and strict chat-template providers never see a system message
|
||||
// outside messages[0].
|
||||
|
||||
fn approval_mode_for(
|
||||
mode: AppMode,
|
||||
session_approval: crate::tui::approval::ApprovalMode,
|
||||
) -> crate::tui::approval::ApprovalMode {
|
||||
match mode {
|
||||
AppMode::Yolo => crate::tui::approval::ApprovalMode::Auto,
|
||||
AppMode::Plan => crate::tui::approval::ApprovalMode::Never,
|
||||
AppMode::Agent => session_approval,
|
||||
}
|
||||
}
|
||||
|
||||
/// Picks the Agent-mode approval policy for a turn.
///
/// Returns `ApprovalMode::Auto` whenever `auto_approve` is set; otherwise the
/// requested `approval_mode` is returned unchanged.
fn agent_approval_mode_for_turn(
    auto_approve: bool,
    approval_mode: crate::tui::approval::ApprovalMode,
) -> crate::tui::approval::ApprovalMode {
    if !auto_approve {
        return approval_mode;
    }
    crate::tui::approval::ApprovalMode::Auto
}
|
||||
|
||||
fn mode_prompt_marker(mode: AppMode) -> String {
|
||||
format!(
|
||||
"<mode_prompt mode=\"{}\">",
|
||||
match mode {
|
||||
AppMode::Agent => "agent",
|
||||
AppMode::Plan => "plan",
|
||||
AppMode::Yolo => "yolo",
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Renders the opening `<approval_policy policy="...">` tag for
/// `approval_mode`, using the lowercase policy name as the attribute value.
fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String {
    use crate::tui::approval::ApprovalMode;

    let policy_name = match approval_mode {
        ApprovalMode::Auto => "auto",
        ApprovalMode::Suggest => "suggest",
        ApprovalMode::Never => "never",
    };
    format!("<approval_policy policy=\"{}\">", policy_name)
}
|
||||
|
||||
fn mode_prompt_text(mode: AppMode) -> &'static str {
|
||||
match mode {
|
||||
AppMode::Agent => prompts::AGENT_MODE,
|
||||
AppMode::Plan => prompts::PLAN_MODE,
|
||||
AppMode::Yolo => prompts::YOLO_MODE,
|
||||
}
|
||||
}
|
||||
|
||||
fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
|
||||
let marker = mode_prompt_marker(mode);
|
||||
let mode_text = mode_prompt_text(mode).trim();
|
||||
let taxonomy = prompts::render_core_tool_taxonomy_block(mode);
|
||||
let approval_marker = approval_prompt_marker(approval_mode);
|
||||
let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim();
|
||||
format!(
|
||||
"<runtime_prompt visibility=\"internal\">\n\
|
||||
This is runtime control metadata for the current request, not user input. \
|
||||
Apply it to the next assistant response and tool calls. It supersedes any \
|
||||
earlier mode or approval metadata in the transcript.\n\n\
|
||||
{marker}\n{taxonomy}\n{mode_text}\n</mode_prompt>\n\n\
|
||||
{approval_marker}\n{approval_text}\n</approval_policy>\n\
|
||||
</runtime_prompt>"
|
||||
)
|
||||
}
|
||||
|
||||
/// Spawn the engine in a background task
|
||||
pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle {
|
||||
let (engine, handle) = Engine::new(config, api_config);
|
||||
|
||||
@@ -36,7 +36,7 @@ impl Engine {
|
||||
pub(super) async fn run_capacity_post_tool_checkpoint(
|
||||
&mut self,
|
||||
turn: &TurnContext,
|
||||
mode: AppMode,
|
||||
|
||||
tool_registry: Option<&crate::tools::ToolRegistry>,
|
||||
tool_exec_lock: Arc<RwLock<()>>,
|
||||
mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
|
||||
@@ -56,7 +56,6 @@ impl Engine {
|
||||
let _ = self
|
||||
.apply_verify_with_tool_replay(
|
||||
turn,
|
||||
mode,
|
||||
snapshot.as_ref(),
|
||||
tool_registry,
|
||||
tool_exec_lock,
|
||||
@@ -66,7 +65,7 @@ impl Engine {
|
||||
false
|
||||
}
|
||||
GuardrailAction::VerifyAndReplan => {
|
||||
self.apply_verify_and_replan(turn, mode, snapshot.as_ref(), "high_risk_post_tool")
|
||||
self.apply_verify_and_replan(turn, snapshot.as_ref(), "high_risk_post_tool")
|
||||
.await
|
||||
}
|
||||
GuardrailAction::NoIntervention | GuardrailAction::TargetedContextRefresh => false,
|
||||
@@ -76,7 +75,7 @@ impl Engine {
|
||||
pub(super) async fn run_capacity_error_escalation_checkpoint(
|
||||
&mut self,
|
||||
turn: &TurnContext,
|
||||
mode: AppMode,
|
||||
|
||||
step_error_count: usize,
|
||||
consecutive_tool_error_steps: u32,
|
||||
error_categories: &[ErrorCategory],
|
||||
@@ -136,7 +135,6 @@ impl Engine {
|
||||
let category_labels: Vec<String> = error_categories.iter().map(|c| c.to_string()).collect();
|
||||
self.apply_verify_and_replan(
|
||||
turn,
|
||||
mode,
|
||||
Some(&forced),
|
||||
&format!(
|
||||
"error_escalation: step_errors={}, consecutive_steps={}, categories={}",
|
||||
@@ -385,7 +383,7 @@ impl Engine {
|
||||
&mut self,
|
||||
turn: &TurnContext,
|
||||
client: Option<&DeepSeekClient>,
|
||||
mode: AppMode,
|
||||
_mode: AppMode,
|
||||
snapshot: Option<&CapacitySnapshot>,
|
||||
) -> bool {
|
||||
let before_tokens = self.estimated_input_tokens();
|
||||
@@ -465,7 +463,7 @@ impl Engine {
|
||||
GuardrailAction::TargetedContextRefresh,
|
||||
None,
|
||||
)));
|
||||
self.refresh_system_prompt(mode);
|
||||
self.refresh_system_prompt();
|
||||
self.emit_session_updated().await;
|
||||
|
||||
let after_tokens = self.estimated_input_tokens();
|
||||
@@ -487,7 +485,6 @@ impl Engine {
|
||||
pub(super) async fn apply_verify_with_tool_replay(
|
||||
&mut self,
|
||||
turn: &TurnContext,
|
||||
mode: AppMode,
|
||||
snapshot: Option<&CapacitySnapshot>,
|
||||
tool_registry: Option<&crate::tools::ToolRegistry>,
|
||||
tool_exec_lock: Arc<RwLock<()>>,
|
||||
@@ -617,7 +614,7 @@ impl Engine {
|
||||
GuardrailAction::VerifyWithToolReplay,
|
||||
Some(&verification_note),
|
||||
)));
|
||||
self.refresh_system_prompt(mode);
|
||||
self.refresh_system_prompt();
|
||||
self.emit_session_updated().await;
|
||||
|
||||
let after_tokens = self.estimated_input_tokens();
|
||||
@@ -638,7 +635,6 @@ impl Engine {
|
||||
pub(super) async fn apply_verify_and_replan(
|
||||
&mut self,
|
||||
turn: &TurnContext,
|
||||
mode: AppMode,
|
||||
snapshot: Option<&CapacitySnapshot>,
|
||||
reason: &str,
|
||||
) -> bool {
|
||||
@@ -685,7 +681,7 @@ impl Engine {
|
||||
GuardrailAction::VerifyAndReplan,
|
||||
Some("Replan now from canonical state. Keep steps minimal and verifiable."),
|
||||
)));
|
||||
self.refresh_system_prompt(mode);
|
||||
self.refresh_system_prompt();
|
||||
self.emit_session_updated().await;
|
||||
|
||||
let _ = self
|
||||
|
||||
+174
-133
@@ -303,7 +303,7 @@ fn refresh_system_prompt_uses_runtime_goal_state() {
|
||||
goal.create("Close the runtime goal loop".to_string(), None);
|
||||
}
|
||||
|
||||
engine.refresh_system_prompt(AppMode::Agent);
|
||||
engine.refresh_system_prompt();
|
||||
let prompt = match engine.session.system_prompt {
|
||||
Some(SystemPrompt::Text(text)) => text,
|
||||
Some(SystemPrompt::Blocks(blocks)) => blocks
|
||||
@@ -505,116 +505,36 @@ fn tool_exec_outcome_tracks_duration() {
|
||||
#[test]
|
||||
fn core_native_tools_stay_loaded_in_yolo_mode() {
|
||||
let always_load = HashSet::new();
|
||||
assert!(!should_default_defer_tool(
|
||||
"exec_shell",
|
||||
AppMode::Yolo,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool("exec_shell", &always_load));
|
||||
// git_blame remains deferred (read-only git history beyond log/show/diff).
|
||||
assert!(should_default_defer_tool(
|
||||
"git_blame",
|
||||
AppMode::Yolo,
|
||||
&always_load
|
||||
));
|
||||
assert!(should_default_defer_tool("git_blame", &always_load));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_yolo_mode_retains_default_defer_policy() {
|
||||
let always_load = HashSet::new();
|
||||
assert!(!should_default_defer_tool(
|
||||
"exec_shell",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"edit_file",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"apply_patch",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"fetch_url",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_diff",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool("exec_shell", &always_load));
|
||||
assert!(!should_default_defer_tool("edit_file", &always_load));
|
||||
assert!(!should_default_defer_tool("apply_patch", &always_load));
|
||||
assert!(!should_default_defer_tool("fetch_url", &always_load));
|
||||
assert!(!should_default_defer_tool("git_diff", &always_load));
|
||||
// #2654: read-only git history joins the active set.
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_log",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_show",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_status",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"run_tests",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"agent_open",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool("git_log", &always_load));
|
||||
assert!(!should_default_defer_tool("git_show", &always_load));
|
||||
assert!(!should_default_defer_tool("git_status", &always_load));
|
||||
assert!(!should_default_defer_tool("run_tests", &always_load));
|
||||
assert!(!should_default_defer_tool("agent_open", &always_load));
|
||||
// #2605: the fetch/close side of the sub-agent surface must also stay
|
||||
// active so a first `agent_eval`/`agent_close` executes instead of
|
||||
// hydrating its schema and forcing a double-invoke.
|
||||
assert!(!should_default_defer_tool(
|
||||
"agent_eval",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"agent_close",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"read_file",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"web_search",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"write_file",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"task_shell_start",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"task_shell_wait",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(should_default_defer_tool(
|
||||
"git_blame",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool("agent_eval", &always_load));
|
||||
assert!(!should_default_defer_tool("agent_close", &always_load));
|
||||
assert!(!should_default_defer_tool("read_file", &always_load));
|
||||
assert!(!should_default_defer_tool("web_search", &always_load));
|
||||
assert!(!should_default_defer_tool("write_file", &always_load));
|
||||
assert!(!should_default_defer_tool("task_shell_start", &always_load));
|
||||
assert!(!should_default_defer_tool("task_shell_wait", &always_load));
|
||||
assert!(should_default_defer_tool("git_blame", &always_load));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -815,11 +735,7 @@ fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() {
|
||||
#[test]
|
||||
fn tools_always_load_overrides_default_native_deferral() {
|
||||
let always_load = HashSet::from(["git_blame".to_string()]);
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_blame",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool("git_blame", &always_load));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1795,15 +1711,20 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
|
||||
.await
|
||||
.expect("send change mode");
|
||||
|
||||
let prompt = {
|
||||
let (_prompt, messages) = {
|
||||
let mut rx = handle.rx_event.write().await;
|
||||
loop {
|
||||
let event = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
|
||||
.await
|
||||
.expect("session update after mode switch")
|
||||
.expect("event");
|
||||
if let Event::SessionUpdated { system_prompt, .. } = event {
|
||||
break match system_prompt.expect("system prompt") {
|
||||
if let Event::SessionUpdated {
|
||||
system_prompt,
|
||||
messages,
|
||||
..
|
||||
} = event
|
||||
{
|
||||
let prompt = match system_prompt.expect("system prompt") {
|
||||
SystemPrompt::Text(text) => text,
|
||||
SystemPrompt::Blocks(blocks) => blocks
|
||||
.into_iter()
|
||||
@@ -1811,13 +1732,98 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n"),
|
||||
};
|
||||
break (prompt, messages);
|
||||
}
|
||||
}
|
||||
};
|
||||
run.abort();
|
||||
|
||||
assert!(prompt.contains("Mode: YOLO"));
|
||||
assert!(prompt.contains("Approval Policy: Auto"));
|
||||
assert!(
|
||||
messages.iter().all(|message| message.role != "system"),
|
||||
"mode switch must not persist appended system messages: {messages:?}"
|
||||
);
|
||||
assert!(
|
||||
messages.iter().all(|message| {
|
||||
message.content.iter().all(|block| {
|
||||
!matches!(
|
||||
block,
|
||||
ContentBlock::Text { text, .. }
|
||||
if text.contains("<mode_prompt") || text.contains("<approval_policy")
|
||||
)
|
||||
})
|
||||
}),
|
||||
"mode/approval prompts should be request-time metadata, not session history"
|
||||
);
|
||||
}
|
||||
|
||||
/// `auto_approve` forces the Agent-mode policy to `Auto`, while Yolo and Plan
/// resolve to their fixed policies regardless of the session value.
#[test]
fn turn_approval_mode_prefers_auto_approve_flag() {
    use crate::tui::approval::ApprovalMode;

    // The auto-approve flag wins over the requested approval mode.
    let forced_from_suggest = agent_approval_mode_for_turn(true, ApprovalMode::Suggest);
    assert_eq!(forced_from_suggest, ApprovalMode::Auto);

    // The forced value passes through Agent-mode resolution unchanged.
    let forced_from_never = agent_approval_mode_for_turn(true, ApprovalMode::Never);
    assert_eq!(
        approval_mode_for(AppMode::Agent, forced_from_never),
        ApprovalMode::Auto
    );

    // (mode, session approval, expected resolved policy)
    let fixed_policies = [
        (AppMode::Yolo, ApprovalMode::Suggest, ApprovalMode::Auto),
        (AppMode::Plan, ApprovalMode::Auto, ApprovalMode::Never),
    ];
    for (mode, session_approval, expected) in fixed_policies {
        assert_eq!(approval_mode_for(mode, session_approval), expected);
    }
}
|
||||
|
||||
/// Building request messages must append exactly one user-role runtime prompt
/// and leave the session's stored messages untouched.
#[test]
fn runtime_prompt_is_projected_without_persisting_to_session_messages() {
    use crate::tui::approval::ApprovalMode;

    let workspace_dir = tempdir().expect("tempdir");
    let config = EngineConfig {
        workspace: workspace_dir.path().to_path_buf(),
        ..Default::default()
    };
    let (mut engine, _handle) = Engine::new(config, &Config::default());

    // Plan mode with a session policy of Suggest: the projection is expected
    // to report Plan's fixed `never` policy, not the session value.
    engine.current_mode = AppMode::Plan;
    engine.session.approval_mode = ApprovalMode::Suggest;

    let seeded_message = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: "summary after compaction".to_string(),
            cache_control: None,
        }],
    };
    engine.session.messages = vec![seeded_message];
    let stored = engine.session.messages.clone();

    let request_messages = engine.messages_with_turn_metadata();

    // Stored history is unchanged; the request carries one extra message.
    assert_eq!(engine.session.messages, stored);
    assert_eq!(request_messages.len(), stored.len() + 1);
    assert!(
        !request_messages
            .iter()
            .any(|message| message.role == "system"),
        "runtime prompts must not create appended system messages"
    );

    let runtime = request_messages.last().expect("runtime prompt message");
    assert_eq!(runtime.role, "user");

    let text = match runtime.content.first().expect("runtime prompt text") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text runtime prompt"),
    };
    assert!(text.contains("<runtime_prompt"));
    assert!(text.contains("<mode_prompt mode=\"plan\">"));
    assert!(
        text.contains("<approval_policy policy=\"never\">"),
        "Plan mode should project its fixed never-approval policy: {text}"
    );
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -2216,7 +2222,7 @@ fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() {
|
||||
.working_set
|
||||
.observe_user_message("please inspect src/lib.rs", tmp.path());
|
||||
|
||||
engine.refresh_system_prompt(AppMode::Agent);
|
||||
engine.refresh_system_prompt();
|
||||
|
||||
let prompt = match &engine.session.system_prompt {
|
||||
Some(SystemPrompt::Text(text)) => text.clone(),
|
||||
@@ -2251,7 +2257,7 @@ fn working_set_reaches_model_as_turn_metadata() {
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
let last_block = messages
|
||||
.last()
|
||||
.first()
|
||||
.and_then(|message| message.content.last())
|
||||
.expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
@@ -2276,7 +2282,7 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
let last_block = messages
|
||||
.last()
|
||||
.first()
|
||||
.and_then(|message| message.content.last())
|
||||
.expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
@@ -2455,7 +2461,16 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
|
||||
let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
|
||||
engine.session.add_message(first_user.clone());
|
||||
let first_request = engine.messages_with_turn_metadata();
|
||||
assert_eq!(first_request, engine.session.messages);
|
||||
assert_eq!(
|
||||
&first_request[..engine.session.messages.len()],
|
||||
engine.session.messages.as_slice()
|
||||
);
|
||||
assert_eq!(first_request.len(), engine.session.messages.len() + 1);
|
||||
assert_eq!(first_request.first(), Some(&first_user));
|
||||
assert_eq!(
|
||||
first_request.last().map(|message| message.role.as_str()),
|
||||
Some("user")
|
||||
);
|
||||
|
||||
engine.session.add_message(Message {
|
||||
role: "assistant".to_string(),
|
||||
@@ -2472,14 +2487,24 @@ fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
|
||||
engine.session.add_message(second_user);
|
||||
|
||||
let second_request = engine.messages_with_turn_metadata();
|
||||
assert_eq!(second_request, engine.session.messages);
|
||||
assert_eq!(
|
||||
&second_request[..engine.session.messages.len()],
|
||||
engine.session.messages.as_slice()
|
||||
);
|
||||
assert_eq!(second_request.len(), engine.session.messages.len() + 1);
|
||||
assert_eq!(second_request.first(), Some(&first_user));
|
||||
let runtime = second_request.last().expect("runtime prompt");
|
||||
let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
|
||||
else {
|
||||
panic!("expected runtime prompt text");
|
||||
};
|
||||
assert!(text.contains("<runtime_prompt"));
|
||||
}
|
||||
|
||||
/// v0.8.11 regression: tool-result messages serialize to role="tool" on
|
||||
/// the wire but are stored as role="user" internally. `<turn_meta>` must
|
||||
/// be stored only on actual user-text messages, not retroactively added
|
||||
/// to tool-result messages at request time.
|
||||
/// be stored only on actual user-text messages. Request-time runtime metadata
|
||||
/// is appended separately and must not mutate tool-result messages.
|
||||
#[test]
|
||||
fn turn_metadata_skips_tool_result_messages() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
@@ -2522,9 +2547,11 @@ fn turn_metadata_skips_tool_result_messages() {
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
|
||||
// The trailing message is the tool result and MUST be untouched —
|
||||
// The stored trailing message is the tool result and MUST be untouched —
|
||||
// no Text block sneaking in front of the ToolResult block.
|
||||
let trailing = messages.last().expect("trailing message");
|
||||
let trailing = messages
|
||||
.get(messages.len().saturating_sub(2))
|
||||
.expect("stored trailing message");
|
||||
assert_eq!(trailing.role, "user");
|
||||
assert_eq!(trailing.content.len(), 1);
|
||||
assert!(matches!(
|
||||
@@ -2546,6 +2573,14 @@ fn turn_metadata_skips_tool_result_messages() {
|
||||
panic!("expected Text block for turn_meta at tail");
|
||||
};
|
||||
assert!(meta.starts_with("<turn_meta>\n"));
|
||||
assert!(meta.contains("src/lib.rs"));
|
||||
assert!(
|
||||
matches!(
|
||||
messages.last().and_then(|message| message.content.first()),
|
||||
Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
|
||||
),
|
||||
"request projection should append transient runtime metadata"
|
||||
);
|
||||
}
|
||||
|
||||
/// User text must appear before turn_meta in the content array so that
|
||||
@@ -2588,8 +2623,8 @@ fn user_message_turn_meta_is_appended_not_prepended() {
|
||||
}
|
||||
|
||||
/// When the turn is mid-execution and the trailing user message is a
|
||||
/// tool result, no turn_meta is injected at request time. The working_set
|
||||
/// surfaces again on the next stored user-text message.
|
||||
/// tool result, no turn_meta is injected into that tool-result message. The
|
||||
/// working_set surfaces again on the next stored user-text message.
|
||||
#[test]
|
||||
fn turn_metadata_skips_when_only_tool_results_trail() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
@@ -2622,14 +2657,21 @@ fn turn_metadata_skips_when_only_tool_results_trail() {
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
|
||||
// Returned unchanged: the single tool-result message, no Text
|
||||
// prefix, content length == 1.
|
||||
let only = messages.last().expect("trailing message");
|
||||
// Stored tool-result message is unchanged: no Text prefix, content length == 1.
|
||||
let only = messages.first().expect("stored tool result message");
|
||||
assert_eq!(only.content.len(), 1);
|
||||
assert!(matches!(
|
||||
only.content.first(),
|
||||
Some(ContentBlock::ToolResult { .. })
|
||||
));
|
||||
assert_eq!(messages.len(), 2);
|
||||
assert!(
|
||||
matches!(
|
||||
messages.last().and_then(|message| message.content.first()),
|
||||
Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
|
||||
),
|
||||
"request projection should still append transient runtime metadata"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2641,10 +2683,10 @@ fn refresh_system_prompt_is_noop_when_unchanged() {
|
||||
};
|
||||
let (mut engine, _handle) = Engine::new(config, &Config::default());
|
||||
|
||||
engine.refresh_system_prompt(AppMode::Agent);
|
||||
engine.refresh_system_prompt();
|
||||
let first_hash = engine.session.last_system_prompt_hash;
|
||||
let first_prompt = engine.session.system_prompt.clone();
|
||||
engine.refresh_system_prompt(AppMode::Agent);
|
||||
engine.refresh_system_prompt();
|
||||
|
||||
assert_eq!(engine.session.last_system_prompt_hash, first_hash);
|
||||
assert_eq!(engine.session.system_prompt, first_prompt);
|
||||
@@ -2691,7 +2733,7 @@ fn text_system_prompt_override_via_runtime_sync_survives_refresh() {
|
||||
let expected = Some(prompt.clone());
|
||||
|
||||
sync_runtime_system_prompt_override(&mut engine, prompt);
|
||||
engine.refresh_system_prompt(AppMode::Agent);
|
||||
engine.refresh_system_prompt();
|
||||
|
||||
assert_eq!(engine.session.system_prompt, expected);
|
||||
}
|
||||
@@ -2712,7 +2754,7 @@ fn blocks_system_prompt_override_via_runtime_sync_survives_mode_change_refresh()
|
||||
let expected = Some(prompt.clone());
|
||||
|
||||
sync_runtime_system_prompt_override(&mut engine, prompt);
|
||||
engine.refresh_system_prompt(AppMode::Plan);
|
||||
engine.refresh_system_prompt();
|
||||
|
||||
assert_eq!(engine.session.system_prompt, expected);
|
||||
}
|
||||
@@ -2732,7 +2774,7 @@ fn compaction_summary_stays_in_stable_system_prompt() {
|
||||
.session
|
||||
.working_set
|
||||
.observe_user_message("continue in src/main.rs", tmp.path());
|
||||
engine.refresh_system_prompt(AppMode::Agent);
|
||||
engine.refresh_system_prompt();
|
||||
engine.merge_compaction_summary(Some(SystemPrompt::Blocks(vec![SystemBlock {
|
||||
block_type: "text".to_string(),
|
||||
text: format!("{COMPACTION_SUMMARY_MARKER}\nsummary"),
|
||||
@@ -2885,7 +2927,6 @@ async fn post_tool_replay_invoked_when_high_non_severe_risk() {
|
||||
let restarted = engine
|
||||
.run_capacity_post_tool_checkpoint(
|
||||
&turn,
|
||||
AppMode::Agent,
|
||||
Some(&registry),
|
||||
Arc::new(RwLock::new(())),
|
||||
None,
|
||||
@@ -2946,7 +2987,7 @@ async fn error_escalation_triggers_replan_when_severe_or_repeated_failures() {
|
||||
let before_len = engine.session.messages.len();
|
||||
let turn = TurnContext::new(10);
|
||||
let restarted = engine
|
||||
.run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[])
|
||||
.run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
|
||||
.await;
|
||||
|
||||
assert!(restarted);
|
||||
@@ -3004,7 +3045,7 @@ async fn capacity_disabled_by_default_keeps_messages_intact() {
|
||||
let before_len = engine.session.messages.len();
|
||||
let turn = TurnContext::new(10);
|
||||
let restarted = engine
|
||||
.run_capacity_error_escalation_checkpoint(&turn, AppMode::Agent, 2, 2, &[])
|
||||
.run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
|
||||
.await;
|
||||
|
||||
// Capacity is disabled → no replan, no message clear.
|
||||
|
||||
@@ -67,11 +67,7 @@ pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[
|
||||
"write_file",
|
||||
];
|
||||
|
||||
pub(super) fn should_default_defer_tool(
|
||||
name: &str,
|
||||
_mode: AppMode,
|
||||
always_load: &HashSet<String>,
|
||||
) -> bool {
|
||||
pub(super) fn should_default_defer_tool(name: &str, always_load: &HashSet<String>) -> bool {
|
||||
if always_load.contains(name) {
|
||||
return false;
|
||||
}
|
||||
@@ -85,13 +81,9 @@ pub(super) fn should_default_defer_tool(
|
||||
.any(|core_tool| core_tool == &name)
|
||||
}
|
||||
|
||||
pub(super) fn apply_native_tool_deferral(
|
||||
catalog: &mut [Tool],
|
||||
mode: AppMode,
|
||||
always_load: &HashSet<String>,
|
||||
) {
|
||||
pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], always_load: &HashSet<String>) {
|
||||
for tool in catalog {
|
||||
tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
|
||||
tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -185,7 +177,7 @@ pub(super) fn build_model_tool_catalog(
|
||||
mode: AppMode,
|
||||
always_load: &HashSet<String>,
|
||||
) -> Vec<Tool> {
|
||||
apply_native_tool_deferral(&mut native_tools, mode, always_load);
|
||||
apply_native_tool_deferral(&mut native_tools, always_load);
|
||||
apply_mcp_tool_deferral(&mut mcp_tools, mode);
|
||||
// Sort each partition by name for prefix-cache stability (#263). The
|
||||
// upstream `to_api_tools()` already sorts the registry's HashMap output;
|
||||
@@ -229,7 +221,6 @@ pub(super) fn ensure_advanced_tooling(
|
||||
allowed_callers: Some(vec!["direct".to_string()]),
|
||||
defer_loading: Some(should_default_defer_tool(
|
||||
CODE_EXECUTION_TOOL_NAME,
|
||||
mode,
|
||||
always_load,
|
||||
)),
|
||||
input_examples: None,
|
||||
@@ -248,7 +239,7 @@ pub(super) fn ensure_advanced_tooling(
|
||||
&& crate::dependencies::resolve_node().is_some()
|
||||
{
|
||||
let mut tool = crate::tools::js_execution::js_execution_tool_definition();
|
||||
tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
|
||||
tool.defer_loading = Some(should_default_defer_tool(&tool.name, always_load));
|
||||
catalog.push(tool);
|
||||
}
|
||||
|
||||
|
||||
@@ -105,7 +105,7 @@ impl Engine {
|
||||
}
|
||||
|
||||
// Ensure system prompt is up to date with latest session states
|
||||
self.refresh_system_prompt(mode);
|
||||
self.refresh_system_prompt();
|
||||
|
||||
if turn.at_max_steps() {
|
||||
let _ = self
|
||||
@@ -2149,7 +2149,6 @@ impl Engine {
|
||||
if self
|
||||
.run_capacity_post_tool_checkpoint(
|
||||
turn,
|
||||
mode,
|
||||
tool_registry,
|
||||
tool_exec_lock.clone(),
|
||||
mcp_pool.clone(),
|
||||
@@ -2181,7 +2180,6 @@ impl Engine {
|
||||
if self
|
||||
.run_capacity_error_escalation_checkpoint(
|
||||
turn,
|
||||
mode,
|
||||
step_error_count,
|
||||
consecutive_tool_error_steps,
|
||||
&step_error_categories,
|
||||
@@ -2254,11 +2252,15 @@ impl Engine {
|
||||
}
|
||||
|
||||
pub(super) fn messages_with_turn_metadata(&self) -> Vec<Message> {
|
||||
// `<turn_meta>` is stored on user-text messages when the message is
|
||||
// appended. Do not rewrite historical messages at request time: doing
|
||||
// so makes the API prefix differ from the bytes sent in earlier turns
|
||||
// and destroys DeepSeek's KV prefix cache reuse.
|
||||
self.session.messages.clone()
|
||||
// Keep stored history byte-stable and provider-compatible: runtime
|
||||
// mode/approval contracts are projected as a transient user message
|
||||
// at request time instead of being persisted as appended system
|
||||
// messages. This preserves the stable prefix through all stored
|
||||
// messages while avoiding strict chat templates that only allow
|
||||
// system messages at messages[0].
|
||||
let mut messages = self.session.messages.clone();
|
||||
messages.push(self.runtime_prompt_message());
|
||||
messages
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ pub enum Op {
|
||||
#[allow(dead_code)]
|
||||
ChangeMode { mode: AppMode },
|
||||
|
||||
/// Update the model being used and refresh the prompt for the current mode.
|
||||
/// Update the model being used and refresh stable prompt context.
|
||||
#[allow(dead_code)]
|
||||
SetModel { model: String, mode: AppMode },
|
||||
|
||||
|
||||
@@ -31,8 +31,8 @@ pub struct Session {
|
||||
|
||||
/// System prompt (optional)
|
||||
pub system_prompt: Option<SystemPrompt>,
|
||||
/// True when `system_prompt` came from an explicit runtime API override
|
||||
/// and should not be replaced by mode/context refreshes.
|
||||
/// True when `system_prompt` is a persisted/runtime-supplied prefix that
|
||||
/// should not be replaced by mode/context refreshes.
|
||||
pub system_prompt_override: bool,
|
||||
/// Hash of the last assembled stable system prompt. Used to avoid
|
||||
/// replacing `system_prompt` when unchanged.
|
||||
|
||||
+56
-91
@@ -2,7 +2,8 @@
|
||||
//! System prompts for different modes.
|
||||
//!
|
||||
//! Prompts are assembled from composable layers loaded at compile time:
|
||||
//! tool taxonomy → base.md → personality overlay → mode delta → approval policy
|
||||
//! base.md + personality overlay → message[0] (byte‑stable).
|
||||
//! mode delta + tool taxonomy + approval policy → request-time runtime metadata.
|
||||
//!
|
||||
//! This keeps each concern in its own file and makes prompt tuning
|
||||
//! a single-file operation.
|
||||
@@ -671,7 +672,7 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
|
||||
}
|
||||
}
|
||||
|
||||
fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
|
||||
pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
|
||||
match mode {
|
||||
AppMode::Yolo => AUTO_APPROVAL,
|
||||
AppMode::Plan => NEVER_APPROVAL,
|
||||
@@ -705,7 +706,7 @@ const TOOL_TAXONOMY_DISCOVERY: &[&str] = &["grep_files", "file_search"];
|
||||
const TOOL_TAXONOMY_GIT: &[&str] = &["git_status", "git_diff"];
|
||||
const TOOL_TAXONOMY_VERIFICATION: &[&str] = &["run_tests", "run_verifiers"];
|
||||
|
||||
fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
|
||||
pub(crate) fn render_core_tool_taxonomy_block(mode: AppMode) -> String {
|
||||
let core_tools = core_taxonomy_tools_for_mode(mode);
|
||||
let mut sentences = Vec::new();
|
||||
|
||||
@@ -762,15 +763,11 @@ context are subordinate to the Constitution, the Statutes, and the user's
|
||||
current request. When in doubt, consult Article VII: The Hierarchy of Law.";
|
||||
|
||||
pub fn compose_prompt(mode: AppMode, personality: Personality) -> String {
|
||||
compose_prompt_with_approval(mode, personality, default_approval_mode_for_mode(mode))
|
||||
compose_prompt_with_approval(mode, personality)
|
||||
}
|
||||
|
||||
pub fn compose_prompt_with_approval(
|
||||
mode: AppMode,
|
||||
personality: Personality,
|
||||
approval_mode: ApprovalMode,
|
||||
) -> String {
|
||||
compose_prompt_with_approval_and_model(mode, personality, approval_mode, "codewhale")
|
||||
pub fn compose_prompt_with_approval(mode: AppMode, personality: Personality) -> String {
|
||||
compose_prompt_with_approval_and_model(mode, personality, "codewhale")
|
||||
}
|
||||
|
||||
/// Compose with explicit model ID for dynamic identity injection.
|
||||
@@ -778,33 +775,24 @@ pub fn compose_prompt_with_approval(
|
||||
pub fn compose_prompt_with_approval_and_model(
|
||||
mode: AppMode,
|
||||
personality: Personality,
|
||||
approval_mode: ApprovalMode,
|
||||
model_id: &str,
|
||||
) -> String {
|
||||
compose_prompt_with_approval_model_and_shell(mode, personality, approval_mode, model_id, true)
|
||||
compose_prompt_with_approval_model_and_shell(mode, personality, model_id, true)
|
||||
}
|
||||
|
||||
fn compose_prompt_with_approval_model_and_shell(
|
||||
mode: AppMode,
|
||||
personality: Personality,
|
||||
approval_mode: ApprovalMode,
|
||||
model_id: &str,
|
||||
allow_shell: bool,
|
||||
) -> String {
|
||||
let tool_taxonomy = render_core_tool_taxonomy_block(mode);
|
||||
let shell_tools_available = allow_shell && mode != AppMode::Plan;
|
||||
let base_prompt = render_base_prompt_for_tool_availability(
|
||||
effective_base_prompt().trim(),
|
||||
model_id,
|
||||
shell_tools_available,
|
||||
);
|
||||
let parts: [&str; 5] = [
|
||||
tool_taxonomy.as_str(),
|
||||
base_prompt.as_str(),
|
||||
personality.prompt().trim(),
|
||||
mode_prompt(mode).trim(),
|
||||
approval_prompt_for_mode(mode, approval_mode).trim(),
|
||||
];
|
||||
let parts: [&str; 2] = [base_prompt.as_str(), personality.prompt().trim()];
|
||||
|
||||
let mut out =
|
||||
String::with_capacity(parts.iter().map(|p| p.len()).sum::<usize>() + (parts.len() - 1) * 2);
|
||||
@@ -883,22 +871,16 @@ fn compose_mode_prompt(mode: AppMode) -> String {
|
||||
compose_prompt(mode, Personality::Calm)
|
||||
}
|
||||
|
||||
fn compose_mode_prompt_with_approval(mode: AppMode, approval_mode: ApprovalMode) -> String {
|
||||
compose_prompt_with_approval(mode, Personality::Calm, approval_mode)
|
||||
fn compose_mode_prompt_with_approval(mode: AppMode) -> String {
|
||||
compose_prompt_with_approval(mode, Personality::Calm)
|
||||
}
|
||||
|
||||
fn compose_mode_prompt_with_approval_and_model(
|
||||
mode: AppMode,
|
||||
approval_mode: ApprovalMode,
|
||||
_approval_mode: ApprovalMode,
|
||||
model_id: &str,
|
||||
) -> String {
|
||||
compose_prompt_with_approval_model_and_shell(
|
||||
mode,
|
||||
Personality::Calm,
|
||||
approval_mode,
|
||||
model_id,
|
||||
true,
|
||||
)
|
||||
compose_prompt_with_approval_model_and_shell(mode, Personality::Calm, model_id, true)
|
||||
}
|
||||
|
||||
// ── Public API ────────────────────────────────────────────────────────
|
||||
@@ -991,7 +973,6 @@ pub fn system_prompt_for_mode_with_context_skills_and_session(
|
||||
skills_dir,
|
||||
instructions,
|
||||
session_context,
|
||||
default_approval_mode_for_mode(mode),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1002,12 +983,10 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
|
||||
skills_dir: Option<&Path>,
|
||||
instructions: Option<&[InstructionSource]>,
|
||||
session_context: PromptSessionContext<'_>,
|
||||
approval_mode: ApprovalMode,
|
||||
) -> SystemPrompt {
|
||||
let mode_prompt = compose_prompt_with_approval_model_and_shell(
|
||||
mode,
|
||||
Personality::Calm,
|
||||
approval_mode,
|
||||
session_context.model_id,
|
||||
session_context.allow_shell,
|
||||
);
|
||||
@@ -1335,7 +1314,6 @@ mod tests {
|
||||
let prompt = compose_prompt_with_approval_and_model(
|
||||
AppMode::Agent,
|
||||
Personality::Calm,
|
||||
ApprovalMode::Suggest,
|
||||
"deepseek-v4-flash",
|
||||
);
|
||||
assert!(
|
||||
@@ -1353,7 +1331,6 @@ mod tests {
|
||||
let prompt = compose_prompt_with_approval_model_and_shell(
|
||||
AppMode::Agent,
|
||||
Personality::Calm,
|
||||
ApprovalMode::Suggest,
|
||||
"deepseek-v4-pro",
|
||||
true,
|
||||
);
|
||||
@@ -1369,7 +1346,6 @@ mod tests {
|
||||
let prompt = compose_prompt_with_approval_model_and_shell(
|
||||
AppMode::Agent,
|
||||
Personality::Calm,
|
||||
ApprovalMode::Suggest,
|
||||
"deepseek-v4-pro",
|
||||
false,
|
||||
);
|
||||
@@ -1403,47 +1379,36 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn composed_prompt_starts_with_core_tool_taxonomy() {
|
||||
fn composed_prompt_no_longer_inlines_tool_taxonomy() {
|
||||
let prompt = compose_prompt_with_approval_and_model(
|
||||
AppMode::Agent,
|
||||
Personality::Calm,
|
||||
ApprovalMode::Suggest,
|
||||
"deepseek-v4-pro",
|
||||
);
|
||||
let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Agent);
|
||||
|
||||
assert!(
|
||||
prompt.starts_with(&expected_taxonomy),
|
||||
"composed prompt should start with the compact generated tool taxonomy"
|
||||
);
|
||||
// The generated "## Core Tool Taxonomy" block now travels in the
|
||||
// request-time <mode_prompt> metadata rather than being prepended here.
|
||||
// (The "## Toolbox" section from the Constitutional preamble remains.)
|
||||
assert!(!prompt.contains("## Core Tool Taxonomy"));
|
||||
assert!(prompt.contains("You are deepseek-v4-pro"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_prompt_taxonomy_omits_run_tests() {
|
||||
let prompt = compose_prompt_with_approval_and_model(
|
||||
AppMode::Plan,
|
||||
Personality::Calm,
|
||||
ApprovalMode::Never,
|
||||
"deepseek-v4-pro",
|
||||
);
|
||||
let expected_taxonomy = render_core_tool_taxonomy_block(AppMode::Plan);
|
||||
|
||||
let taxonomy = render_core_tool_taxonomy_block(AppMode::Plan);
|
||||
// Plan taxonomy should omit execution tools (verified at the source).
|
||||
assert!(
|
||||
prompt.starts_with(&expected_taxonomy),
|
||||
"Plan prompt should start with its mode-specific tool taxonomy"
|
||||
);
|
||||
assert!(
|
||||
expected_taxonomy.contains("for discovery")
|
||||
&& expected_taxonomy.contains("for git inspection"),
|
||||
taxonomy.contains("for discovery") && taxonomy.contains("for git inspection"),
|
||||
"Plan taxonomy should keep read-only discovery and git guidance"
|
||||
);
|
||||
assert!(
|
||||
!expected_taxonomy.contains("run_tests")
|
||||
&& !expected_taxonomy.contains("run_verifiers")
|
||||
&& !expected_taxonomy.contains("for verification")
|
||||
&& !expected_taxonomy.contains("Use "),
|
||||
"Plan taxonomy must not advertise unavailable verification tools: {expected_taxonomy:?}"
|
||||
!taxonomy.contains("run_tests")
|
||||
&& !taxonomy.contains("run_verifiers")
|
||||
&& !taxonomy.contains("exec_shell"),
|
||||
"Plan taxonomy must not mention run_tests, run_verifiers, or exec_shell"
|
||||
);
|
||||
// The taxonomy block is rendered correctly but no longer inlined
|
||||
// into the base system prompt — it travels in request-time
|
||||
// <mode_prompt> metadata instead.
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1471,7 +1436,6 @@ mod tests {
|
||||
None,
|
||||
None,
|
||||
PromptSessionContext::default(),
|
||||
ApprovalMode::Suggest,
|
||||
) {
|
||||
SystemPrompt::Text(text) => text,
|
||||
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
|
||||
@@ -1677,7 +1641,6 @@ mod tests {
|
||||
show_thinking: true,
|
||||
allow_shell: true,
|
||||
},
|
||||
ApprovalMode::Suggest,
|
||||
) {
|
||||
SystemPrompt::Text(text) => text,
|
||||
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
|
||||
@@ -1749,7 +1712,6 @@ mod tests {
|
||||
show_thinking: true,
|
||||
allow_shell: true,
|
||||
},
|
||||
ApprovalMode::Suggest,
|
||||
) {
|
||||
SystemPrompt::Text(text) => text,
|
||||
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
|
||||
@@ -1794,7 +1756,6 @@ mod tests {
|
||||
show_thinking: false,
|
||||
allow_shell: true,
|
||||
},
|
||||
ApprovalMode::Suggest,
|
||||
) {
|
||||
SystemPrompt::Text(text) => text,
|
||||
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
|
||||
@@ -1849,7 +1810,6 @@ mod tests {
|
||||
show_thinking: true,
|
||||
allow_shell: true,
|
||||
},
|
||||
ApprovalMode::Suggest,
|
||||
) {
|
||||
SystemPrompt::Text(text) => text,
|
||||
SystemPrompt::Blocks(_) => panic!("expected text system prompt"),
|
||||
@@ -2175,10 +2135,10 @@ mod tests {
|
||||
assert!(prompt.contains("You are codewhale"));
|
||||
// Personality layer
|
||||
assert!(prompt.contains("Personality: Calm"));
|
||||
// Mode layer
|
||||
assert!(prompt.contains("Mode: Agent"));
|
||||
// Approval layer
|
||||
assert!(prompt.contains("Approval Policy: Suggest"));
|
||||
// Mode and approval are no longer inlined — they travel as
|
||||
// request-time runtime metadata.
|
||||
assert!(!prompt.contains("Mode: Agent"));
|
||||
assert!(!prompt.contains("Approval Policy:"));
|
||||
}
|
||||
|
||||
/// Gate against shipping a release with a missing CHANGELOG entry — which
|
||||
@@ -2233,32 +2193,37 @@ mod tests {
|
||||
let prompt = compose_prompt(AppMode::Yolo, Personality::Calm);
|
||||
let base_pos = prompt.find("You are codewhale").unwrap();
|
||||
let personality_pos = prompt.find("Personality: Calm").unwrap();
|
||||
let mode_pos = prompt.find("Mode: YOLO").unwrap();
|
||||
let approval_pos = prompt.find("Approval Policy: Auto").unwrap();
|
||||
|
||||
assert!(base_pos < personality_pos);
|
||||
assert!(personality_pos < mode_pos);
|
||||
assert!(mode_pos < approval_pos);
|
||||
// Mode and approval text are no longer inlined — they travel as
|
||||
// request-time runtime metadata.
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn each_mode_gets_correct_approval() {
|
||||
assert!(
|
||||
compose_prompt(AppMode::Agent, Personality::Calm).contains("Approval Policy: Suggest")
|
||||
);
|
||||
assert!(compose_prompt(AppMode::Yolo, Personality::Calm).contains("Approval Policy: Auto"));
|
||||
assert!(
|
||||
compose_prompt(AppMode::Plan, Personality::Calm).contains("Approval Policy: Never")
|
||||
);
|
||||
fn base_prompt_is_mode_agnostic() {
|
||||
// Mode and approval text are no longer inlined into compose_prompt —
|
||||
// they travel as request-time runtime metadata.
|
||||
let agent_prompt = compose_prompt(AppMode::Agent, Personality::Calm);
|
||||
let yolo_prompt = compose_prompt(AppMode::Yolo, Personality::Calm);
|
||||
let plan_prompt = compose_prompt(AppMode::Plan, Personality::Calm);
|
||||
assert!(!agent_prompt.contains("Mode: Agent"));
|
||||
assert!(!yolo_prompt.contains("Mode: YOLO"));
|
||||
assert!(!plan_prompt.contains("Mode: Plan"));
|
||||
assert!(!agent_prompt.contains("Approval Policy:"));
|
||||
assert!(!yolo_prompt.contains("Approval Policy:"));
|
||||
assert!(!plan_prompt.contains("Approval Policy:"));
|
||||
// Base prompt still contains Constitutional preamble and personality
|
||||
assert!(agent_prompt.contains("You are codewhale"));
|
||||
assert!(agent_prompt.contains("Personality: Calm"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_prompt_can_reflect_never_approval_policy() {
|
||||
let prompt =
|
||||
compose_prompt_with_approval(AppMode::Agent, Personality::Calm, ApprovalMode::Never);
|
||||
assert!(prompt.contains("Mode: Agent"));
|
||||
assert!(prompt.contains("Approval Policy: Never"));
|
||||
assert!(prompt.contains("/config approval_mode suggest"));
|
||||
fn approval_policy_no_longer_inlined_in_base_prompt() {
|
||||
let prompt = compose_prompt_with_approval(AppMode::Agent, Personality::Calm);
|
||||
assert!(!prompt.contains("Mode: Agent"));
|
||||
assert!(!prompt.contains("Approval Policy:"));
|
||||
// Constitutional preamble is still present
|
||||
assert!(prompt.contains("You are codewhale"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user