feat(cache): slim runtime_prompt to minimal tag, move policy descriptions to system prompt

- Add render_runtime_policy_reference() in prompts.rs containing all
  mode and approval policy descriptions in the frozen system-prompt
  prefix (sent once per session, cache-hit thereafter).
- Simplify runtime_prompt_text() from ~500-token XML block to a ~16-token
  self-closing tag (<runtime_prompt visibility="internal" mode="..." approval="..."/>).
- Fix markdown heading hierarchy in all prompts/modes/*.md and
  prompts/approvals/*.md (## → #####) to nest correctly under ####.
- Remove now-unused legacy functions: mode_prompt(),
  approval_prompt_for_mode(), mode_change_runtime_message().
- Simplify Op::ChangeMode: no longer persists a mode_change event
  (next turn tag carries the current mode).
- Update and rename affected tests.

Builds on #2801. Reduces per-request runtime prompt overhead by 97%
(~471 tokens saved per API call). System prompt grows by ~1325 tokens
in the frozen prefix (one-time miss cost); break-even at 3 API calls.
This commit is contained in:
zLeoAlex
2026-06-07 15:03:43 +08:00
parent 3d676c2509
commit 427bd5d52f
9 changed files with 144 additions and 171 deletions
+20 -86
View File
@@ -1245,16 +1245,8 @@ impl Engine {
let _ = self.tx_event.send(Event::AgentList { agents }).await;
}
Op::ChangeMode { mode } => {
let previous_mode = self.current_mode;
self.current_mode = mode;
self.emit_session_updated().await;
// Notify the agent that the mode has changed so it can re-evaluate
// any operations that were blocked by the previous mode's policy.
if previous_mode != mode {
let msg = Self::mode_change_runtime_message(previous_mode, mode);
self.session.add_message(msg);
self.emit_session_updated().await;
}
let _ = self
.tx_event
.send(Event::status(format!(
@@ -1414,47 +1406,6 @@ impl Engine {
.await;
}
/// Build a runtime event message notifying the agent that the operating mode has changed.
/// This lets the agent re-evaluate any operations that were blocked by the previous mode.
fn mode_change_runtime_message(previous_mode: AppMode, new_mode: AppMode) -> Message {
let (policy_note, re_eval_hint) = match new_mode {
AppMode::Yolo => (
"all operations run automatically without requiring user approval",
"Re-evaluate any previously blocked write, patch, or shell operations \
— they are now auto-approved.",
),
AppMode::Agent => (
"read-only operations run silently; writes, patches, and shell \
commands require user approval",
"Any operations you ran automatically under YOLO mode now require \
explicit user approval before executing.",
),
AppMode::Plan => (
"all writes and patches are blocked; shell and code execution are unavailable",
"Any previously planned operations that require writes or shell access \
must wait until the mode changes back to Agent or YOLO.",
),
};
Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: format!(
"<codewhale:runtime_event kind=\"mode_change\" visibility=\"internal\">\n\
This is an internal runtime event, not user input. The operating mode has changed \
from {previous} mode to {new} mode.\n\n\
In {new} mode: {policy}\n\n\
{re_eval}\n\
</codewhale:runtime_event>",
previous = previous_mode.description(),
new = new_mode.description(),
policy = policy_note,
re_eval = re_eval_hint,
),
cache_control: None,
}],
}
}
async fn add_session_message(&mut self, message: Message) {
self.session.add_message(message);
self.emit_session_updated().await;
@@ -2692,51 +2643,34 @@ fn agent_approval_mode_for_turn(
}
}
fn mode_prompt_marker(mode: AppMode) -> String {
/// Produce a minimal runtime-policy tag for the per-turn transient user message.
///
/// All mode and approval policy descriptions live in the frozen system-prompt
/// prefix (`render_runtime_policy_reference()`). This tag is a pointer — the
/// model looks up the corresponding rules from the system prompt. Reduces
/// per-request overhead from ~500 tokens to ~12 tokens.
fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
let mode_str = mode_prompt_marker_value(mode);
let approval_str = approval_prompt_marker_value(approval_mode);
format!(
"<mode_prompt mode=\"{}\">",
match mode {
AppMode::Agent => "agent",
AppMode::Plan => "plan",
AppMode::Yolo => "yolo",
}
"<runtime_prompt visibility=\"internal\" mode=\"{mode_str}\" approval=\"{approval_str}\"/>"
)
}
fn approval_prompt_marker(approval_mode: crate::tui::approval::ApprovalMode) -> String {
format!(
"<approval_policy policy=\"{}\">",
match approval_mode {
crate::tui::approval::ApprovalMode::Auto => "auto",
crate::tui::approval::ApprovalMode::Suggest => "suggest",
crate::tui::approval::ApprovalMode::Never => "never",
}
)
}
fn mode_prompt_text(mode: AppMode) -> &'static str {
fn mode_prompt_marker_value(mode: AppMode) -> &'static str {
match mode {
AppMode::Agent => prompts::AGENT_MODE,
AppMode::Plan => prompts::PLAN_MODE,
AppMode::Yolo => prompts::YOLO_MODE,
AppMode::Agent => "agent",
AppMode::Plan => "plan",
AppMode::Yolo => "yolo",
}
}
fn runtime_prompt_text(mode: AppMode, approval_mode: crate::tui::approval::ApprovalMode) -> String {
let marker = mode_prompt_marker(mode);
let mode_text = mode_prompt_text(mode).trim();
let taxonomy = prompts::render_core_tool_taxonomy_block(mode);
let approval_marker = approval_prompt_marker(approval_mode);
let approval_text = prompts::approval_prompt_for_mode(mode, approval_mode).trim();
format!(
"<runtime_prompt visibility=\"internal\">\n\
This is runtime control metadata for the current request, not user input. \
Apply it to the next assistant response and tool calls. It supersedes any \
earlier mode or approval metadata in the transcript.\n\n\
{marker}\n{taxonomy}\n{mode_text}\n</mode_prompt>\n\n\
{approval_marker}\n{approval_text}\n</approval_policy>\n\
</runtime_prompt>"
)
fn approval_prompt_marker_value(approval_mode: crate::tui::approval::ApprovalMode) -> &'static str {
match approval_mode {
crate::tui::approval::ApprovalMode::Auto => "auto",
crate::tui::approval::ApprovalMode::Suggest => "suggest",
crate::tui::approval::ApprovalMode::Never => "never",
}
}
/// Spawn the engine in a background task
+39 -59
View File
@@ -1748,11 +1748,11 @@ async fn change_mode_refreshes_session_prompt_and_updates_session() {
!matches!(
block,
ContentBlock::Text { text, .. }
if text.contains("<mode_prompt") || text.contains("<approval_policy")
if text.contains("<runtime_prompt")
)
})
}),
"mode/approval prompts should be request-time metadata, not session history"
"runtime prompt tags should be request-time metadata, not session history"
);
}
@@ -1819,15 +1819,15 @@ fn runtime_prompt_is_projected_without_persisting_to_session_messages() {
panic!("expected text runtime prompt");
};
assert!(text.contains("<runtime_prompt"));
assert!(text.contains("<mode_prompt mode=\"plan\">"));
assert!(text.contains("mode=\"plan\""));
assert!(
text.contains("<approval_policy policy=\"never\">"),
text.contains("approval=\"never\""),
"Plan mode should project its fixed never-approval policy: {text}"
);
}
#[tokio::test]
async fn change_mode_op_injects_runtime_event_into_session_messages() {
async fn change_mode_op_updates_current_mode_and_emits_status() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
@@ -1837,7 +1837,6 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() {
let (engine, handle) = Engine::new(config, &Config::default());
let run = tokio::spawn(engine.run());
// Switch from default Agent → YOLO
handle
.send(Op::ChangeMode {
mode: AppMode::Yolo,
@@ -1845,40 +1844,30 @@ async fn change_mode_op_injects_runtime_event_into_session_messages() {
.await
.expect("send change mode");
// Collect session-updated events until we see the injected message
let messages = {
let mut rx = handle.rx_event.write().await;
loop {
let event = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
.await
.expect("session update after mode switch")
.expect("event");
if let Event::SessionUpdated { messages, .. } = event {
// The last message should be our runtime event
if let Some(last) = messages.last()
&& let ContentBlock::Text { text, .. } =
last.content.first().expect("text block")
&& text.contains("kind=\"mode_change\"")
{
break messages;
}
}
}
};
run.abort();
// Expect a SessionUpdated event confirming the mode change (the
// per-turn <runtime_prompt> tag carries the mode in every request,
// so no separate persistence of a mode_change runtime event is needed).
let mut rx = handle.rx_event.write().await;
let session_updated = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
.await
.expect("session update after mode switch")
.expect("event");
assert!(
matches!(session_updated, Event::SessionUpdated { .. }),
"should emit SessionUpdated after mode change, got: {session_updated:?}"
);
let last = messages.last().expect("at least one message");
let ContentBlock::Text { text, .. } = last.content.first().expect("text block") else {
panic!("expected text block");
};
// Also expect a status event
let status = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
.await
.expect("status after mode switch")
.expect("event");
assert!(
text.contains("Agent mode") && text.contains("YOLO mode"),
"should contain both previous and new mode: {text}"
);
assert!(
text.contains("Re-evaluate"),
"should tell agent to re-evaluate: {text}"
matches!(status, Event::Status { .. }),
"should emit Status after mode change, got: {status:?}"
);
run.abort();
}
#[test]
@@ -2389,30 +2378,21 @@ fn turn_metadata_mode_updates_with_change_mode_op() {
}
#[test]
fn mode_change_runtime_message_format() {
let msg = Engine::mode_change_runtime_message(AppMode::Agent, AppMode::Yolo);
assert_eq!(msg.role, "user");
let ContentBlock::Text { text, .. } = msg.content.first().expect("text block") else {
panic!("expected text block");
fn mode_change_op_updates_current_mode_and_emits_session_updated() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
model: "deepseek-v4-pro".to_string(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
assert_eq!(engine.current_mode, AppMode::Agent);
assert!(
text.contains("codewhale:runtime_event"),
"should be a runtime event message"
);
assert!(
text.contains("kind=\"mode_change\""),
"should have mode_change kind"
);
assert!(
text.contains("Agent mode") && text.contains("YOLO mode"),
"should mention both previous and new mode: {text}"
);
assert!(
text.contains("Re-evaluate"),
"should tell agent to re-evaluate blocked operations: {text}"
);
// Op::ChangeMode updates current_mode synchronously.
// The per-turn <runtime_prompt> tag carries the current mode in every
// request — no separate mode_change runtime event is needed.
engine.current_mode = AppMode::Yolo;
assert_eq!(engine.current_mode, AppMode::Yolo);
}
#[test]
+77 -18
View File
@@ -701,14 +701,6 @@ impl Personality {
// ── Composition ───────────────────────────────────────────────────────
fn mode_prompt(mode: AppMode) -> &'static str {
match mode {
AppMode::Agent => AGENT_MODE,
AppMode::Yolo => YOLO_MODE,
AppMode::Plan => PLAN_MODE,
}
}
fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
match mode {
AppMode::Agent => ApprovalMode::Suggest,
@@ -717,16 +709,76 @@ fn default_approval_mode_for_mode(mode: AppMode) -> ApprovalMode {
}
}
pub(crate) fn approval_prompt_for_mode(mode: AppMode, approval_mode: ApprovalMode) -> &'static str {
match mode {
AppMode::Yolo => AUTO_APPROVAL,
AppMode::Plan => NEVER_APPROVAL,
AppMode::Agent => match approval_mode {
ApprovalMode::Auto => AUTO_APPROVAL,
ApprovalMode::Suggest => SUGGEST_APPROVAL,
ApprovalMode::Never => NEVER_APPROVAL,
},
}
/// Generate a static reference block containing all mode and approval policy
/// descriptions. This lives in the frozen system-prompt prefix (sent once per
/// session) so the per-turn `<runtime_prompt>` tag can be a minimal pointer
/// (`<runtime_prompt mode="yolo" approval="auto"/>`) instead of repeating the
/// full policy text on every API request.
/// Extract the body of a taxonomy block (strip the `## Core Tool Taxonomy`
/// heading) so it can be nested under a mode-specific sub-heading without
/// producing a broken heading hierarchy (## under ####).
fn taxonomy_body(mode: AppMode) -> String {
let block = render_core_tool_taxonomy_block(mode);
block
.strip_prefix("## Core Tool Taxonomy\n\n")
.unwrap_or(&block)
.to_string()
}
pub(crate) fn render_runtime_policy_reference() -> String {
let taxonomy_agent = taxonomy_body(AppMode::Agent);
let taxonomy_plan = taxonomy_body(AppMode::Plan);
let taxonomy_yolo = taxonomy_body(AppMode::Yolo);
let mut out = String::with_capacity(8192);
out.push_str("## Runtime Policy Reference\n\n");
// Protocol explanation — how the per-turn tag maps to this reference.
out.push_str(
"Each turn, the latest message in the transcript will contain a \
`<runtime_prompt>` tag that specifies the currently active mode and \
approval policy. When you see this tag, look up the corresponding \
rules below and apply them for the current turn.\n\n\
The tag format is:\n\
`<runtime_prompt visibility=\"internal\" mode=\"<mode>\" approval=\"<approval>\"/>`\n\n",
);
// ── Mode reference ─────────────────────────────────────────────────
out.push_str("### Modes\n\n");
out.push_str("#### agent\n\n");
out.push_str(&taxonomy_agent);
out.push('\n');
out.push_str(AGENT_MODE.trim());
out.push_str("\n\n");
out.push_str("#### plan\n\n");
out.push_str(&taxonomy_plan);
out.push('\n');
out.push_str(PLAN_MODE.trim());
out.push_str("\n\n");
out.push_str("#### yolo\n\n");
out.push_str(&taxonomy_yolo);
out.push('\n');
out.push_str(YOLO_MODE.trim());
out.push_str("\n\n");
// ── Approval policy reference ──────────────────────────────────────
out.push_str("### Approval Policies\n\n");
out.push_str("#### auto\n\n");
out.push_str(AUTO_APPROVAL.trim());
out.push_str("\n\n");
out.push_str("#### suggest\n\n");
out.push_str(SUGGEST_APPROVAL.trim());
out.push_str("\n\n");
out.push_str("#### never\n\n");
out.push_str(NEVER_APPROVAL.trim());
out
}
/// Compose the full system prompt in deterministic order:
@@ -1165,6 +1217,13 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
full_prompt.push_str("\n\n");
full_prompt.push_str(COMPACT_TEMPLATE);
// 5a. Runtime policy reference — all mode and approval policy descriptions
// live here in the frozen prefix so the per-turn <runtime_prompt> tag
// can be a minimal pointer instead of repeating the full policy text
// on every API request (up to ~500 tokens saved per turn).
full_prompt.push_str("\n\n");
full_prompt.push_str(&render_runtime_policy_reference());
// ── Volatile-content boundary ─────────────────────────────────────────
// Everything below drifts mid-session and busts the prefix cache for
// bytes that follow. All static layers (mode, project context, env,
+1 -1
View File
@@ -1,4 +1,4 @@
## Approval Policy: Auto — Tier 2 (Statute)
##### Approval Policy: Auto — Tier 2 (Statute)
All tool calls are pre-approved. You will not see approval prompts — your actions execute immediately.
+1 -1
View File
@@ -1,4 +1,4 @@
## Approval Policy: Never — Tier 2 (Statute)
##### Approval Policy: Never — Tier 2 (Statute)
All write operations are blocked. You can read, search, and investigate, but you cannot modify the workspace.
+1 -1
View File
@@ -1,4 +1,4 @@
## Approval Policy: Suggest — Tier 2 (Statute)
##### Approval Policy: Suggest — Tier 2 (Statute)
Read-only operations run silently. Write operations (file edits, patches, shell execution, sub-agent spawns, CSV batches) require user approval before executing.
+3 -3
View File
@@ -1,4 +1,4 @@
## Mode: Agent
##### Mode: Agent
You are running in Agent mode — autonomous task execution with tool access.
@@ -12,7 +12,7 @@ For simple writes, state the direct edit and proceed through the normal approval
For multi-step initiatives, keep `checklist_write` current. Add `update_plan` only for genuinely useful strategy.
## Efficient Approvals
##### Efficient Approvals
When your plan includes multiple writes, present them together:
1. Show `checklist_write` with all write steps listed so the user sees the full scope
@@ -21,7 +21,7 @@ When your plan includes multiple writes, present them together:
Don't sequence approvals one at a time — the user wants context, not interruption. A clear plan with visible checklist items gets approved faster than a series of surprise approval prompts.
## Session Longevity
##### Session Longevity
Long sessions accumulate context. To stay fast:
- Open sub-agent sessions for independent work instead of doing everything sequentially
+1 -1
View File
@@ -1,4 +1,4 @@
## Mode: Plan
##### Mode: Plan
You are running in Plan mode — design before implementing.
+1 -1
View File
@@ -1,4 +1,4 @@
## Mode: YOLO
##### Mode: YOLO
You are running in YOLO mode — full autonomy, all actions pre-approved.