perf(engine): stabilize system prompt and move working set metadata

This commit is contained in:
Hunter Bown
2026-05-04 22:06:55 -05:00
parent a14227edf8
commit b48b68f078
6 changed files with 185 additions and 141 deletions
+44 -24
View File
@@ -8,6 +8,8 @@
//! - Tool execution orchestration
use std::collections::HashMap;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::path::PathBuf;
use std::sync::{Arc, Mutex as StdMutex};
use std::time::{Duration, Instant};
@@ -35,8 +37,8 @@ use crate::mcp::McpPool;
#[cfg(test)]
use crate::models::ToolCaller;
use crate::models::{
ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest,
StreamEvent, SystemPrompt, Tool, Usage,
ContentBlock, ContentBlockStart, Delta, LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, Message,
MessageRequest, StreamEvent, SystemPrompt, Tool, Usage,
};
use crate::prompts;
use crate::seam_manager::{SeamConfig, SeamManager};
@@ -353,8 +355,9 @@ impl Engine {
config.mcp_config_path.clone(),
);
// Set up system prompt with project context (default to agent mode)
let working_set_summary = session.working_set.summary_block(&config.workspace);
// Set up stable system prompt with project context (default to agent mode).
// Per-turn working-set metadata is injected into the latest user
// message at request time so file churn does not rewrite this prefix.
let user_memory_block =
crate::memory::compose_block(config.memory_enabled, &config.memory_path);
let system_prompt = prompts::system_prompt_for_mode_with_context_skills_and_session(
@@ -368,8 +371,9 @@ impl Engine {
goal_objective: config.goal_objective.as_deref(),
},
);
session.system_prompt =
append_working_set_summary(Some(system_prompt), working_set_summary.as_deref());
let stable_prompt = Some(system_prompt);
session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref()));
session.system_prompt = stable_prompt;
let subagent_manager =
new_shared_subagent_manager(config.workspace.clone(), config.max_subagents);
@@ -1645,10 +1649,6 @@ impl Engine {
/// Refresh the system prompt based on current mode and context.
fn refresh_system_prompt(&mut self, mode: AppMode) {
let working_set_summary = self
.session
.working_set
.summary_block(&self.config.workspace);
let user_memory_block =
crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path);
let base = prompts::system_prompt_for_mode_with_context_skills_and_session(
@@ -1664,8 +1664,11 @@ impl Engine {
);
let stable_prompt =
merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone());
self.session.system_prompt =
append_working_set_summary(stable_prompt, working_set_summary.as_deref());
let stable_hash = system_prompt_hash(stable_prompt.as_ref());
if self.session.last_system_prompt_hash != Some(stable_hash) {
self.session.system_prompt = stable_prompt;
self.session.last_system_prompt_hash = Some(stable_hash);
}
}
fn merge_compaction_summary(&mut self, summary_prompt: Option<SystemPrompt>) {
@@ -1676,18 +1679,36 @@ impl Engine {
self.session.compaction_summary_prompt.as_ref(),
summary_prompt.clone(),
);
let current_without_working_set =
remove_working_set_summary(self.session.system_prompt.as_ref());
let merged = merge_system_prompts(current_without_working_set.as_ref(), summary_prompt);
let working_set_summary = self
.session
.working_set
.summary_block(&self.config.workspace);
self.session.system_prompt =
append_working_set_summary(merged, working_set_summary.as_deref());
let merged = merge_system_prompts(self.session.system_prompt.as_ref(), summary_prompt);
self.session.last_system_prompt_hash = Some(system_prompt_hash(merged.as_ref()));
self.session.system_prompt = merged;
}
}
/// Computes a 64-bit fingerprint of an optional system prompt.
///
/// Callers compare the fingerprint of a freshly assembled prompt against the
/// one recorded on the session to decide whether the stored prompt needs to be
/// replaced.
///
/// The encoding fed to the hasher is unambiguous:
/// - each shape (`Text`, `Blocks`, `None`) starts with its own tag byte;
/// - `Blocks` hashes the number of blocks before the blocks themselves;
/// - every block hashes `cache_control` as an `Option`, so "absent" and
///   "present" contribute different bytes. Without that marker two different
///   block lists (one whose first block carries a cache type, one whose second
///   block does) could feed the exact same string sequence to the hasher.
///
/// NOTE(review): `DefaultHasher`'s algorithm is unspecified by the standard
/// library, so the value should only be compared against hashes produced by
/// the same build — confirm it is never persisted across versions.
fn system_prompt_hash(prompt: Option<&SystemPrompt>) -> u64 {
    let mut hasher = DefaultHasher::new();
    match prompt {
        Some(SystemPrompt::Text(text)) => {
            0u8.hash(&mut hasher);
            text.hash(&mut hasher);
        }
        Some(SystemPrompt::Blocks(blocks)) => {
            1u8.hash(&mut hasher);
            // Length prefix keeps the per-block encoding self-delimiting.
            blocks.len().hash(&mut hasher);
            for block in blocks {
                block.block_type.hash(&mut hasher);
                block.text.hash(&mut hasher);
                // Hash the Option itself (discriminant + payload) rather than
                // only the payload when present.
                block
                    .cache_control
                    .as_ref()
                    .map(|cache_control| &cache_control.cache_type)
                    .hash(&mut hasher);
            }
        }
        None => {
            2u8.hash(&mut hasher);
        }
    }
    hasher.finish()
}
/// Spawn the engine in a background task
pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle {
let (engine, handle) = Engine::new(config, api_config);
@@ -1775,9 +1796,8 @@ mod context;
pub(crate) use context::compact_tool_result_for_context;
use context::{
COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP,
TURN_MAX_OUTPUT_TOKENS, append_working_set_summary, context_input_budget,
estimate_input_tokens_conservative, extract_compaction_summary_prompt,
is_context_length_error_message, remove_working_set_summary, summarize_text,
TURN_MAX_OUTPUT_TOKENS, context_input_budget, estimate_input_tokens_conservative,
extract_compaction_summary_prompt, is_context_length_error_message, summarize_text,
turn_response_headroom_tokens,
};
mod dispatch;
+1 -51
View File
@@ -6,7 +6,7 @@
use crate::compaction::estimate_tokens;
use crate::error_taxonomy::ErrorCategory;
use crate::models::{Message, SystemBlock, SystemPrompt, context_window_for_model};
use crate::models::{Message, SystemPrompt, context_window_for_model};
use crate::tools::spec::ToolResult;
/// Max output tokens requested for normal agent turns. Generous on purpose:
@@ -288,56 +288,6 @@ pub(super) fn extract_compaction_summary_prompt(
}
}
/// Returns a copy of `prompt` with every block whose text contains
/// `WORKING_SET_SUMMARY_MARKER` dropped.
///
/// - `None` stays `None`.
/// - A `Text` prompt is cloned unchanged (its text is not inspected).
/// - A `Blocks` prompt keeps only the blocks without the marker; if nothing
///   is left, `None` is returned instead of an empty block list.
pub(super) fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option<SystemPrompt> {
    let blocks = match prompt? {
        SystemPrompt::Text(text) => return Some(SystemPrompt::Text(text.clone())),
        SystemPrompt::Blocks(blocks) => blocks,
    };

    let mut kept: Vec<SystemBlock> = Vec::new();
    for block in blocks {
        if block.text.contains(WORKING_SET_SUMMARY_MARKER) {
            continue;
        }
        kept.push(block.clone());
    }

    if kept.is_empty() {
        return None;
    }
    Some(SystemPrompt::Blocks(kept))
}
/// Appends the working-set summary as the final text block of `prompt`.
///
/// The summary is trimmed first; when it is absent or blank the prompt is
/// returned untouched. Otherwise the result is always a `Blocks` prompt:
/// - `None` becomes a single-block prompt holding only the summary;
/// - `Text` becomes two blocks (the original text, then the summary);
/// - `Blocks` first drops any existing block containing
///   `WORKING_SET_SUMMARY_MARKER`, then gains the new summary at the end.
pub(super) fn append_working_set_summary(
    prompt: Option<SystemPrompt>,
    working_set_summary: Option<&str>,
) -> Option<SystemPrompt> {
    let summary = match working_set_summary.map(str::trim) {
        Some(trimmed) if !trimmed.is_empty() => trimmed,
        _ => return prompt,
    };

    // Every block produced here is plain text without cache control.
    let text_block = |text: String| SystemBlock {
        block_type: "text".to_string(),
        text,
        cache_control: None,
    };

    let mut blocks = match prompt {
        None => Vec::new(),
        Some(SystemPrompt::Text(text)) => vec![text_block(text)],
        Some(SystemPrompt::Blocks(mut existing)) => {
            existing.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER));
            existing
        }
    };
    blocks.push(text_block(summary.to_string()));
    Some(SystemPrompt::Blocks(blocks))
}
/// Estimates the token count of `text` as one token per three Unicode scalar
/// values (`char`s), rounded up. Empty input yields zero.
fn estimate_text_tokens_conservative(text: &str) -> usize {
    const CHARS_PER_TOKEN: usize = 3;
    let char_count = text.chars().count();
    char_count.div_ceil(CHARS_PER_TOKEN)
}
+76 -24
View File
@@ -501,7 +501,7 @@ fn subagent_results_are_summarized_before_parent_context_insertion() {
}
#[test]
fn refresh_system_prompt_places_working_set_after_stable_prefix() {
fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
@@ -518,20 +518,74 @@ fn refresh_system_prompt_places_working_set_after_stable_prefix() {
engine.refresh_system_prompt(AppMode::Agent);
let Some(SystemPrompt::Blocks(blocks)) = &engine.session.system_prompt else {
panic!("expected structured prompt blocks");
};
let last = blocks.last().expect("working-set block");
assert!(last.text.contains(WORKING_SET_SUMMARY_MARKER));
assert!(
blocks[..blocks.len() - 1]
let prompt = match &engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text.clone(),
Some(SystemPrompt::Blocks(blocks)) => blocks
.iter()
.all(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER))
);
.map(|block| block.text.as_str())
.collect::<Vec<_>>()
.join("\n"),
None => panic!("expected system prompt"),
};
assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER));
}
#[test]
fn compaction_summary_stays_before_volatile_working_set() {
fn working_set_reaches_model_as_turn_metadata() {
    // Workspace fixture containing `src/lib.rs`.
    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    fs::create_dir_all(root.join("src")).expect("mkdir");
    fs::write(root.join("src/lib.rs"), "pub fn sample() {}").expect("write");

    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: root.to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    // Feed the same user text to the working set and to the transcript.
    let user_text = "please inspect src/lib.rs";
    engine
        .session
        .working_set
        .observe_user_message(user_text, root);
    engine.session.add_message(Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: user_text.to_string(),
            cache_control: None,
        }],
    });

    // The outgoing copy of the last message must start with the metadata block.
    let outgoing = engine.messages_with_turn_metadata();
    let leading = outgoing
        .last()
        .and_then(|message| message.content.first())
        .expect("turn metadata block");
    let text = match leading {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text metadata block"),
    };

    assert!(text.starts_with("<turn_meta>\n"));
    assert!(text.contains(WORKING_SET_SUMMARY_MARKER));
    assert!(text.contains("src/lib.rs"));
}
#[test]
fn refresh_system_prompt_is_noop_when_unchanged() {
    let workspace = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    // Capture the hash and prompt produced by the first refresh as a baseline.
    engine.refresh_system_prompt(AppMode::Agent);
    let baseline_hash = engine.session.last_system_prompt_hash;
    let baseline_prompt = engine.session.system_prompt.clone();

    // A second refresh with identical inputs must leave both untouched.
    engine.refresh_system_prompt(AppMode::Agent);
    assert_eq!(engine.session.last_system_prompt_hash, baseline_hash);
    assert_eq!(engine.session.system_prompt, baseline_prompt);
}
#[test]
fn compaction_summary_stays_in_stable_system_prompt() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/main.rs"), "fn main() {}").expect("write");
@@ -552,20 +606,18 @@ fn compaction_summary_stays_before_volatile_working_set() {
cache_control: None,
}])));
let Some(SystemPrompt::Blocks(blocks)) = &engine.session.system_prompt else {
panic!("expected structured prompt blocks");
let prompt = match &engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text.clone(),
Some(SystemPrompt::Blocks(blocks)) => blocks
.iter()
.map(|block| block.text.as_str())
.collect::<Vec<_>>()
.join("\n"),
None => panic!("expected system prompt"),
};
let summary_index = blocks
.iter()
.position(|block| block.text.contains(COMPACTION_SUMMARY_MARKER))
.expect("summary block");
let working_set_index = blocks
.iter()
.position(|block| block.text.contains(WORKING_SET_SUMMARY_MARKER))
.expect("working-set block");
assert!(summary_index < working_set_index);
assert_eq!(working_set_index, blocks.len() - 1);
assert!(prompt.contains(COMPACTION_SUMMARY_MARKER));
assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER));
}
#[tokio::test]
@@ -635,7 +687,7 @@ async fn pre_request_refresh_invoked_when_medium_risk() {
engine.config.model = "deepseek-v3.2-128k".to_string();
let long = "x".repeat(5_000);
for _ in 0..200 {
for _ in 0..900 {
engine.session.messages.push(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
+32 -1
View File
@@ -230,7 +230,7 @@ impl Engine {
};
let request = MessageRequest {
model: self.session.model.clone(),
messages: self.session.messages.clone(),
messages: self.messages_with_turn_metadata(),
max_tokens: TURN_MAX_OUTPUT_TOKENS,
system: self.session.system_prompt.clone(),
tools: active_tools.clone(),
@@ -1594,4 +1594,35 @@ impl Engine {
}
(TurnOutcomeStatus::Completed, None)
}
/// Builds the message list for an outgoing request, with the working-set
/// summary injected as per-turn metadata.
///
/// The session's stored messages are never modified; this works on a clone.
/// When the working set yields a non-blank summary, it is wrapped in
/// `<turn_meta>…</turn_meta>` and inserted as the first content block of the
/// most recent message whose role is `"user"`. If there is no summary, or no
/// user message exists, the clone is returned unchanged.
///
/// NOTE(review): the metadata is placed ahead of whatever content the last
/// user-role message already has — confirm this ordering is acceptable if
/// that message can carry non-text blocks.
pub(super) fn messages_with_turn_metadata(&self) -> Vec<Message> {
    let summary = match self
        .session
        .working_set
        .summary_block(&self.config.workspace)
    {
        Some(raw) => raw.trim().to_string(),
        None => return self.session.messages.clone(),
    };
    if summary.is_empty() {
        return self.session.messages.clone();
    }

    let mut outgoing = self.session.messages.clone();
    // Walk backwards so the newest user message receives the metadata.
    if let Some(target) = outgoing
        .iter_mut()
        .rev()
        .find(|message| message.role == "user")
    {
        let metadata_block = ContentBlock::Text {
            text: format!("<turn_meta>\n{summary}\n</turn_meta>"),
            cache_control: None,
        };
        target.content.insert(0, metadata_block);
    }
    outgoing
}
}
+4
View File
@@ -25,6 +25,9 @@ pub struct Session {
/// System prompt (optional)
pub system_prompt: Option<SystemPrompt>,
/// Hash of the last assembled stable system prompt. Used to avoid
/// replacing `system_prompt` when unchanged.
pub last_system_prompt_hash: Option<u64>,
/// Persisted summary blocks generated by context compaction.
pub compaction_summary_prompt: Option<SystemPrompt>,
@@ -131,6 +134,7 @@ impl Session {
} else {
None
},
last_system_prompt_hash: None,
working_set: WorkingSet::default(),
cycle_count: 0,
current_cycle_started: Utc::now(),
+28 -41
View File
@@ -254,11 +254,11 @@ pub fn system_prompt_for_mode_with_context(
/// 4. `## Context Management` (compile-time constant, Agent/Yolo only)
/// 5. compaction handoff template (compile-time constant)
/// 6. handoff block — file-backed; rewritten by `/compact` and on exit
/// 7. working-set summary — drifts when a new path is observed
///
/// Anything appended after a volatile block forfeits the cache for the rest
/// of the request. New blocks belong above the handoff/working-set boundary
/// unless they themselves are turn-volatile.
/// of the request. New blocks belong above the handoff boundary unless they
/// themselves are turn-volatile. Working-set metadata is now injected into the
/// latest user message as per-turn metadata instead of this system prompt.
pub fn system_prompt_for_mode_with_context_and_skills(
mode: AppMode,
workspace: &Path,
@@ -283,7 +283,7 @@ pub fn system_prompt_for_mode_with_context_and_skills(
pub fn system_prompt_for_mode_with_context_skills_and_session(
mode: AppMode,
workspace: &Path,
working_set_summary: Option<&str>,
_working_set_summary: Option<&str>,
skills_dir: Option<&Path>,
instructions: Option<&[PathBuf]>,
session_context: PromptSessionContext<'_>,
@@ -360,6 +360,7 @@ pub fn system_prompt_for_mode_with_context_skills_and_session(
If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\
### Prompt-cache awareness\n\n\
DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\
- **Working set location:** the current repo working set is injected into the latest user message inside a `<turn_meta>` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
- **Append, don't reorder.** New context goes at the end (latest user / tool messages). Reshuffling earlier messages or rewriting their content invalidates the cache for everything after the change.\n\
- **Don't paraphrase quoted content.** If you've already read a file, refer to it by path or line range instead of re-quoting it with different formatting.\n\
- **Use `/compact` as a hard reset, not a tweak.** Compaction is meant for when the cache is already losing — it intentionally rewrites the prefix to a shorter summary. Don't trigger it for small wins.\n\
@@ -382,13 +383,6 @@ pub fn system_prompt_for_mode_with_context_skills_and_session(
full_prompt = format!("{full_prompt}\n\n{handoff_block}");
}
// 7. Working-set summary (drifts when a new path is observed).
if let Some(summary) = working_set_summary
&& !summary.trim().is_empty()
{
full_prompt = format!("{full_prompt}\n\n{summary}");
}
SystemPrompt::Text(full_prompt)
}
@@ -547,7 +541,7 @@ mod tests {
}
#[test]
fn session_goal_is_injected_above_volatile_prompt_tail() {
fn session_goal_is_injected_above_handoff_tail() {
let tmp = tempdir().expect("tempdir");
let prompt = match system_prompt_for_mode_with_context_skills_and_session(
AppMode::Agent,
@@ -566,11 +560,10 @@ mod tests {
let goal_pos = prompt.find("<session_goal>").expect("goal block");
let compact_pos = prompt.find("## Compaction Handoff").expect("compact block");
let working_set_pos = prompt.find("## Repo Working Set").expect("working set");
assert!(prompt.contains("Fix transcript corruption"));
assert!(goal_pos < compact_pos);
assert!(goal_pos < working_set_pos);
assert!(!prompt.contains("src/lib.rs"));
}
#[test]
@@ -729,12 +722,10 @@ mod tests {
}
#[test]
fn system_prompt_with_working_set_summary_is_byte_stable_for_constant_summary() {
// The `working_set_summary` argument is the volatile surface (suspect
// #1 in #263). Independently verifying THIS surface needs a separate
// test in working_set.rs; here we just pin that the surrounding
// prompt construction faithfully embeds whatever summary it's given
// without injecting any non-determinism on its own.
fn system_prompt_ignores_working_set_summary_argument() {
// Working-set metadata is now injected into the latest user message
// per turn. The legacy argument remains for call-site compatibility
// but must not reintroduce volatile bytes into the system prompt.
let tmp = tempdir().expect("tempdir");
let workspace = tmp.path();
let summary = "## Repo Working Set\nWorkspace: /tmp/x\n";
@@ -754,16 +745,18 @@ mod tests {
&a,
&b,
);
assert!(a.contains(summary), "summary must be embedded as-is");
assert!(
!a.contains(summary),
"summary must not be embedded in system prompt"
);
}
#[test]
fn system_prompt_with_handoff_file_is_byte_stable_when_file_is_unchanged() {
// Companion to the working-set stability test: if `.deepseek/handoff.md`
// hasn't moved between two builds, the rendered prompt must produce
// identical bytes. The handoff block is the second volatile surface
// (the first is the working-set summary) — both land below the static
// boundary in `system_prompt_for_mode_with_context_and_skills`.
// If `.deepseek/handoff.md` hasn't moved between two builds, the
// rendered prompt must produce identical bytes. The handoff block
// lands below the static boundary in
// `system_prompt_for_mode_with_context_and_skills`.
let tmp = tempdir().expect("tempdir");
let workspace = tmp.path();
let handoff_dir = workspace.join(".deepseek");
@@ -792,14 +785,11 @@ mod tests {
}
#[test]
fn handoff_and_working_set_appear_after_static_blocks() {
// Cache-prefix invariant: the volatile blocks (handoff, working_set)
// must come *after* the static `## Context Management` and the
// compaction handoff template (`## Compaction Handoff`) so a churn
// in either volatile section doesn't drag the static blocks out of
// the cached prefix. Pre-fix ordering placed handoff between the
// skills block and `## Context Management`, which busted the cache
// every time `/compact` rewrote the file.
fn handoff_appears_after_static_blocks_without_working_set() {
// Cache-prefix invariant: the handoff block must come after static
// `## Context Management` and the compaction handoff template
// (`## Compaction Handoff`). Working-set metadata is per-turn user
// metadata now, not a system-prompt tail block.
let tmp = tempdir().expect("tempdir");
let workspace = tmp.path();
let handoff_dir = workspace.join(".deepseek");
@@ -822,9 +812,10 @@ mod tests {
let handoff_pos = prompt
.find(HANDOFF_BLOCK_MARKER)
.expect("handoff block present when fixture file exists");
let working_set_pos = prompt
.find("## Repo Working Set")
.expect("working-set summary present when supplied");
assert!(
!prompt.contains("## Repo Working Set"),
"working-set summary must stay out of the system prompt"
);
assert!(
context_pos < handoff_pos,
@@ -834,10 +825,6 @@ mod tests {
compact_pos < handoff_pos,
"## Compaction Handoff must precede the handoff block"
);
assert!(
handoff_pos < working_set_pos,
"handoff block must precede the working-set summary (most-volatile last)"
);
}
#[test]