perf(engine): stabilize system prompt and move working set metadata

2026-05-04 22:06:55 -05:00
parent a14227edf8
commit b48b68f078
6 changed files with 185 additions and 141 deletions
@@ -8,6 +8,8 @@
 //! - Tool execution orchestration

 use std::collections::HashMap;
+use std::collections::hash_map::DefaultHasher;
+use std::hash::{Hash, Hasher};
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex as StdMutex};
 use std::time::{Duration, Instant};
@@ -35,8 +37,8 @@ use crate::mcp::McpPool;
 #[cfg(test)]
 use crate::models::ToolCaller;
 use crate::models::{
-    ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest,
-    StreamEvent, SystemPrompt, Tool, Usage,
+    ContentBlock, ContentBlockStart, Delta, LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, Message,
+    MessageRequest, StreamEvent, SystemPrompt, Tool, Usage,
 };
 use crate::prompts;
 use crate::seam_manager::{SeamConfig, SeamManager};
@@ -353,8 +355,9 @@ impl Engine {
            config.mcp_config_path.clone(),
        );

-        // Set up system prompt with project context (default to agent mode)
-        let working_set_summary = session.working_set.summary_block(&config.workspace);
+        // Set up stable system prompt with project context (default to agent mode).
+        // Per-turn working-set metadata is injected into the latest user
+        // message at request time so file churn does not rewrite this prefix.
        let user_memory_block =
            crate::memory::compose_block(config.memory_enabled, &config.memory_path);
        let system_prompt = prompts::system_prompt_for_mode_with_context_skills_and_session(
@@ -368,8 +371,9 @@ impl Engine {
                goal_objective: config.goal_objective.as_deref(),
            },
        );
-        session.system_prompt =
-            append_working_set_summary(Some(system_prompt), working_set_summary.as_deref());
+        let stable_prompt = Some(system_prompt);
+        session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref()));
+        session.system_prompt = stable_prompt;

        let subagent_manager =
            new_shared_subagent_manager(config.workspace.clone(), config.max_subagents);
@@ -1645,10 +1649,6 @@ impl Engine {

    /// Refresh the system prompt based on current mode and context.
    fn refresh_system_prompt(&mut self, mode: AppMode) {
-        let working_set_summary = self
-            .session
-            .working_set
-            .summary_block(&self.config.workspace);
        let user_memory_block =
            crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path);
        let base = prompts::system_prompt_for_mode_with_context_skills_and_session(
@@ -1664,8 +1664,11 @@ impl Engine {
        );
        let stable_prompt =
            merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone());
-        self.session.system_prompt =
-            append_working_set_summary(stable_prompt, working_set_summary.as_deref());
+        let stable_hash = system_prompt_hash(stable_prompt.as_ref());
+        if self.session.last_system_prompt_hash != Some(stable_hash) {
+            self.session.system_prompt = stable_prompt;
+            self.session.last_system_prompt_hash = Some(stable_hash);
+        }
    }

    fn merge_compaction_summary(&mut self, summary_prompt: Option<SystemPrompt>) {
@@ -1676,18 +1679,36 @@ impl Engine {
            self.session.compaction_summary_prompt.as_ref(),
            summary_prompt.clone(),
        );
-        let current_without_working_set =
-            remove_working_set_summary(self.session.system_prompt.as_ref());
-        let merged = merge_system_prompts(current_without_working_set.as_ref(), summary_prompt);
-        let working_set_summary = self
-            .session
-            .working_set
-            .summary_block(&self.config.workspace);
-        self.session.system_prompt =
-            append_working_set_summary(merged, working_set_summary.as_deref());
+        let merged = merge_system_prompts(self.session.system_prompt.as_ref(), summary_prompt);
+        self.session.last_system_prompt_hash = Some(system_prompt_hash(merged.as_ref()));
+        self.session.system_prompt = merged;
    }
 }

+/// Fingerprints a system prompt so a freshly assembled prompt can be compared
+/// against the one already installed without keeping a second copy around.
+///
+/// The encoding fed to the hasher is unambiguous:
+/// - a leading tag separates `Text` (0), `Blocks` (1) and `None` (2);
+/// - `Blocks` hashes its length first, so block boundaries cannot shift;
+/// - each block hashes an explicit presence flag for `cache_control`, so a
+///   missing cache control can never be confused with the fields of the
+///   block that follows it.
+///
+/// Returns a 64-bit digest. Equal prompts always hash equally; distinct
+/// prompts can still collide with the usual 64-bit probability.
+///
+/// NOTE(review): `DefaultHasher`'s algorithm is not guaranteed to stay the
+/// same across Rust releases. Treat the value as comparable only within a
+/// single build; confirm it is never relied on after being persisted.
+fn system_prompt_hash(prompt: Option<&SystemPrompt>) -> u64 {
+    let mut hasher = DefaultHasher::new();
+    match prompt {
+        Some(SystemPrompt::Text(text)) => {
+            0u8.hash(&mut hasher);
+            text.hash(&mut hasher);
+        }
+        Some(SystemPrompt::Blocks(blocks)) => {
+            1u8.hash(&mut hasher);
+            // Length prefix: keeps `[a, b]` distinct from any other split of
+            // the same field sequence.
+            blocks.len().hash(&mut hasher);
+            for block in blocks {
+                block.block_type.hash(&mut hasher);
+                block.text.hash(&mut hasher);
+                // Hash presence explicitly; skipping `None` silently would let
+                // the next block's fields stand in for a cache-control value.
+                match &block.cache_control {
+                    Some(cache_control) => {
+                        true.hash(&mut hasher);
+                        cache_control.cache_type.hash(&mut hasher);
+                    }
+                    None => false.hash(&mut hasher),
+                }
+            }
+        }
+        None => {
+            2u8.hash(&mut hasher);
+        }
+    }
+    hasher.finish()
+}
+
 /// Spawn the engine in a background task
 pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle {
    let (engine, handle) = Engine::new(config, api_config);
@@ -1775,9 +1796,8 @@ mod context;
 pub(crate) use context::compact_tool_result_for_context;
 use context::{
    COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP,
-    TURN_MAX_OUTPUT_TOKENS, append_working_set_summary, context_input_budget,
-    estimate_input_tokens_conservative, extract_compaction_summary_prompt,
-    is_context_length_error_message, remove_working_set_summary, summarize_text,
+    TURN_MAX_OUTPUT_TOKENS, context_input_budget, estimate_input_tokens_conservative,
+    extract_compaction_summary_prompt, is_context_length_error_message, summarize_text,
    turn_response_headroom_tokens,
 };
 mod dispatch;
@@ -6,7 +6,7 @@

 use crate::compaction::estimate_tokens;
 use crate::error_taxonomy::ErrorCategory;
-use crate::models::{Message, SystemBlock, SystemPrompt, context_window_for_model};
+use crate::models::{Message, SystemPrompt, context_window_for_model};
 use crate::tools::spec::ToolResult;

 /// Max output tokens requested for normal agent turns. Generous on purpose:
@@ -288,56 +288,6 @@ pub(super) fn extract_compaction_summary_prompt(
    }
 }

-pub(super) fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option<SystemPrompt> {
-    match prompt {
-        Some(SystemPrompt::Blocks(blocks)) => {
-            let filtered: Vec<SystemBlock> = blocks
-                .iter()
-                .filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER))
-                .cloned()
-                .collect();
-            if filtered.is_empty() {
-                None
-            } else {
-                Some(SystemPrompt::Blocks(filtered))
-            }
-        }
-        Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())),
-        None => None,
-    }
-}
-
-pub(super) fn append_working_set_summary(
-    prompt: Option<SystemPrompt>,
-    working_set_summary: Option<&str>,
-) -> Option<SystemPrompt> {
-    let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else {
-        return prompt;
-    };
-    let working_set_block = SystemBlock {
-        block_type: "text".to_string(),
-        text: summary.to_string(),
-        cache_control: None,
-    };
-
-    match prompt {
-        Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![
-            SystemBlock {
-                block_type: "text".to_string(),
-                text,
-                cache_control: None,
-            },
-            working_set_block,
-        ])),
-        Some(SystemPrompt::Blocks(mut blocks)) => {
-            blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER));
-            blocks.push(working_set_block);
-            Some(SystemPrompt::Blocks(blocks))
-        }
-        None => Some(SystemPrompt::Blocks(vec![working_set_block])),
-    }
-}
-
 fn estimate_text_tokens_conservative(text: &str) -> usize {
    text.chars().count().div_ceil(3)
 }
@@ -501,7 +501,7 @@ fn subagent_results_are_summarized_before_parent_context_insertion() {
 }

 #[test]
-fn refresh_system_prompt_places_working_set_after_stable_prefix() {
+fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() {
    let tmp = tempdir().expect("tempdir");
    fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
    fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
@@ -518,20 +518,74 @@ fn refresh_system_prompt_places_working_set_after_stable_prefix() {

    engine.refresh_system_prompt(AppMode::Agent);

-    let Some(SystemPrompt::Blocks(blocks)) = &engine.session.system_prompt else {
-        panic!("expected structured prompt blocks");
-    };
-    let last = blocks.last().expect("working-set block");
-    assert!(last.text.contains(WORKING_SET_SUMMARY_MARKER));
-    assert!(
-        blocks[..blocks.len() - 1]
+    let prompt = match &engine.session.system_prompt {
+        Some(SystemPrompt::Text(text)) => text.clone(),
+        Some(SystemPrompt::Blocks(blocks)) => blocks
            .iter()
-            .all(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER))
-    );
+            .map(|block| block.text.as_str())
+            .collect::<Vec<_>>()
+            .join("\n"),
+        None => panic!("expected system prompt"),
+    };
+    assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER));
 }

 #[test]
-fn compaction_summary_stays_before_volatile_working_set() {
+fn working_set_reaches_model_as_turn_metadata() {
+    // The working set must travel with the outgoing request as a leading
+    // `<turn_meta>` text block on the user message, not via the system prompt.
+    const USER_TEXT: &str = "please inspect src/lib.rs";
+
+    let workspace_dir = tempdir().expect("tempdir");
+    let root = workspace_dir.path();
+    fs::create_dir_all(root.join("src")).expect("mkdir");
+    fs::write(root.join("src/lib.rs"), "pub fn sample() {}").expect("write");
+
+    let (mut engine, _handle) = Engine::new(
+        EngineConfig {
+            workspace: root.to_path_buf(),
+            ..Default::default()
+        },
+        &Config::default(),
+    );
+    engine
+        .session
+        .working_set
+        .observe_user_message(USER_TEXT, root);
+    engine.session.add_message(Message {
+        role: "user".to_string(),
+        content: vec![ContentBlock::Text {
+            text: USER_TEXT.to_string(),
+            cache_control: None,
+        }],
+    });
+
+    let outgoing = engine.messages_with_turn_metadata();
+    let leading_block = outgoing
+        .last()
+        .and_then(|message| message.content.first())
+        .expect("turn metadata block");
+    let text = match leading_block {
+        ContentBlock::Text { text, .. } => text,
+        _ => panic!("expected text metadata block"),
+    };
+
+    assert!(text.starts_with("<turn_meta>\n"));
+    assert!(text.contains(WORKING_SET_SUMMARY_MARKER));
+    assert!(text.contains("src/lib.rs"));
+}
+
+#[test]
+fn refresh_system_prompt_is_noop_when_unchanged() {
+    // Two back-to-back refreshes with no state change in between must leave
+    // the installed prompt and its recorded hash exactly as the first
+    // refresh left them.
+    let workspace_dir = tempdir().expect("tempdir");
+    let (mut engine, _handle) = Engine::new(
+        EngineConfig {
+            workspace: workspace_dir.path().to_path_buf(),
+            ..Default::default()
+        },
+        &Config::default(),
+    );
+
+    engine.refresh_system_prompt(AppMode::Agent);
+    let baseline_hash = engine.session.last_system_prompt_hash;
+    let baseline_prompt = engine.session.system_prompt.clone();
+
+    engine.refresh_system_prompt(AppMode::Agent);
+
+    assert_eq!(engine.session.last_system_prompt_hash, baseline_hash);
+    assert_eq!(engine.session.system_prompt, baseline_prompt);
+}
+
+#[test]
+fn compaction_summary_stays_in_stable_system_prompt() {
    let tmp = tempdir().expect("tempdir");
    fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
    fs::write(tmp.path().join("src/main.rs"), "fn main() {}").expect("write");
@@ -552,20 +606,18 @@ fn compaction_summary_stays_before_volatile_working_set() {
        cache_control: None,
    }])));

-    let Some(SystemPrompt::Blocks(blocks)) = &engine.session.system_prompt else {
-        panic!("expected structured prompt blocks");
+    let prompt = match &engine.session.system_prompt {
+        Some(SystemPrompt::Text(text)) => text.clone(),
+        Some(SystemPrompt::Blocks(blocks)) => blocks
+            .iter()
+            .map(|block| block.text.as_str())
+            .collect::<Vec<_>>()
+            .join("\n"),
+        None => panic!("expected system prompt"),
    };
-    let summary_index = blocks
-        .iter()
-        .position(|block| block.text.contains(COMPACTION_SUMMARY_MARKER))
-        .expect("summary block");
-    let working_set_index = blocks
-        .iter()
-        .position(|block| block.text.contains(WORKING_SET_SUMMARY_MARKER))
-        .expect("working-set block");

-    assert!(summary_index < working_set_index);
-    assert_eq!(working_set_index, blocks.len() - 1);
+    assert!(prompt.contains(COMPACTION_SUMMARY_MARKER));
+    assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER));
 }

 #[tokio::test]
@@ -635,7 +687,7 @@ async fn pre_request_refresh_invoked_when_medium_risk() {
    engine.config.model = "deepseek-v3.2-128k".to_string();

    let long = "x".repeat(5_000);
-    for _ in 0..200 {
+    for _ in 0..900 {
        engine.session.messages.push(Message {
            role: "user".to_string(),
            content: vec![ContentBlock::Text {
@@ -230,7 +230,7 @@ impl Engine {
            };
            let request = MessageRequest {
                model: self.session.model.clone(),
-                messages: self.session.messages.clone(),
+                messages: self.messages_with_turn_metadata(),
                max_tokens: TURN_MAX_OUTPUT_TOKENS,
                system: self.session.system_prompt.clone(),
                tools: active_tools.clone(),
@@ -1594,4 +1594,35 @@ impl Engine {
        }
        (TurnOutcomeStatus::Completed, None)
    }
+
+    /// Returns a copy of the session transcript prepared for an outgoing
+    /// request, with the current working-set summary attached as per-turn
+    /// metadata.
+    ///
+    /// When the working set yields a non-blank summary, it is trimmed, wrapped
+    /// in `<turn_meta>…</turn_meta>`, and inserted as the first content block
+    /// of the most recent message whose role is `"user"`. If the summary is
+    /// missing or blank, or no user message exists, the transcript copy is
+    /// returned unmodified. The stored session messages are never mutated.
+    pub(super) fn messages_with_turn_metadata(&self) -> Vec<Message> {
+        let raw_summary = self
+            .session
+            .working_set
+            .summary_block(&self.config.workspace);
+        let mut outgoing = self.session.messages.clone();
+
+        // A missing or whitespace-only summary means there is nothing to inject.
+        let summary = match raw_summary.as_deref().map(str::trim) {
+            Some(trimmed) if !trimmed.is_empty() => trimmed,
+            _ => return outgoing,
+        };
+
+        // Walk backwards so the metadata lands on the latest user message.
+        if let Some(target) = outgoing
+            .iter_mut()
+            .rev()
+            .find(|candidate| candidate.role == "user")
+        {
+            let metadata_block = ContentBlock::Text {
+                text: format!("<turn_meta>\n{summary}\n</turn_meta>"),
+                cache_control: None,
+            };
+            target.content.insert(0, metadata_block);
+        }
+        outgoing
+    }
 }
@@ -25,6 +25,9 @@ pub struct Session {

    /// System prompt (optional)
    pub system_prompt: Option<SystemPrompt>,
+    /// Hash of the last assembled stable system prompt. Used to avoid
+    /// replacing `system_prompt` when unchanged.
+    pub last_system_prompt_hash: Option<u64>,
    /// Persisted summary blocks generated by context compaction.
    pub compaction_summary_prompt: Option<SystemPrompt>,

@@ -131,6 +134,7 @@ impl Session {
            } else {
                None
            },
+            last_system_prompt_hash: None,
            working_set: WorkingSet::default(),
            cycle_count: 0,
            current_cycle_started: Utc::now(),
@@ -254,11 +254,11 @@ pub fn system_prompt_for_mode_with_context(
 ///   4. `## Context Management` (compile-time constant, Agent/Yolo only)
 ///   5. compaction handoff template (compile-time constant)
 ///   6. handoff block — file-backed; rewritten by `/compact` and on exit
-///   7. working-set summary — drifts when a new path is observed
 ///
 /// Anything appended after a volatile block forfeits the cache for the rest
-/// of the request. New blocks belong above the handoff/working-set boundary
-/// unless they themselves are turn-volatile.
+/// of the request. New blocks belong above the handoff boundary unless they
+/// themselves are turn-volatile. Working-set metadata is now injected into the
+/// latest user message as per-turn metadata instead of this system prompt.
 pub fn system_prompt_for_mode_with_context_and_skills(
    mode: AppMode,
    workspace: &Path,
@@ -283,7 +283,7 @@ pub fn system_prompt_for_mode_with_context_and_skills(
 pub fn system_prompt_for_mode_with_context_skills_and_session(
    mode: AppMode,
    workspace: &Path,
-    working_set_summary: Option<&str>,
+    _working_set_summary: Option<&str>,
    skills_dir: Option<&Path>,
    instructions: Option<&[PathBuf]>,
    session_context: PromptSessionContext<'_>,
@@ -360,6 +360,7 @@ pub fn system_prompt_for_mode_with_context_skills_and_session(
             If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\
             ### Prompt-cache awareness\n\n\
             DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\
+             - **Working set location:** the current repo working set is injected into the latest user message inside a `<turn_meta>` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
             - **Append, don't reorder.** New context goes at the end (latest user / tool messages). Reshuffling earlier messages or rewriting their content invalidates the cache for everything after the change.\n\
             - **Don't paraphrase quoted content.** If you've already read a file, refer to it by path or line range instead of re-quoting it with different formatting.\n\
             - **Use `/compact` as a hard reset, not a tweak.** Compaction is meant for when the cache is already losing — it intentionally rewrites the prefix to a shorter summary. Don't trigger it for small wins.\n\
@@ -382,13 +383,6 @@ pub fn system_prompt_for_mode_with_context_skills_and_session(
        full_prompt = format!("{full_prompt}\n\n{handoff_block}");
    }

-    // 7. Working-set summary (drifts when a new path is observed).
-    if let Some(summary) = working_set_summary
-        && !summary.trim().is_empty()
-    {
-        full_prompt = format!("{full_prompt}\n\n{summary}");
-    }
-
    SystemPrompt::Text(full_prompt)
 }

@@ -547,7 +541,7 @@ mod tests {
    }

    #[test]
-    fn session_goal_is_injected_above_volatile_prompt_tail() {
+    fn session_goal_is_injected_above_handoff_tail() {
        let tmp = tempdir().expect("tempdir");
        let prompt = match system_prompt_for_mode_with_context_skills_and_session(
            AppMode::Agent,
@@ -566,11 +560,10 @@ mod tests {

        let goal_pos = prompt.find("<session_goal>").expect("goal block");
        let compact_pos = prompt.find("## Compaction Handoff").expect("compact block");
-        let working_set_pos = prompt.find("## Repo Working Set").expect("working set");

        assert!(prompt.contains("Fix transcript corruption"));
        assert!(goal_pos < compact_pos);
-        assert!(goal_pos < working_set_pos);
+        assert!(!prompt.contains("src/lib.rs"));
    }

    #[test]
@@ -729,12 +722,10 @@ mod tests {
    }

    #[test]
-    fn system_prompt_with_working_set_summary_is_byte_stable_for_constant_summary() {
-        // The `working_set_summary` argument is the volatile surface (suspect
-        // #1 in #263). Independently verifying THIS surface needs a separate
-        // test in working_set.rs; here we just pin that the surrounding
-        // prompt construction faithfully embeds whatever summary it's given
-        // without injecting any non-determinism on its own.
+    fn system_prompt_ignores_working_set_summary_argument() {
+        // Working-set metadata is now injected into the latest user message
+        // per turn. The legacy argument remains for call-site compatibility
+        // but must not reintroduce volatile bytes into the system prompt.
        let tmp = tempdir().expect("tempdir");
        let workspace = tmp.path();
        let summary = "## Repo Working Set\nWorkspace: /tmp/x\n";
@@ -754,16 +745,18 @@ mod tests {
            &a,
            &b,
        );
-        assert!(a.contains(summary), "summary must be embedded as-is");
+        assert!(
+            !a.contains(summary),
+            "summary must not be embedded in system prompt"
+        );
    }

    #[test]
    fn system_prompt_with_handoff_file_is_byte_stable_when_file_is_unchanged() {
-        // Companion to the working-set stability test: if `.deepseek/handoff.md`
-        // hasn't moved between two builds, the rendered prompt must produce
-        // identical bytes. The handoff block is the second volatile surface
-        // (the first is the working-set summary) — both land below the static
-        // boundary in `system_prompt_for_mode_with_context_and_skills`.
+        // If `.deepseek/handoff.md` hasn't moved between two builds, the
+        // rendered prompt must produce identical bytes. The handoff block
+        // lands below the static boundary in
+        // `system_prompt_for_mode_with_context_and_skills`.
        let tmp = tempdir().expect("tempdir");
        let workspace = tmp.path();
        let handoff_dir = workspace.join(".deepseek");
@@ -792,14 +785,11 @@ mod tests {
    }

    #[test]
-    fn handoff_and_working_set_appear_after_static_blocks() {
-        // Cache-prefix invariant: the volatile blocks (handoff, working_set)
-        // must come *after* the static `## Context Management` and the
-        // compaction handoff template (`## Compaction Handoff`) so a churn
-        // in either volatile section doesn't drag the static blocks out of
-        // the cached prefix. Pre-fix ordering placed handoff between the
-        // skills block and `## Context Management`, which busted the cache
-        // every time `/compact` rewrote the file.
+    fn handoff_appears_after_static_blocks_without_working_set() {
+        // Cache-prefix invariant: the handoff block must come after static
+        // `## Context Management` and the compaction handoff template
+        // (`## Compaction Handoff`). Working-set metadata is per-turn user
+        // metadata now, not a system-prompt tail block.
        let tmp = tempdir().expect("tempdir");
        let workspace = tmp.path();
        let handoff_dir = workspace.join(".deepseek");
@@ -822,9 +812,10 @@ mod tests {
        let handoff_pos = prompt
            .find(HANDOFF_BLOCK_MARKER)
            .expect("handoff block present when fixture file exists");
-        let working_set_pos = prompt
-            .find("## Repo Working Set")
-            .expect("working-set summary present when supplied");
+        assert!(
+            !prompt.contains("## Repo Working Set"),
+            "working-set summary must stay out of the system prompt"
+        );

        assert!(
            context_pos < handoff_pos,
@@ -834,10 +825,6 @@ mod tests {
            compact_pos < handoff_pos,
            "## Compaction Handoff must precede the handoff block"
        );
-        assert!(
-            handoff_pos < working_set_pos,
-            "handoff block must precede the working-set summary (most-volatile last)"
-        );
    }

    #[test]