diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index ff053451..aeeb6fb4 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -8,6 +8,8 @@ //! - Tool execution orchestration use std::collections::HashMap; +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; use std::path::PathBuf; use std::sync::{Arc, Mutex as StdMutex}; use std::time::{Duration, Instant}; @@ -35,8 +37,8 @@ use crate::mcp::McpPool; #[cfg(test)] use crate::models::ToolCaller; use crate::models::{ - ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest, - StreamEvent, SystemPrompt, Tool, Usage, + ContentBlock, ContentBlockStart, Delta, LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS, Message, + MessageRequest, StreamEvent, SystemPrompt, Tool, Usage, }; use crate::prompts; use crate::seam_manager::{SeamConfig, SeamManager}; @@ -353,8 +355,9 @@ impl Engine { config.mcp_config_path.clone(), ); - // Set up system prompt with project context (default to agent mode) - let working_set_summary = session.working_set.summary_block(&config.workspace); + // Set up stable system prompt with project context (default to agent mode). + // Per-turn working-set metadata is injected into the latest user + // message at request time so file churn does not rewrite this prefix. let user_memory_block = crate::memory::compose_block(config.memory_enabled, &config.memory_path); let system_prompt = prompts::system_prompt_for_mode_with_context_skills_and_session( @@ -368,8 +371,9 @@ impl Engine { goal_objective: config.goal_objective.as_deref(), }, ); - session.system_prompt = - append_working_set_summary(Some(system_prompt), working_set_summary.as_deref()); + let stable_prompt = Some(system_prompt); + session.last_system_prompt_hash = Some(system_prompt_hash(stable_prompt.as_ref())); + session.system_prompt = stable_prompt; let subagent_manager = new_shared_subagent_manager(config.workspace.clone(), config.max_subagents); @@ -1645,10 +1649,6 @@ impl Engine { /// Refresh the system prompt based on current mode and context. fn refresh_system_prompt(&mut self, mode: AppMode) { - let working_set_summary = self - .session - .working_set - .summary_block(&self.config.workspace); let user_memory_block = crate::memory::compose_block(self.config.memory_enabled, &self.config.memory_path); let base = prompts::system_prompt_for_mode_with_context_skills_and_session( @@ -1664,8 +1664,11 @@ impl Engine { ); let stable_prompt = merge_system_prompts(Some(&base), self.session.compaction_summary_prompt.clone()); - self.session.system_prompt = - append_working_set_summary(stable_prompt, working_set_summary.as_deref()); + let stable_hash = system_prompt_hash(stable_prompt.as_ref()); + if self.session.last_system_prompt_hash != Some(stable_hash) { + self.session.system_prompt = stable_prompt; + self.session.last_system_prompt_hash = Some(stable_hash); + } } fn merge_compaction_summary(&mut self, summary_prompt: Option) { @@ -1676,18 +1679,36 @@ impl Engine { self.session.compaction_summary_prompt.as_ref(), summary_prompt.clone(), ); - let current_without_working_set = - remove_working_set_summary(self.session.system_prompt.as_ref()); - let merged = merge_system_prompts(current_without_working_set.as_ref(), summary_prompt); - let working_set_summary = self - .session - .working_set - .summary_block(&self.config.workspace); - self.session.system_prompt = - append_working_set_summary(merged, working_set_summary.as_deref()); + let merged = merge_system_prompts(self.session.system_prompt.as_ref(), summary_prompt); + self.session.last_system_prompt_hash = Some(system_prompt_hash(merged.as_ref())); + self.session.system_prompt = merged; } } +fn system_prompt_hash(prompt: Option<&SystemPrompt>) -> u64 { + let mut hasher = DefaultHasher::new(); + match prompt { + Some(SystemPrompt::Text(text)) => { + 0u8.hash(&mut hasher); + text.hash(&mut hasher); + } + Some(SystemPrompt::Blocks(blocks)) => { + 1u8.hash(&mut hasher); + for block in blocks { + block.block_type.hash(&mut hasher); + block.text.hash(&mut hasher); + if let Some(cache_control) = &block.cache_control { + cache_control.cache_type.hash(&mut hasher); + } + } + } + None => { + 2u8.hash(&mut hasher); + } + } + hasher.finish() +} + /// Spawn the engine in a background task pub fn spawn_engine(config: EngineConfig, api_config: &Config) -> EngineHandle { let (engine, handle) = Engine::new(config, api_config); @@ -1775,9 +1796,8 @@ mod context; pub(crate) use context::compact_tool_result_for_context; use context::{ COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP, - TURN_MAX_OUTPUT_TOKENS, append_working_set_summary, context_input_budget, - estimate_input_tokens_conservative, extract_compaction_summary_prompt, - is_context_length_error_message, remove_working_set_summary, summarize_text, + TURN_MAX_OUTPUT_TOKENS, context_input_budget, estimate_input_tokens_conservative, + extract_compaction_summary_prompt, is_context_length_error_message, summarize_text, turn_response_headroom_tokens, }; mod dispatch; diff --git a/crates/tui/src/core/engine/context.rs b/crates/tui/src/core/engine/context.rs index 7e80008b..81f053e0 100644 --- a/crates/tui/src/core/engine/context.rs +++ b/crates/tui/src/core/engine/context.rs @@ -6,7 +6,7 @@ use crate::compaction::estimate_tokens; use crate::error_taxonomy::ErrorCategory; -use crate::models::{Message, SystemBlock, SystemPrompt, context_window_for_model}; +use crate::models::{Message, SystemPrompt, context_window_for_model}; use crate::tools::spec::ToolResult; /// Max output tokens requested for normal agent turns. Generous on purpose: @@ -288,56 +288,6 @@ pub(super) fn extract_compaction_summary_prompt( } } -pub(super) fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option { - match prompt { - Some(SystemPrompt::Blocks(blocks)) => { - let filtered: Vec = blocks - .iter() - .filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER)) - .cloned() - .collect(); - if filtered.is_empty() { - None - } else { - Some(SystemPrompt::Blocks(filtered)) - } - } - Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())), - None => None, - } -} - -pub(super) fn append_working_set_summary( - prompt: Option, - working_set_summary: Option<&str>, -) -> Option { - let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else { - return prompt; - }; - let working_set_block = SystemBlock { - block_type: "text".to_string(), - text: summary.to_string(), - cache_control: None, - }; - - match prompt { - Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![ - SystemBlock { - block_type: "text".to_string(), - text, - cache_control: None, - }, - working_set_block, - ])), - Some(SystemPrompt::Blocks(mut blocks)) => { - blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER)); - blocks.push(working_set_block); - Some(SystemPrompt::Blocks(blocks)) - } - None => Some(SystemPrompt::Blocks(vec![working_set_block])), - } -} - fn estimate_text_tokens_conservative(text: &str) -> usize { text.chars().count().div_ceil(3) } diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 1e5e0349..8a9751e7 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -501,7 +501,7 @@ fn subagent_results_are_summarized_before_parent_context_insertion() { } #[test] -fn refresh_system_prompt_places_working_set_after_stable_prefix() { +fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() { let tmp = tempdir().expect("tempdir"); fs::create_dir_all(tmp.path().join("src")).expect("mkdir"); fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write"); @@ -518,20 +518,74 @@ fn refresh_system_prompt_places_working_set_after_stable_prefix() { engine.refresh_system_prompt(AppMode::Agent); - let Some(SystemPrompt::Blocks(blocks)) = &engine.session.system_prompt else { - panic!("expected structured prompt blocks"); - }; - let last = blocks.last().expect("working-set block"); - assert!(last.text.contains(WORKING_SET_SUMMARY_MARKER)); - assert!( - blocks[..blocks.len() - 1] + let prompt = match &engine.session.system_prompt { + Some(SystemPrompt::Text(text)) => text.clone(), + Some(SystemPrompt::Blocks(blocks)) => blocks .iter() - .all(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER)) - ); + .map(|block| block.text.as_str()) + .collect::>() + .join("\n"), + None => panic!("expected system prompt"), + }; + assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER)); } #[test] -fn compaction_summary_stays_before_volatile_working_set() { +fn working_set_reaches_model_as_turn_metadata() { + let tmp = tempdir().expect("tempdir"); + fs::create_dir_all(tmp.path().join("src")).expect("mkdir"); + fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write"); + + let config = EngineConfig { + workspace: tmp.path().to_path_buf(), + ..Default::default() + }; + let (mut engine, _handle) = Engine::new(config, &Config::default()); + engine + .session + .working_set + .observe_user_message("please inspect src/lib.rs", tmp.path()); + engine.session.add_message(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "please inspect src/lib.rs".to_string(), + cache_control: None, + }], + }); + + let messages = engine.messages_with_turn_metadata(); + let first_block = messages + .last() + .and_then(|message| message.content.first()) + .expect("turn metadata block"); + let ContentBlock::Text { text, .. } = first_block else { + panic!("expected text metadata block"); + }; + assert!(text.starts_with("\n")); + assert!(text.contains(WORKING_SET_SUMMARY_MARKER)); + assert!(text.contains("src/lib.rs")); +} + +#[test] +fn refresh_system_prompt_is_noop_when_unchanged() { + let tmp = tempdir().expect("tempdir"); + let config = EngineConfig { + workspace: tmp.path().to_path_buf(), + ..Default::default() + }; + let (mut engine, _handle) = Engine::new(config, &Config::default()); + + engine.refresh_system_prompt(AppMode::Agent); + let first_hash = engine.session.last_system_prompt_hash; + let first_prompt = engine.session.system_prompt.clone(); + engine.refresh_system_prompt(AppMode::Agent); + + assert_eq!(engine.session.last_system_prompt_hash, first_hash); + assert_eq!(engine.session.system_prompt, first_prompt); +} + +#[test] +fn compaction_summary_stays_in_stable_system_prompt() { let tmp = tempdir().expect("tempdir"); fs::create_dir_all(tmp.path().join("src")).expect("mkdir"); fs::write(tmp.path().join("src/main.rs"), "fn main() {}").expect("write"); @@ -552,20 +606,18 @@ fn compaction_summary_stays_before_volatile_working_set() { cache_control: None, }]))); - let Some(SystemPrompt::Blocks(blocks)) = &engine.session.system_prompt else { - panic!("expected structured prompt blocks"); + let prompt = match &engine.session.system_prompt { + Some(SystemPrompt::Text(text)) => text.clone(), + Some(SystemPrompt::Blocks(blocks)) => blocks + .iter() + .map(|block| block.text.as_str()) + .collect::>() + .join("\n"), + None => panic!("expected system prompt"), }; - let summary_index = blocks - .iter() - .position(|block| block.text.contains(COMPACTION_SUMMARY_MARKER)) - .expect("summary block"); - let working_set_index = blocks - .iter() - .position(|block| block.text.contains(WORKING_SET_SUMMARY_MARKER)) - .expect("working-set block"); - assert!(summary_index < working_set_index); - assert_eq!(working_set_index, blocks.len() - 1); + assert!(prompt.contains(COMPACTION_SUMMARY_MARKER)); + assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER)); } #[tokio::test] @@ -635,7 +687,7 @@ async fn pre_request_refresh_invoked_when_medium_risk() { engine.config.model = "deepseek-v3.2-128k".to_string(); let long = "x".repeat(5_000); - for _ in 0..200 { + for _ in 0..900 { engine.session.messages.push(Message { role: "user".to_string(), content: vec![ContentBlock::Text { diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 603309d0..2220dc9a 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -230,7 +230,7 @@ impl Engine { }; let request = MessageRequest { model: self.session.model.clone(), - messages: self.session.messages.clone(), + messages: self.messages_with_turn_metadata(), max_tokens: TURN_MAX_OUTPUT_TOKENS, system: self.session.system_prompt.clone(), tools: active_tools.clone(), @@ -1594,4 +1594,35 @@ impl Engine { } (TurnOutcomeStatus::Completed, None) } + + pub(super) fn messages_with_turn_metadata(&self) -> Vec { + let Some(summary) = self + .session + .working_set + .summary_block(&self.config.workspace) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + else { + return self.session.messages.clone(); + }; + + let mut messages = self.session.messages.clone(); + let Some(last_user) = messages + .iter_mut() + .rev() + .find(|message| message.role == "user") + else { + return messages; + }; + + let turn_meta = format!("\n{summary}\n"); + last_user.content.insert( + 0, + ContentBlock::Text { + text: turn_meta, + cache_control: None, + }, + ); + messages + } } diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs index 94791c40..347b1fa5 100644 --- a/crates/tui/src/core/session.rs +++ b/crates/tui/src/core/session.rs @@ -25,6 +25,9 @@ pub struct Session { /// System prompt (optional) pub system_prompt: Option, + /// Hash of the last assembled stable system prompt. Used to avoid + /// replacing `system_prompt` when unchanged. + pub last_system_prompt_hash: Option, /// Persisted summary blocks generated by context compaction. pub compaction_summary_prompt: Option, @@ -131,6 +134,7 @@ impl Session { } else { None }, + last_system_prompt_hash: None, working_set: WorkingSet::default(), cycle_count: 0, current_cycle_started: Utc::now(), diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 9b2b2ecc..eec2646f 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -254,11 +254,11 @@ pub fn system_prompt_for_mode_with_context( /// 4. `## Context Management` (compile-time constant, Agent/Yolo only) /// 5. compaction handoff template (compile-time constant) /// 6. handoff block — file-backed; rewritten by `/compact` and on exit -/// 7. working-set summary — drifts when a new path is observed /// /// Anything appended after a volatile block forfeits the cache for the rest -/// of the request. New blocks belong above the handoff/working-set boundary -/// unless they themselves are turn-volatile. +/// of the request. New blocks belong above the handoff boundary unless they +/// themselves are turn-volatile. Working-set metadata is now injected into the +/// latest user message as per-turn metadata instead of this system prompt. pub fn system_prompt_for_mode_with_context_and_skills( mode: AppMode, workspace: &Path, @@ -283,7 +283,7 @@ pub fn system_prompt_for_mode_with_context_and_skills( pub fn system_prompt_for_mode_with_context_skills_and_session( mode: AppMode, workspace: &Path, - working_set_summary: Option<&str>, + _working_set_summary: Option<&str>, skills_dir: Option<&Path>, instructions: Option<&[PathBuf]>, session_context: PromptSessionContext<'_>, @@ -360,6 +360,7 @@ pub fn system_prompt_for_mode_with_context_skills_and_session( If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\ ### Prompt-cache awareness\n\n\ DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\ + - **Working set location:** the current repo working set is injected into the latest user message inside a `` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\ - **Append, don't reorder.** New context goes at the end (latest user / tool messages). Reshuffling earlier messages or rewriting their content invalidates the cache for everything after the change.\n\ - **Don't paraphrase quoted content.** If you've already read a file, refer to it by path or line range instead of re-quoting it with different formatting.\n\ - **Use `/compact` as a hard reset, not a tweak.** Compaction is meant for when the cache is already losing — it intentionally rewrites the prefix to a shorter summary. Don't trigger it for small wins.\n\ @@ -382,13 +383,6 @@ pub fn system_prompt_for_mode_with_context_skills_and_session( full_prompt = format!("{full_prompt}\n\n{handoff_block}"); } - // 7. Working-set summary (drifts when a new path is observed). - if let Some(summary) = working_set_summary - && !summary.trim().is_empty() - { - full_prompt = format!("{full_prompt}\n\n{summary}"); - } - SystemPrompt::Text(full_prompt) } @@ -547,7 +541,7 @@ mod tests { } #[test] - fn session_goal_is_injected_above_volatile_prompt_tail() { + fn session_goal_is_injected_above_handoff_tail() { let tmp = tempdir().expect("tempdir"); let prompt = match system_prompt_for_mode_with_context_skills_and_session( AppMode::Agent, @@ -566,11 +560,10 @@ mod tests { let goal_pos = prompt.find("").expect("goal block"); let compact_pos = prompt.find("## Compaction Handoff").expect("compact block"); - let working_set_pos = prompt.find("## Repo Working Set").expect("working set"); assert!(prompt.contains("Fix transcript corruption")); assert!(goal_pos < compact_pos); - assert!(goal_pos < working_set_pos); + assert!(!prompt.contains("src/lib.rs")); } #[test] @@ -729,12 +722,10 @@ mod tests { } #[test] - fn system_prompt_with_working_set_summary_is_byte_stable_for_constant_summary() { - // The `working_set_summary` argument is the volatile surface (suspect - // #1 in #263). Independently verifying THIS surface needs a separate - // test in working_set.rs; here we just pin that the surrounding - // prompt construction faithfully embeds whatever summary it's given - // without injecting any non-determinism on its own. + fn system_prompt_ignores_working_set_summary_argument() { + // Working-set metadata is now injected into the latest user message + // per turn. The legacy argument remains for call-site compatibility + // but must not reintroduce volatile bytes into the system prompt. let tmp = tempdir().expect("tempdir"); let workspace = tmp.path(); let summary = "## Repo Working Set\nWorkspace: /tmp/x\n"; @@ -754,16 +745,18 @@ mod tests { &a, &b, ); - assert!(a.contains(summary), "summary must be embedded as-is"); + assert!( + !a.contains(summary), + "summary must not be embedded in system prompt" + ); } #[test] fn system_prompt_with_handoff_file_is_byte_stable_when_file_is_unchanged() { - // Companion to the working-set stability test: if `.deepseek/handoff.md` - // hasn't moved between two builds, the rendered prompt must produce - // identical bytes. The handoff block is the second volatile surface - // (the first is the working-set summary) — both land below the static - // boundary in `system_prompt_for_mode_with_context_and_skills`. + // If `.deepseek/handoff.md` hasn't moved between two builds, the + // rendered prompt must produce identical bytes. The handoff block + // lands below the static boundary in + // `system_prompt_for_mode_with_context_and_skills`. let tmp = tempdir().expect("tempdir"); let workspace = tmp.path(); let handoff_dir = workspace.join(".deepseek"); @@ -792,14 +785,11 @@ mod tests { } #[test] - fn handoff_and_working_set_appear_after_static_blocks() { - // Cache-prefix invariant: the volatile blocks (handoff, working_set) - // must come *after* the static `## Context Management` and the - // compaction handoff template (`## Compaction Handoff`) so a churn - // in either volatile section doesn't drag the static blocks out of - // the cached prefix. Pre-fix ordering placed handoff between the - // skills block and `## Context Management`, which busted the cache - // every time `/compact` rewrote the file. + fn handoff_appears_after_static_blocks_without_working_set() { + // Cache-prefix invariant: the handoff block must come after static + // `## Context Management` and the compaction handoff template + // (`## Compaction Handoff`). Working-set metadata is per-turn user + // metadata now, not a system-prompt tail block. let tmp = tempdir().expect("tempdir"); let workspace = tmp.path(); let handoff_dir = workspace.join(".deepseek"); @@ -822,9 +812,10 @@ mod tests { let handoff_pos = prompt .find(HANDOFF_BLOCK_MARKER) .expect("handoff block present when fixture file exists"); - let working_set_pos = prompt - .find("## Repo Working Set") - .expect("working-set summary present when supplied"); + assert!( + !prompt.contains("## Repo Working Set"), + "working-set summary must stay out of the system prompt" + ); assert!( context_pos < handoff_pos, @@ -834,10 +825,6 @@ mod tests { compact_pos < handoff_pos, "## Compaction Handoff must precede the handoff block" ); - assert!( - handoff_pos < working_set_pos, - "handoff block must precede the working-set summary (most-volatile last)" - ); } #[test]