diff --git a/CHANGELOG.md b/CHANGELOG.md index bb35cffb..bbc1f33f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,26 @@ have to work with?" — and the answer is now closer to "everything you'd reach for from a shell, including the document formats the real world uses." +### Performance + +- **Move `instructions = [...]`, user memory, and session goal + below the prompt's volatile-content boundary so DeepSeek's KV + prefix cache survives mid-session edits** (harvested from PR + #1345 by **@Duducoco**). Before this change, the per-workspace + `instructions` block, the user memory file (`/memory`), and the + current session goal (`/goal`) were rendered at position 2.5 + in the system prompt — inside the static prefix layer that the + cache hits. Any edit to those files (or any `# foo` + quick-add to memory) busted the cached prefix from that byte + onwards, forcing the next turn to re-tokenize the rest of the + static layer. Relocating them to position 6 (immediately above + the previous-session handoff block) means the cache hit covers + the entire static prefix — mode, project context, env, skills, + context management, compact template — regardless of how often + the user edits their memory file. Skills, context management, + and the compact template stay always-cacheable in the static + layer where they belong. + ### Fixed - **Toast stack overlay no longer renders on top of the composer diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 656e291a..4028aa3d 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -613,16 +613,19 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( // 2.25. Environment block — locale, platform, shell, pwd. All // four inputs are session-stable (workspace path is fixed for // the run; locale is loaded once by the caller; platform/shell - // come from process env). 
Inserted above instructions/skills so - // it remains in the workspace-static cache layer alongside the - // mode prompt and project context. + // come from process env). Inserted above skills so it remains in + // the workspace-static cache layer alongside the mode prompt and + // project context. full_prompt = format!( "{full_prompt}\n\n{}", render_environment_block(workspace, session_context.locale_tag), ); // 2.3a. Translation output instruction — when enabled, instruct - // the model to respond in the resolved session locale. + // the model to respond in the resolved session locale. Stays + // above the volatile-content boundary because it's a per-session + // flag, not a per-turn one: enabling `/translate` is a session + // toggle, so the prompt-prefix bytes don't drift turn-over-turn. if session_context.translation_enabled { full_prompt = format!( "{full_prompt}\n\n{}", @@ -630,35 +633,6 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( ); } - // 2.5a. Configured `instructions = [...]` files (#454). Loaded - // and concatenated in declared order. Lives above the skills - // block so it's part of the workspace-static layer that the KV - // prefix cache can hit, and so per-project overrides apply - // consistently turn-over-turn. - if let Some(paths) = instructions - && let Some(block) = render_instructions_block(paths) - { - full_prompt = format!("{full_prompt}\n\n{block}"); - } - - // 2.5b. User memory block (#489). Goes above skills/context-management - // because it's session-stable: the memory file changes when the user - // edits it via `/memory` or `# foo` quick-add, but not turn-over-turn. 
- if let Some(memory_block) = session_context.user_memory_block - && !memory_block.trim().is_empty() - { - full_prompt = format!("{full_prompt}\n\n{memory_block}"); - } - - if let Some(goal_objective) = session_context.goal_objective - && !goal_objective.trim().is_empty() - { - full_prompt = format!( - "{full_prompt}\n\n## Current Session Goal\n\n\n{}\n", - goal_objective.trim() - ); - } - // 3. Skills block. #432: walks every candidate workspace // skills directory (`.agents/skills`, `skills`, // `.opencode/skills`, `.claude/skills`, `.cursor/skills`) plus global @@ -701,9 +675,45 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( // ── Volatile-content boundary ───────────────────────────────────────── // Everything below drifts mid-session and busts the prefix cache for - // bytes that follow. Keep new static blocks above this comment. + // bytes that follow. All static layers (mode, project context, env, + // skills, context management, compact template) live above this line + // so DeepSeek's KV prefix cache can hit on the entire static prefix + // regardless of per-session edits to memory, goals, or instructions. - // 6. Previous-session handoff (file-backed, rewritten by `/compact`). + // 6a. Configured `instructions = [...]` files (#454). Loaded + // and concatenated in declared order. Placed below the volatile boundary + // because these files are workspace-scoped and may differ between + // sessions; any edit to them would otherwise bust the prefix cache for + // all subsequent static layers. + if let Some(paths) = instructions + && let Some(block) = render_instructions_block(paths) + { + full_prompt = format!("{full_prompt}\n\n{block}"); + } + + // 6b. User memory block (#489). Placed below the volatile boundary + // because memory entries are editable mid-session via `/memory` or + // `# foo` quick-add. When they change, they only invalidate the + // trailing goal and handoff blocks — the static prefix above stays cached. 
+ if let Some(memory_block) = session_context.user_memory_block + && !memory_block.trim().is_empty() + { + full_prompt = format!("{full_prompt}\n\n{memory_block}"); + } + + // 6c. Current session goal. Also volatile: users set / change goals + // during a session via `/goal`. Placed below the boundary for the + // same reason as memory. + if let Some(goal_objective) = session_context.goal_objective + && !goal_objective.trim().is_empty() + { + full_prompt = format!( + "{full_prompt}\n\n## Current Session Goal\n\n\n{}\n", + goal_objective.trim() + ); + } + + // 7. Previous-session handoff (file-backed, rewritten by `/compact`). if let Some(handoff_block) = load_handoff_block(workspace) { full_prompt = format!("{full_prompt}\n\n{handoff_block}"); } @@ -1282,7 +1292,7 @@ mod tests { } #[test] - fn session_goal_is_injected_above_handoff_tail() { + fn session_goal_is_injected_below_compact_template() { let tmp = tempdir().expect("tempdir"); let prompt = match system_prompt_for_mode_with_context_skills_and_session( AppMode::Agent, @@ -1306,7 +1316,11 @@ mod tests { let compact_pos = prompt.find("## Compaction Handoff").expect("compact block"); assert!(prompt.contains("Fix transcript corruption")); - assert!(goal_pos < compact_pos); + // Session goal is volatile content — it lives below the + // volatile-content boundary (after the compact template) so + // per-session goal changes don't bust the prefix cache for + // static layers. + assert!(compact_pos < goal_pos); assert!(!prompt.contains("src/lib.rs")); }