From a9412d7df8bb5f3c8e98866780973845ba437cf2 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Tue, 12 May 2026 01:35:20 -0500 Subject: [PATCH] perf(prompts): move volatile layers below KV-prefix-cache boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `instructions = [...]` (the per-workspace config-driven block), the user memory file (`/memory`), and the current session goal (`/goal`) were being rendered at position 2.5 in the system prompt — inside the static prefix layer that DeepSeek's KV prefix cache hits. Any edit to those files invalidated every cached byte from that position onward. A `# foo` memory quick-add (or a `/goal` update) on turn 5 meant the engine had to re-tokenize and re-charge the full static suffix — skills block, context management, compact template, environment, ~thousands of tokens — on turn 6. Relocate the three blocks to position 6, immediately above the previous-session handoff block, where the volatile-content boundary already lives. The static prefix above the boundary (mode, project context, env, skills, context management, compact template) now stays cached across turns regardless of how often the user edits their memory file or shifts session goals. Resolved a 3-way merge against the v0.8.32 `translation_enabled` addition (PR #1462). The new translation-output instruction stays at position 2.3a (inside the static prefix layer) because it's a per-session flag — `/translate` is a session toggle, not a turn-by-turn knob, so the prompt-prefix bytes don't drift mid-session. Harvested from PR #1345 by @Duducoco Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 20 +++++++++ crates/tui/src/prompts.rs | 88 +++++++++++++++++++++++---------------- 2 files changed, 71 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb35cffb..bbc1f33f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,26 @@ have to work with?" 
— and the answer is now closer to "everything you'd reach for from a shell, including the document formats the real world uses." +### Performance + +- **Move `instructions = [...]`, user memory, and session goal + below the prompt's volatile-content boundary so DeepSeek's KV + prefix cache survives mid-session edits** (harvested from PR + #1345 by **@Duducoco**). Before this change, the per-workspace + `instructions` block, the user memory file (`/memory`), and the + current session goal (`/goal`) were rendered at position 2.5 + in the system prompt — inside the static prefix layer that the + cache hits. Any edit to those files (or any `# foo` + quick-add to memory) busted the cached prefix from that byte + onwards, forcing the next turn to re-tokenize the rest of the + static layer. Relocating them to position 6 (immediately above + the previous-session handoff block) means the cache hit covers + the entire static prefix — mode, project context, env, skills, + context management, compact template — regardless of how often + the user edits their memory file. Skills, context management, + and the compact template stay always-cacheable in the static + layer where they belong. + ### Fixed - **Toast stack overlay no longer renders on top of the composer diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 656e291a..4028aa3d 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -613,16 +613,19 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( // 2.25. Environment block — locale, platform, shell, pwd. All // four inputs are session-stable (workspace path is fixed for // the run; locale is loaded once by the caller; platform/shell - // come from process env). Inserted above instructions/skills so - // it remains in the workspace-static cache layer alongside the - // mode prompt and project context. + // come from process env). 
Inserted above skills so it remains in + // the workspace-static cache layer alongside the mode prompt and + // project context. full_prompt = format!( "{full_prompt}\n\n{}", render_environment_block(workspace, session_context.locale_tag), ); // 2.3a. Translation output instruction — when enabled, instruct - // the model to respond in the resolved session locale. + // the model to respond in the resolved session locale. Stays + // above the volatile-content boundary because it's a per-session + // flag, not a per-turn one: enabling `/translate` is a session + // toggle, so the prompt-prefix bytes don't drift turn-over-turn. if session_context.translation_enabled { full_prompt = format!( "{full_prompt}\n\n{}", @@ -630,35 +633,6 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( ); } - // 2.5a. Configured `instructions = [...]` files (#454). Loaded - // and concatenated in declared order. Lives above the skills - // block so it's part of the workspace-static layer that the KV - // prefix cache can hit, and so per-project overrides apply - // consistently turn-over-turn. - if let Some(paths) = instructions - && let Some(block) = render_instructions_block(paths) - { - full_prompt = format!("{full_prompt}\n\n{block}"); - } - - // 2.5b. User memory block (#489). Goes above skills/context-management - // because it's session-stable: the memory file changes when the user - // edits it via `/memory` or `# foo` quick-add, but not turn-over-turn. - if let Some(memory_block) = session_context.user_memory_block - && !memory_block.trim().is_empty() - { - full_prompt = format!("{full_prompt}\n\n{memory_block}"); - } - - if let Some(goal_objective) = session_context.goal_objective - && !goal_objective.trim().is_empty() - { - full_prompt = format!( - "{full_prompt}\n\n## Current Session Goal\n\n\n{}\n", - goal_objective.trim() - ); - } - // 3. Skills block. 
#432: walks every candidate workspace // skills directory (`.agents/skills`, `skills`, // `.opencode/skills`, `.claude/skills`, `.cursor/skills`) plus global @@ -701,9 +675,45 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( // ── Volatile-content boundary ───────────────────────────────────────── // Everything below drifts mid-session and busts the prefix cache for - // bytes that follow. Keep new static blocks above this comment. + // bytes that follow. All static layers (mode, project context, env, + // skills, context management, compact template) live above this line + // so DeepSeek's KV prefix cache can hit on the entire static prefix + // regardless of per-session edits to memory, goals, or instructions. - // 6. Previous-session handoff (file-backed, rewritten by `/compact`). + // 6a. Configured `instructions = [...]` files (#454). Loaded + // and concatenated in declared order. Placed below the volatile boundary + // because these files are workspace-scoped and may differ between + // sessions; any edit to them would otherwise bust the prefix cache for + // all subsequent static layers. + if let Some(paths) = instructions + && let Some(block) = render_instructions_block(paths) + { + full_prompt = format!("{full_prompt}\n\n{block}"); + } + + // 6b. User memory block (#489). Placed below the volatile boundary + // because memory entries are editable mid-session via `/memory` or + // `# foo` quick-add. When they change, they only invalidate the + // trailing goal and handoff blocks — the static prefix above stays cached. + if let Some(memory_block) = session_context.user_memory_block + && !memory_block.trim().is_empty() + { + full_prompt = format!("{full_prompt}\n\n{memory_block}"); + } + + // 6c. Current session goal. Also volatile: users set / change goals + // during a session via `/goal`. Placed below the boundary for the + // same reason as memory. 
+ if let Some(goal_objective) = session_context.goal_objective + && !goal_objective.trim().is_empty() + { + full_prompt = format!( + "{full_prompt}\n\n## Current Session Goal\n\n\n{}\n", + goal_objective.trim() + ); + } + + // 7. Previous-session handoff (file-backed, rewritten by `/compact`). if let Some(handoff_block) = load_handoff_block(workspace) { full_prompt = format!("{full_prompt}\n\n{handoff_block}"); } @@ -1282,7 +1292,7 @@ mod tests { } #[test] - fn session_goal_is_injected_above_handoff_tail() { + fn session_goal_is_injected_below_compact_template() { let tmp = tempdir().expect("tempdir"); let prompt = match system_prompt_for_mode_with_context_skills_and_session( AppMode::Agent, @@ -1306,7 +1316,11 @@ mod tests { let compact_pos = prompt.find("## Compaction Handoff").expect("compact block"); assert!(prompt.contains("Fix transcript corruption")); - assert!(goal_pos < compact_pos); + // Session goal is volatile content — it lives below the + // volatile-content boundary (after the compact template) so + // per-session goal changes don't bust the prefix cache for + // static layers. + assert!(compact_pos < goal_pos); assert!(!prompt.contains("src/lib.rs")); }