diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb35cffb..bbc1f33f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,26 @@ have to work with?" — and the answer is now closer to "everything
you'd reach for from a shell, including the document formats the
real world uses."
+### Performance
+
+- **Move `instructions = [...]`, user memory, and session goal
+ below the prompt's volatile-content boundary so DeepSeek's KV
+ prefix cache survives mid-session edits** (harvested from PR
+ #1345 by **@Duducoco**). Before this change, the per-workspace
+ `instructions` block, the user memory file (`/memory`), and the
+ current session goal (`/goal`) were rendered at position 2.5
+ in the system prompt — inside the static prefix layer that the
+ cache hits. Any edit to those files (or any `# foo`
+ quick-add to memory) busted the cached prefix from that byte
+ onwards, forcing the next turn to recompute the rest of the
+ static layer. Relocating them to position 6 (immediately above
+ the previous-session handoff block) means the cache hit covers
+ the entire static prefix — mode, project context, env, skills,
+ context management, compact template — regardless of how often
+ the user edits their memory file. Skills, context management,
+ and the compact template stay always-cacheable in the static
+ layer where they belong.
+
### Fixed
- **Toast stack overlay no longer renders on top of the composer
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 656e291a..4028aa3d 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -613,16 +613,19 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
// 2.25. Environment block — locale, platform, shell, pwd. All
// four inputs are session-stable (workspace path is fixed for
// the run; locale is loaded once by the caller; platform/shell
- // come from process env). Inserted above instructions/skills so
- // it remains in the workspace-static cache layer alongside the
- // mode prompt and project context.
+ // come from process env). Inserted above skills so it remains in
+ // the workspace-static cache layer alongside the mode prompt and
+ // project context.
full_prompt = format!(
"{full_prompt}\n\n{}",
render_environment_block(workspace, session_context.locale_tag),
);
// 2.3a. Translation output instruction — when enabled, instruct
- // the model to respond in the resolved session locale.
+ // the model to respond in the resolved session locale. Stays
+ // above the volatile-content boundary because it's a per-session
+ // flag, not a per-turn one: enabling `/translate` is a session
+ // toggle, so the prompt-prefix bytes don't drift turn-over-turn.
if session_context.translation_enabled {
full_prompt = format!(
"{full_prompt}\n\n{}",
@@ -630,35 +633,6 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
);
}
- // 2.5a. Configured `instructions = [...]` files (#454). Loaded
- // and concatenated in declared order. Lives above the skills
- // block so it's part of the workspace-static layer that the KV
- // prefix cache can hit, and so per-project overrides apply
- // consistently turn-over-turn.
- if let Some(paths) = instructions
- && let Some(block) = render_instructions_block(paths)
- {
- full_prompt = format!("{full_prompt}\n\n{block}");
- }
-
- // 2.5b. User memory block (#489). Goes above skills/context-management
- // because it's session-stable: the memory file changes when the user
- // edits it via `/memory` or `# foo` quick-add, but not turn-over-turn.
- if let Some(memory_block) = session_context.user_memory_block
- && !memory_block.trim().is_empty()
- {
- full_prompt = format!("{full_prompt}\n\n{memory_block}");
- }
-
- if let Some(goal_objective) = session_context.goal_objective
- && !goal_objective.trim().is_empty()
- {
- full_prompt = format!(
- "{full_prompt}\n\n## Current Session Goal\n\n\n{}\n",
- goal_objective.trim()
- );
- }
-
// 3. Skills block. #432: walks every candidate workspace
// skills directory (`.agents/skills`, `skills`,
// `.opencode/skills`, `.claude/skills`, `.cursor/skills`) plus global
@@ -701,9 +675,45 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
// ── Volatile-content boundary ─────────────────────────────────────────
// Everything below drifts mid-session and busts the prefix cache for
- // bytes that follow. Keep new static blocks above this comment.
+ // bytes that follow. Keep new static blocks (mode, project context,
+ // env, skills, context management, compact template) above this line
+ // so DeepSeek's KV prefix cache can hit on the entire static prefix
+ // regardless of mid-session edits to memory, goals, or instructions.
- // 6. Previous-session handoff (file-backed, rewritten by `/compact`).
+ // 6a. Configured `instructions = [...]` files (#454). Loaded
+ // and concatenated in declared order. Placed below the volatile boundary
+ // because these files are workspace-scoped and may differ between
+ // sessions; any edit to them would otherwise bust the prefix cache for
+ // all subsequent static layers.
+ if let Some(paths) = instructions
+ && let Some(block) = render_instructions_block(paths)
+ {
+ full_prompt = format!("{full_prompt}\n\n{block}");
+ }
+
+ // 6b. User memory block (#489). Placed below the volatile boundary
+ // because memory entries are editable mid-session via `/memory` or
+ // `# foo` quick-add. When they change, they only invalidate the
+ // trailing goal and handoff blocks — the static prefix above stays cached.
+ if let Some(memory_block) = session_context.user_memory_block
+ && !memory_block.trim().is_empty()
+ {
+ full_prompt = format!("{full_prompt}\n\n{memory_block}");
+ }
+
+ // 6c. Current session goal. Also volatile: users set / change goals
+ // during a session via `/goal`. Placed below the boundary for the
+ // same reason as memory.
+ if let Some(goal_objective) = session_context.goal_objective
+ && !goal_objective.trim().is_empty()
+ {
+ full_prompt = format!(
+ "{full_prompt}\n\n## Current Session Goal\n\n\n{}\n",
+ goal_objective.trim()
+ );
+ }
+
+ // 7. Previous-session handoff (file-backed, rewritten by `/compact`).
if let Some(handoff_block) = load_handoff_block(workspace) {
full_prompt = format!("{full_prompt}\n\n{handoff_block}");
}
@@ -1282,7 +1292,7 @@ mod tests {
}
#[test]
- fn session_goal_is_injected_above_handoff_tail() {
+ fn session_goal_is_injected_below_compact_template() {
let tmp = tempdir().expect("tempdir");
let prompt = match system_prompt_for_mode_with_context_skills_and_session(
AppMode::Agent,
@@ -1306,7 +1316,11 @@ mod tests {
let compact_pos = prompt.find("## Compaction Handoff").expect("compact block");
assert!(prompt.contains("Fix transcript corruption"));
- assert!(goal_pos < compact_pos);
+ // Session goal is volatile content — it lives below the
+ // volatile-content boundary (after the compact template) so
+ // mid-session goal changes don't bust the prefix cache for
+ // static layers.
+ assert!(compact_pos < goal_pos);
assert!(!prompt.contains("src/lib.rs"));
}