refactor(engine): append turn metadata after user text
Place user text before volatile turn metadata in outgoing user-message content arrays so provider prefix caches can continue matching the stable user-input prefix across date, model-route, and working-set changes. Also add wire-level coverage proving that tail-positioned turn metadata serializes after user text while preserving turn-meta deduplication. Harvested from PR #2517 by @HUQIANTAO. Co-authored-by: HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com>
This commit is contained in:
+5
-1
@@ -21,12 +21,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
page handles and do not serialize through the mutation path. The harvest also
|
||||
adds panic-safe state write-back and serializes cache-mutating unit tests so
|
||||
the global web cache remains stable under normal Cargo test parallelism.
|
||||
- Appended volatile `<turn_meta>` blocks after user text in outgoing user
|
||||
message content arrays so provider prefix caches can keep matching the stable
|
||||
user-input prefix across date, route, and working-set changes.
|
||||
|
||||
### Community
|
||||
|
||||
Thanks to **@cyq1017** for the restore-listing implementation (#2513),
|
||||
**@wywsoor** for the broader macOS/iTerm rollback UX report (#2494), and
|
||||
**@HUQIANTAO** for the `web_run` lock-splitting work (#2502).
|
||||
**@HUQIANTAO** for the `web_run` lock-splitting work (#2502) and turn-metadata
|
||||
prefix-cache stability work (#2517).
|
||||
|
||||
## [0.8.53] - 2026-06-03
|
||||
|
||||
|
||||
@@ -3062,6 +3062,22 @@ mod stream_decoder_tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn user_message_with_tail_turn_meta(task: &str, turn_meta: &str) -> Message {
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Text {
|
||||
text: task.to_string(),
|
||||
cache_control: None,
|
||||
},
|
||||
ContentBlock::Text {
|
||||
text: turn_meta.to_string(),
|
||||
cache_control: None,
|
||||
},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
fn tool_message_content(messages: &[Value], index: usize) -> &str {
|
||||
messages
|
||||
.iter()
|
||||
@@ -3128,6 +3144,30 @@ mod stream_decoder_tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn request_builder_keeps_tail_turn_meta_after_user_text_for_wire() {
|
||||
let turn_meta = "<turn_meta>\nCurrent local date: 2026-05-09\n</turn_meta>";
|
||||
let messages = vec![
|
||||
user_message_with_tail_turn_meta("first task", turn_meta),
|
||||
Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "first answer".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
},
|
||||
user_message_with_tail_turn_meta("second task", turn_meta),
|
||||
];
|
||||
|
||||
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
|
||||
let first = user_message_content(&built, 0);
|
||||
let second = user_message_content(&built, 1);
|
||||
let expected_ref = "<turn_meta_unchanged />";
|
||||
|
||||
assert_eq!(first, format!("first task\n{turn_meta}"));
|
||||
assert_eq!(second, format!("second task\n{expected_ref}"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn request_builder_keeps_changed_turn_meta_full_and_updates_recent_hash() {
|
||||
let first_meta = "<turn_meta>\nCurrent local date: 2026-05-09\n</turn_meta>";
|
||||
|
||||
@@ -1449,9 +1449,21 @@ In {new} mode: {policy}\n\n\
|
||||
reasoning_effort: Option<&str>,
|
||||
reasoning_effort_auto: bool,
|
||||
) -> Message {
|
||||
// Place the user text first and turn_meta last so that the leading
|
||||
// bytes of each user message stay stable across date / model-route /
|
||||
// working-set changes. DeepSeek's KV prefix cache matches byte
|
||||
// sequences from the start of each message; when turn_meta (which
|
||||
// contains the current date) sits at position 0 the entire user
|
||||
// message prefix is invalidated at every date boundary. Moving it
|
||||
// to the tail preserves the user-input prefix and limits cache
|
||||
// invalidation to the trailing metadata block.
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Text {
|
||||
text,
|
||||
cache_control: None,
|
||||
},
|
||||
self.turn_metadata_block(
|
||||
routed_model,
|
||||
mode,
|
||||
@@ -1459,10 +1471,6 @@ In {new} mode: {policy}\n\n\
|
||||
reasoning_effort,
|
||||
reasoning_effort_auto,
|
||||
),
|
||||
ContentBlock::Text {
|
||||
text,
|
||||
cache_control: None,
|
||||
},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2210,11 +2210,11 @@ fn working_set_reaches_model_as_turn_metadata() {
|
||||
engine.session.add_message(user_msg);
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
let first_block = messages
|
||||
let last_block = messages
|
||||
.last()
|
||||
.and_then(|message| message.content.first())
|
||||
.and_then(|message| message.content.last())
|
||||
.expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = first_block else {
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
panic!("expected text metadata block");
|
||||
};
|
||||
assert!(text.starts_with("<turn_meta>\n"));
|
||||
@@ -2235,11 +2235,11 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
|
||||
engine.session.add_message(user_msg);
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
let first_block = messages
|
||||
let last_block = messages
|
||||
.last()
|
||||
.and_then(|message| message.content.first())
|
||||
.and_then(|message| message.content.last())
|
||||
.expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = first_block else {
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
panic!("expected text metadata block");
|
||||
};
|
||||
|
||||
@@ -2266,8 +2266,8 @@ fn turn_metadata_includes_auto_model_route() {
|
||||
Some("max"),
|
||||
true,
|
||||
);
|
||||
let first_block = user_msg.content.first().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = first_block else {
|
||||
let last_block = user_msg.content.last().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
panic!("expected text metadata block");
|
||||
};
|
||||
|
||||
@@ -2294,8 +2294,11 @@ fn turn_metadata_includes_current_mode() {
|
||||
None,
|
||||
false,
|
||||
);
|
||||
let first_block = user_msg.content.first().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = first_block else {
|
||||
// turn_meta was relocated to the tail of the user message in #2517
|
||||
// to keep the leading bytes (user input) stable across date / model
|
||||
// route / working-set changes.
|
||||
let last_block = user_msg.content.last().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
panic!("expected text metadata block");
|
||||
};
|
||||
|
||||
@@ -2314,10 +2317,11 @@ fn turn_metadata_mode_updates_with_change_mode_op() {
|
||||
};
|
||||
let (mut engine, _handle) = Engine::new(config, &Config::default());
|
||||
|
||||
// In agent mode by default
|
||||
// In agent mode by default. The turn_meta block now sits at the
|
||||
// *tail* of the user message (see #2517) so we read `content.last()`.
|
||||
let msg = engine.user_text_message_with_turn_metadata("hello".to_string());
|
||||
let first_block = msg.content.first().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = first_block else {
|
||||
let last_block = msg.content.last().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
panic!("expected text metadata block");
|
||||
};
|
||||
assert!(
|
||||
@@ -2328,8 +2332,8 @@ fn turn_metadata_mode_updates_with_change_mode_op() {
|
||||
// Switch to YOLO — user_text_message_with_turn_metadata should reflect the new mode
|
||||
engine.current_mode = AppMode::Yolo;
|
||||
let msg = engine.user_text_message_with_turn_metadata("hello again".to_string());
|
||||
let first_block = msg.content.first().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = first_block else {
|
||||
let last_block = msg.content.last().expect("turn metadata block");
|
||||
let ContentBlock::Text { text, .. } = last_block else {
|
||||
panic!("expected text metadata block");
|
||||
};
|
||||
assert!(
|
||||
@@ -2377,10 +2381,10 @@ fn user_text_message_keeps_current_turn_input_after_turn_metadata() {
|
||||
let user_msg =
|
||||
engine.user_text_message_with_turn_metadata("explain the cache metrics".to_string());
|
||||
|
||||
let last_text = user_msg
|
||||
// User text is now at position 0, turn_meta at position 1.
|
||||
let first_text = user_msg
|
||||
.content
|
||||
.iter()
|
||||
.rev()
|
||||
.find_map(|block| {
|
||||
if let ContentBlock::Text { text, .. } = block {
|
||||
Some(text.as_str())
|
||||
@@ -2389,7 +2393,7 @@ fn user_text_message_keeps_current_turn_input_after_turn_metadata() {
|
||||
}
|
||||
})
|
||||
.expect("user text block");
|
||||
assert_eq!(last_text, "explain the cache metrics");
|
||||
assert_eq!(first_text, "explain the cache metrics");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2488,15 +2492,59 @@ fn turn_metadata_skips_tool_result_messages() {
|
||||
Some(ContentBlock::ToolResult { .. })
|
||||
));
|
||||
|
||||
// The earlier real user message already carries the turn_meta prefix.
|
||||
// The earlier real user message carries user text first, turn_meta last.
|
||||
let real_user = messages.first().expect("first user message");
|
||||
assert_eq!(real_user.role, "user");
|
||||
let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content")
|
||||
else {
|
||||
panic!("expected Text block on real user message");
|
||||
};
|
||||
assert!(text.starts_with("<turn_meta>\n"));
|
||||
assert!(text.contains("src/lib.rs"));
|
||||
assert_eq!(text, "inspect src/lib.rs");
|
||||
// turn_meta is at the tail of the content array.
|
||||
let last_block = real_user.content.last().expect("turn_meta block");
|
||||
let ContentBlock::Text { text: meta, .. } = last_block else {
|
||||
panic!("expected Text block for turn_meta at tail");
|
||||
};
|
||||
assert!(meta.starts_with("<turn_meta>\n"));
|
||||
}
|
||||
|
||||
/// User text must appear before turn_meta in the content array so that
|
||||
/// the leading bytes of each user message stay stable across date changes.
|
||||
/// DeepSeek's KV prefix cache matches byte sequences from the start of
|
||||
/// each message; placing the volatile date-bearing turn_meta at position
|
||||
/// 0 would invalidate the entire user message prefix at every date
|
||||
/// boundary. Moving it to the tail preserves the user-input prefix.
|
||||
#[test]
|
||||
fn user_message_turn_meta_is_appended_not_prepended() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let config = EngineConfig {
|
||||
workspace: tmp.path().to_path_buf(),
|
||||
..Default::default()
|
||||
};
|
||||
let (engine, _handle) = Engine::new(config, &Config::default());
|
||||
|
||||
let msg = engine.user_text_message_with_turn_metadata("hello world".to_string());
|
||||
assert_eq!(msg.role, "user");
|
||||
assert_eq!(msg.content.len(), 2);
|
||||
|
||||
// First content block: user text.
|
||||
let ContentBlock::Text { text, .. } = &msg.content[0] else {
|
||||
panic!("expected Text block at position 0");
|
||||
};
|
||||
assert_eq!(text, "hello world");
|
||||
|
||||
// Second content block: turn_meta.
|
||||
let ContentBlock::Text { text: meta, .. } = &msg.content[1] else {
|
||||
panic!("expected Text block for turn_meta at position 1");
|
||||
};
|
||||
assert!(
|
||||
meta.starts_with("<turn_meta>\n"),
|
||||
"turn_meta must be at the tail"
|
||||
);
|
||||
assert!(
|
||||
meta.contains("Current local date:"),
|
||||
"turn_meta must contain the date"
|
||||
);
|
||||
}
|
||||
|
||||
/// When the turn is mid-execution and the trailing user message is a
|
||||
@@ -3747,9 +3795,10 @@ async fn post_edit_hook_injects_diagnostics_message_before_next_request() {
|
||||
|
||||
let last = engine.session.messages.last().expect("message appended");
|
||||
assert_eq!(last.role, "user");
|
||||
let meta = match &last.content[0] {
|
||||
crate::models::ContentBlock::Text { text, .. } => text.clone(),
|
||||
other => panic!("expected text block, got {other:?}"),
|
||||
// turn_meta is now at the tail of the content array (PR #2517).
|
||||
let meta = match last.content.last() {
|
||||
Some(crate::models::ContentBlock::Text { text, .. }) => text.clone(),
|
||||
other => panic!("expected text block at tail, got {other:?}"),
|
||||
};
|
||||
assert!(meta.starts_with("<turn_meta>\n"));
|
||||
let diagnostic_text = last
|
||||
|
||||
@@ -19,13 +19,15 @@ PR is harvested, superseded, deferred, or closed.
|
||||
1. Stabilization and PR harvest: finish #2721 and #2722 before new feature work.
|
||||
2. Provider/model/auth correctness: land narrow correctness fixes that match the
|
||||
current provider architecture.
|
||||
3. File decomposition Phase 1: split safe, test-covered config/provider and TUI
|
||||
3. HarmonyOS/MatePad Edge intake: keep #2634 active, scoped, and credited while
|
||||
the OHOS/Nix dependency clearance work finishes upstream.
|
||||
4. File decomposition Phase 1: split safe, test-covered config/provider and TUI
|
||||
view surfaces before adding larger workflow UX.
|
||||
4. WhaleFlow MVP: typed IR, executor skeleton, replay, and pod monitor before
|
||||
5. WhaleFlow MVP: typed IR, executor skeleton, replay, and pod monitor before
|
||||
teacher/student promotion loops.
|
||||
5. Model Lab and HarnessProfile MVP: Hugging Face polish and provider/model
|
||||
6. Model Lab and HarnessProfile MVP: Hugging Face polish and provider/model
|
||||
posture before automatic harness creation.
|
||||
6. Release readiness: keep #2729 current and do not tag or publish without
|
||||
7. Release readiness: keep #2729 current and do not tag or publish without
|
||||
maintainer approval.
|
||||
|
||||
## Current Branch Harvest
|
||||
@@ -40,13 +42,14 @@ harvest/stewardship commits:
|
||||
| #2708 Windows sub-agent completion halves TUI render width | Cherry-picked as `e933a11d7`; follow-up fix `72653f8ef` invalidates reused fanout-card rows. | `cargo test -p codewhale-tui --locked subagent`; `cargo test -p codewhale-tui --locked terminal_size`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
|
||||
| #2627 Xiaomi MiMo Token Plan mode | Harvested only the auth-header behavior as `5aa68d986`; did not merge the conflicting mode/env changes. | `cargo test -p codewhale-tui --bin codewhale-tui --locked xiaomi_mimo`; `cargo test -p codewhale-secrets --locked xiaomi_mimo`; `cargo test -p codewhale-config --locked xiaomi_mimo`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
|
||||
| #2636 project-context mtime cache | Defer direct merge; harvest only after cache key/signature is widened. | Must include constitution changes, auto-generated context deletion, canonical path equivalence, and overwrite detection before landing. |
|
||||
| #2634 HarmonyOS port | Defer direct merge; draft has broad platform and TLS/runtime blast radius. | Harvest at most the unused `rustyline` cleanup after local verification; full port needs OHOS target checks and sandbox/security review. |
|
||||
| #2634 HarmonyOS port | Active HarmonyOS/MatePad Edge lane; do not close. | User-supplied MatePad Edge demo (`https://bilibili.com/video/av116689597368905`) confirms real-device interest. PR remains draft/blocked while the author waits on upstream Nix/dependency clearance and carries local patches; full port needs OHOS target checks plus sandbox, TLS, keyring, clipboard, browser-open, and self-update review before merge. |
|
||||
| #2687 append-only mode/approval prompt | Defer direct merge; draft has compile failures and Plan-mode prompt correctness risks. | Any future harvest must keep stable `message[0]` genuinely mode-agnostic, preserve mode/approval suffixes after capacity replans, and distinguish external overrides from persisted generated prompts. |
|
||||
| #2581 provider fallback chain design doc | Manually harvested as `docs/rfcs/2574-provider-fallback-chain.md` because the current PR head has no net file changes. | Keep issue #2574 open for implementation; close/comment on #2581 after the integration branch is public, crediting @idling11 and reporter @hsdbeebou. |
|
||||
| #2530 mention depth-cap hint | Already present in the current v0.9 stack as `a97675824` and `29f57665e`. | `cargo test -p codewhale-tui --locked try_autocomplete_file_mention_no_match` passed. |
|
||||
| #2513 restore snapshot listing | Manually harvested as `bb39cf169` with explicit `/restore list 101` cap rejection. | `cargo test -p codewhale-tui --locked restore_`; `cargo fmt --all -- --check`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. Keep #2494 open because this is only the restore-listing slice. |
|
||||
| #2576 PrefixCacheChange first-freeze event | Already present in the current v0.9 stack through `29acb87a9d`. | `cargo test -p codewhale-tui --locked prefix_cache` passed. Do not close until this integration branch is public or merged. |
|
||||
| #2502 web_run RwLock split | Manually harvested with panic-safe state write-back, `Arc<WebPage>` cache reads, and serialized cache tests. | `cargo test -p codewhale-tui --locked web_run`; `cargo clippy -p codewhale-tui --locked -- -D warnings`; `cargo fmt --all -- --check` passed. |
|
||||
| #2517 turn_meta tail relocation | Manually harvested with the user-text content block first and volatile turn metadata last. | `cargo test -p codewhale-tui --locked turn_metadata`; `cargo test -p codewhale-tui --locked user_message_turn_meta_is_appended_not_prepended`; `cargo test -p codewhale-tui --locked post_edit_hook_injects_diagnostics_message_before_next_request`; `cargo test -p codewhale-tui --locked request_builder_keeps_tail_turn_meta_after_user_text_for_wire`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
|
||||
|
||||
## PR Harvest Queue
|
||||
|
||||
@@ -75,14 +78,14 @@ harvest/stewardship commits:
|
||||
| #2506 provider path suffix overrides | Draft/conflicting | Partly superseded by current provider path-suffix support; verify. |
|
||||
| #2507 stream chunk timeout config | Draft/conflicting | Defer unless stabilization needs it. |
|
||||
| #2508 configurable path suffix | Conflicting | Likely superseded by #2506/current code; verify linked issue #2089. |
|
||||
| #2509 parallel read-only web search | Mergeable / already merged via #2504 | Already present in `origin/main` as `a09af2024`; safe to close as harvested/superseded. |
|
||||
| #2509 parallel read-only web search | Closed / already merged via #2504 | Already present in `origin/main` as `a09af2024`; closed as harvested/superseded on 2026-06-04. |
|
||||
| #2510 custom DuckDuckGo endpoint | Draft/mergeable | Low priority; defer unless docs/search lane takes it. |
|
||||
| #2511 ToolCallBefore hooks | Conflicting | Defer to hook lifecycle lane. |
|
||||
| #2512 custom completion sounds | Draft/conflicting | Defer. |
|
||||
| #2513 restore snapshot listing | Draft/mergeable | Manually harvested as `bb39cf169` with cap-rejection polish; close/comment after branch is public, leave #2494 open. |
|
||||
| #2517 turn_meta tail relocation | Mergeable | Already in high-priority harvest list; review prompt/cache implications. |
|
||||
| #2520 prompt base disk cache | Mergeable | Review after #2687 prompt architecture decision. |
|
||||
| #2522 hard compaction preserving system segment | Mergeable | Review after #2687 prompt architecture decision. |
|
||||
| #2517 turn_meta tail relocation | Mergeable | Manually harvested on the v0.9 branch; close/comment after branch is public. |
|
||||
| #2520 prompt base disk cache | Mergeable | Defer. Review found unused prompt-cache infrastructure with no runtime wiring, cache keys that still require building the prompt first, real-home cache writes in tests, and a contract that depends on the deferred #2687 prompt split. |
|
||||
| #2522 hard compaction preserving system segment | Mergeable | Defer. Review found a dormant hard path that would duplicate/cache summaries into the mutable system prompt if wired through current engine flow, and a simple tail split that can break tool-call pair and pinning invariants. |
|
||||
| #2526 shell tool availability docs | Draft/conflicting | Likely superseded by tool-surface docs; verify before closing. |
|
||||
| #2528 background completion wait | Draft/conflicting | Defer unless failing tests prove need. |
|
||||
| #2529 workspace shell opt-in | Draft/conflicting | Review with permissions/sandbox stabilization. |
|
||||
@@ -96,7 +99,7 @@ harvest/stewardship commits:
|
||||
| #2631 estimated_input_tokens cache | Mergeable | Already harvested into the 22-commit stack. |
|
||||
| #2632 tool-catalog JSON cache | Mergeable | Already harvested into the 22-commit stack. |
|
||||
| #2633 capacity reverse scans | Mergeable | Already harvested into the 22-commit stack. |
|
||||
| #2634 HarmonyOS port | Draft/mergeable | Defer broad port. Review found global TLS/provider-install risk, OHOS clipboard/test cfg issues, and major sandbox/process-security degradations. |
|
||||
| #2634 HarmonyOS port | Draft/blocked | Keep as active HarmonyOS/MatePad Edge lane. Do not merge wholesale until upstream Nix/dependency clearance, OHOS target checks, and sandbox/TLS/keyring/clipboard/browser/self-update review are complete. |
|
||||
| #2635 output rows cache | Mergeable | Already harvested into the 22-commit stack. |
|
||||
| #2636 project-context cache | Conflicting | Defer/harvest only after cache correctness fixes. |
|
||||
| #2639 POST /v1/sessions endpoint | Mergeable | Defer; app-server contract needs focused review. |
|
||||
@@ -125,9 +128,7 @@ Issue count should drop through evidence-backed consolidation, not bulk closing.
|
||||
|
||||
## Immediate Next Actions
|
||||
|
||||
1. Review #2517, #2520, and #2522 for prompt/cache implications after #2687
|
||||
was deferred.
|
||||
2. Prepare public comments for #2708, #2502, #2513, #2530, #2576, #2581, #2627,
|
||||
1. Prepare public comments for #2708, #2502, #2513, #2530, #2576, #2581, #2627,
|
||||
#2634, #2636, #2687, and already-harvested performance PRs.
|
||||
3. Start file decomposition Phase 1 only after the PR harvest table has no
|
||||
2. Start file decomposition Phase 1 only after the PR harvest table has no
|
||||
unknown high-priority provider/prompt/cache branches.
|
||||
|
||||
Reference in New Issue
Block a user