diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ffec202..c250464e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,12 +21,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 page handles and do not serialize through the mutation path. The harvest also adds panic-safe state write-back and serializes cache-mutating unit tests so the global web cache remains stable under normal Cargo test parallelism. +- Appended volatile `<turn_meta>` blocks after user text in outgoing user + message content arrays so provider prefix caches can keep matching the stable + user-input prefix across date, route, and working-set changes. ### Community Thanks to **@cyq1017** for the restore-listing implementation (#2513) and **@wywsoor** for the broader macOS/iTerm rollback UX report (#2494), and -**@HUQIANTAO** for the `web_run` lock-splitting work (#2502). +**@HUQIANTAO** for the `web_run` lock-splitting work (#2502) and turn-metadata +prefix-cache stability work (#2517). ## [0.8.53] - 2026-06-03 diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 3aa217b0..a3ecf4b4 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -3062,6 +3062,22 @@ mod stream_decoder_tests { } } + fn user_message_with_tail_turn_meta(task: &str, turn_meta: &str) -> Message { + Message { + role: "user".to_string(), + content: vec![ + ContentBlock::Text { + text: task.to_string(), + cache_control: None, + }, + ContentBlock::Text { + text: turn_meta.to_string(), + cache_control: None, + }, + ], + } + } + fn tool_message_content(messages: &[Value], index: usize) -> &str { messages .iter() @@ -3128,6 +3144,30 @@ mod stream_decoder_tests { ); } + #[test] + fn request_builder_keeps_tail_turn_meta_after_user_text_for_wire() { + let turn_meta = "\nCurrent local date: 2026-05-09\n"; + let messages = vec![ + user_message_with_tail_turn_meta("first task", turn_meta), + Message { + role: "assistant".to_string(), + content: 
vec![ContentBlock::Text { + text: "first answer".to_string(), + cache_control: None, + }], + }, + user_message_with_tail_turn_meta("second task", turn_meta), + ]; + + let built = build_chat_messages(None, &messages, "deepseek-v4-flash"); + let first = user_message_content(&built, 0); + let second = user_message_content(&built, 1); + let expected_ref = ""; + + assert_eq!(first, format!("first task\n{turn_meta}")); + assert_eq!(second, format!("second task\n{expected_ref}")); + } + #[test] fn request_builder_keeps_changed_turn_meta_full_and_updates_recent_hash() { let first_meta = "\nCurrent local date: 2026-05-09\n"; diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 83cd6e93..b3477950 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1449,9 +1449,21 @@ In {new} mode: {policy}\n\n\ reasoning_effort: Option<&str>, reasoning_effort_auto: bool, ) -> Message { + // Place the user text first and turn_meta last so that the leading + // bytes of each user message stay stable across date / model-route / + // working-set changes. DeepSeek's KV prefix cache matches byte + // sequences from the start of each message; when turn_meta (which + // contains the current date) sits at position 0 the entire user + // message prefix is invalidated at every date boundary. Moving it + // to the tail preserves the user-input prefix and limits cache + // invalidation to the trailing metadata block. 
Message { role: "user".to_string(), content: vec![ + ContentBlock::Text { + text, + cache_control: None, + }, self.turn_metadata_block( routed_model, mode, @@ -1459,10 +1471,6 @@ In {new} mode: {policy}\n\n\ reasoning_effort, reasoning_effort_auto, ), - ContentBlock::Text { - text, - cache_control: None, - }, ], } } diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index bed3276a..9e48a376 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -2210,11 +2210,11 @@ fn working_set_reaches_model_as_turn_metadata() { engine.session.add_message(user_msg); let messages = engine.messages_with_turn_metadata(); - let first_block = messages + let last_block = messages .last() - .and_then(|message| message.content.first()) + .and_then(|message| message.content.last()) .expect("turn metadata block"); - let ContentBlock::Text { text, .. } = first_block else { + let ContentBlock::Text { text, .. } = last_block else { panic!("expected text metadata block"); }; assert!(text.starts_with("\n")); @@ -2235,11 +2235,11 @@ fn turn_metadata_includes_current_local_date_without_working_set() { engine.session.add_message(user_msg); let messages = engine.messages_with_turn_metadata(); - let first_block = messages + let last_block = messages .last() - .and_then(|message| message.content.first()) + .and_then(|message| message.content.last()) .expect("turn metadata block"); - let ContentBlock::Text { text, .. } = first_block else { + let ContentBlock::Text { text, .. } = last_block else { panic!("expected text metadata block"); }; @@ -2266,8 +2266,8 @@ fn turn_metadata_includes_auto_model_route() { Some("max"), true, ); - let first_block = user_msg.content.first().expect("turn metadata block"); - let ContentBlock::Text { text, .. } = first_block else { + let last_block = user_msg.content.last().expect("turn metadata block"); + let ContentBlock::Text { text, .. 
} = last_block else { panic!("expected text metadata block"); }; @@ -2294,8 +2294,11 @@ fn turn_metadata_includes_current_mode() { None, false, ); - let first_block = user_msg.content.first().expect("turn metadata block"); - let ContentBlock::Text { text, .. } = first_block else { + // turn_meta was relocated to the tail of the user message in #2517 + // to keep the leading bytes (user input) stable across date / model + // route / working-set changes. + let last_block = user_msg.content.last().expect("turn metadata block"); + let ContentBlock::Text { text, .. } = last_block else { panic!("expected text metadata block"); }; @@ -2314,10 +2317,11 @@ fn turn_metadata_mode_updates_with_change_mode_op() { }; let (mut engine, _handle) = Engine::new(config, &Config::default()); - // In agent mode by default + // In agent mode by default. The turn_meta block now sits at the + // *tail* of the user message (see #2517) so we read `content.last()`. let msg = engine.user_text_message_with_turn_metadata("hello".to_string()); - let first_block = msg.content.first().expect("turn metadata block"); - let ContentBlock::Text { text, .. } = first_block else { + let last_block = msg.content.last().expect("turn metadata block"); + let ContentBlock::Text { text, .. } = last_block else { panic!("expected text metadata block"); }; assert!( @@ -2328,8 +2332,8 @@ fn turn_metadata_mode_updates_with_change_mode_op() { // Switch to YOLO — user_text_message_with_turn_metadata should reflect the new mode engine.current_mode = AppMode::Yolo; let msg = engine.user_text_message_with_turn_metadata("hello again".to_string()); - let first_block = msg.content.first().expect("turn metadata block"); - let ContentBlock::Text { text, .. } = first_block else { + let last_block = msg.content.last().expect("turn metadata block"); + let ContentBlock::Text { text, .. 
} = last_block else { panic!("expected text metadata block"); }; assert!( @@ -2377,10 +2381,10 @@ fn user_text_message_keeps_current_turn_input_after_turn_metadata() { let user_msg = engine.user_text_message_with_turn_metadata("explain the cache metrics".to_string()); - let last_text = user_msg + // User text is now at position 0, turn_meta at position 1. + let first_text = user_msg .content .iter() - .rev() .find_map(|block| { if let ContentBlock::Text { text, .. } = block { Some(text.as_str()) @@ -2389,7 +2393,7 @@ fn user_text_message_keeps_current_turn_input_after_turn_metadata() { } }) .expect("user text block"); - assert_eq!(last_text, "explain the cache metrics"); + assert_eq!(first_text, "explain the cache metrics"); } #[test] @@ -2488,15 +2492,59 @@ fn turn_metadata_skips_tool_result_messages() { Some(ContentBlock::ToolResult { .. }) )); - // The earlier real user message already carries the turn_meta prefix. + // The earlier real user message carries user text first, turn_meta last. let real_user = messages.first().expect("first user message"); assert_eq!(real_user.role, "user"); let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content") else { panic!("expected Text block on real user message"); }; - assert!(text.starts_with("\n")); - assert!(text.contains("src/lib.rs")); + assert_eq!(text, "inspect src/lib.rs"); + // turn_meta is at the tail of the content array. + let last_block = real_user.content.last().expect("turn_meta block"); + let ContentBlock::Text { text: meta, .. } = last_block else { + panic!("expected Text block for turn_meta at tail"); + }; + assert!(meta.starts_with("\n")); +} + +/// User text must appear before turn_meta in the content array so that +/// the leading bytes of each user message stay stable across date changes. 
+/// DeepSeek's KV prefix cache matches byte sequences from the start of +/// each message; placing the volatile date-bearing turn_meta at position +/// 0 would invalidate the entire user message prefix at every date +/// boundary. Moving it to the tail preserves the user-input prefix. +#[test] +fn user_message_turn_meta_is_appended_not_prepended() { + let tmp = tempdir().expect("tempdir"); + let config = EngineConfig { + workspace: tmp.path().to_path_buf(), + ..Default::default() + }; + let (engine, _handle) = Engine::new(config, &Config::default()); + + let msg = engine.user_text_message_with_turn_metadata("hello world".to_string()); + assert_eq!(msg.role, "user"); + assert_eq!(msg.content.len(), 2); + + // First content block: user text. + let ContentBlock::Text { text, .. } = &msg.content[0] else { + panic!("expected Text block at position 0"); + }; + assert_eq!(text, "hello world"); + + // Second content block: turn_meta. + let ContentBlock::Text { text: meta, .. } = &msg.content[1] else { + panic!("expected Text block for turn_meta at position 1"); + }; + assert!( + meta.starts_with("\n"), + "turn_meta must be at the tail" + ); + assert!( + meta.contains("Current local date:"), + "turn_meta must contain the date" + ); } /// When the turn is mid-execution and the trailing user message is a @@ -3747,9 +3795,10 @@ async fn post_edit_hook_injects_diagnostics_message_before_next_request() { let last = engine.session.messages.last().expect("message appended"); assert_eq!(last.role, "user"); - let meta = match &last.content[0] { - crate::models::ContentBlock::Text { text, .. } => text.clone(), - other => panic!("expected text block, got {other:?}"), + // turn_meta is now at the tail of the content array (PR #2517). + let meta = match last.content.last() { + Some(crate::models::ContentBlock::Text { text, .. 
}) => text.clone(), + other => panic!("expected text block at tail, got {other:?}"), }; assert!(meta.starts_with("\n")); let diagnostic_text = last diff --git a/docs/V0_9_0_EXECUTION_MAP.md b/docs/V0_9_0_EXECUTION_MAP.md index 5611bb1d..5d0bec7a 100644 --- a/docs/V0_9_0_EXECUTION_MAP.md +++ b/docs/V0_9_0_EXECUTION_MAP.md @@ -19,13 +19,15 @@ PR is harvested, superseded, deferred, or closed. 1. Stabilization and PR harvest: finish #2721 and #2722 before new feature work. 2. Provider/model/auth correctness: land narrow correctness fixes that match the current provider architecture. -3. File decomposition Phase 1: split safe, test-covered config/provider and TUI +3. HarmonyOS/MatePad Edge intake: keep #2634 active, scoped, and credited while + the OHOS/Nix dependency clearance work finishes upstream. +4. File decomposition Phase 1: split safe, test-covered config/provider and TUI view surfaces before adding larger workflow UX. -4. WhaleFlow MVP: typed IR, executor skeleton, replay, and pod monitor before +5. WhaleFlow MVP: typed IR, executor skeleton, replay, and pod monitor before teacher/student promotion loops. -5. Model Lab and HarnessProfile MVP: Hugging Face polish and provider/model +6. Model Lab and HarnessProfile MVP: Hugging Face polish and provider/model posture before automatic harness creation. -6. Release readiness: keep #2729 current and do not tag or publish without +7. Release readiness: keep #2729 current and do not tag or publish without maintainer approval. ## Current Branch Harvest @@ -40,13 +42,14 @@ harvest/stewardship commits: | #2708 Windows sub-agent completion halves TUI render width | Cherry-picked as `e933a11d7`; follow-up fix `72653f8ef` invalidates reused fanout-card rows. | `cargo test -p codewhale-tui --locked subagent`; `cargo test -p codewhale-tui --locked terminal_size`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. 
| | #2627 Xiaomi MiMo Token Plan mode | Harvested only the auth-header behavior as `5aa68d986`; did not merge the conflicting mode/env changes. | `cargo test -p codewhale-tui --bin codewhale-tui --locked xiaomi_mimo`; `cargo test -p codewhale-secrets --locked xiaomi_mimo`; `cargo test -p codewhale-config --locked xiaomi_mimo`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. | | #2636 project-context mtime cache | Defer direct merge; harvest only after cache key/signature is widened. | Must include constitution changes, auto-generated context deletion, canonical path equivalence, and overwrite detection before landing. | -| #2634 HarmonyOS port | Defer direct merge; draft has broad platform and TLS/runtime blast radius. | Harvest at most the unused `rustyline` cleanup after local verification; full port needs OHOS target checks and sandbox/security review. | +| #2634 HarmonyOS port | Active HarmonyOS/MatePad Edge lane; do not close. | User-supplied MatePad Edge demo (`https://bilibili.com/video/av116689597368905`) confirms real-device interest. PR remains draft/blocked while the author waits on upstream Nix/dependency clearance and carries local patches; full port needs OHOS target checks plus sandbox, TLS, keyring, clipboard, browser-open, and self-update review before merge. | | #2687 append-only mode/approval prompt | Defer direct merge; draft has compile failures and Plan-mode prompt correctness risks. | Any future harvest must keep stable `message[0]` genuinely mode-agnostic, preserve mode/approval suffixes after capacity replans, and distinguish external overrides from persisted generated prompts. | | #2581 provider fallback chain design doc | Manually harvested as `docs/rfcs/2574-provider-fallback-chain.md` because the current PR head has no net file changes. | Keep issue #2574 open for implementation; close/comment on #2581 after the integration branch is public, crediting @idling11 and reporter @hsdbeebou. 
| | #2530 mention depth-cap hint | Already present in the current v0.9 stack as `a97675824` and `29f57665e`. | `cargo test -p codewhale-tui --locked try_autocomplete_file_mention_no_match` passed. | | #2513 restore snapshot listing | Manually harvested as `bb39cf169` with explicit `/restore list 101` cap rejection. | `cargo test -p codewhale-tui --locked restore_`; `cargo fmt --all -- --check`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. Keep #2494 open because this is only the restore-listing slice. | | #2576 PrefixCacheChange first-freeze event | Already present in the current v0.9 stack through `29acb87a9d`. | `cargo test -p codewhale-tui --locked prefix_cache` passed. Do not close until this integration branch is public or merged. | | #2502 web_run RwLock split | Manually harvested with panic-safe state write-back, `Arc` cache reads, and serialized cache tests. | `cargo test -p codewhale-tui --locked web_run`; `cargo clippy -p codewhale-tui --locked -- -D warnings`; `cargo fmt --all -- --check` passed. | +| #2517 turn_meta tail relocation | Manually harvested with the user-text content block first and volatile turn metadata last. | `cargo test -p codewhale-tui --locked turn_metadata`; `cargo test -p codewhale-tui --locked user_message_turn_meta_is_appended_not_prepended`; `cargo test -p codewhale-tui --locked post_edit_hook_injects_diagnostics_message_before_next_request`; `cargo test -p codewhale-tui --locked request_builder_keeps_tail_turn_meta_after_user_text_for_wire`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. | ## PR Harvest Queue @@ -75,14 +78,14 @@ harvest/stewardship commits: | #2506 provider path suffix overrides | Draft/conflicting | Partly superseded by current provider path-suffix support; verify. | | #2507 stream chunk timeout config | Draft/conflicting | Defer unless stabilization needs it. 
| | #2508 configurable path suffix | Conflicting | Likely superseded by #2506/current code; verify linked issue #2089. | -| #2509 parallel read-only web search | Mergeable / already merged via #2504 | Already present in `origin/main` as `a09af2024`; safe to close as harvested/superseded. | +| #2509 parallel read-only web search | Closed / already merged via #2504 | Already present in `origin/main` as `a09af2024`; closed as harvested/superseded on 2026-06-04. | | #2510 custom DuckDuckGo endpoint | Draft/mergeable | Low priority; defer unless docs/search lane takes it. | | #2511 ToolCallBefore hooks | Conflicting | Defer to hook lifecycle lane. | | #2512 custom completion sounds | Draft/conflicting | Defer. | | #2513 restore snapshot listing | Draft/mergeable | Manually harvested as `bb39cf169` with cap-rejection polish; close/comment after branch is public, leave #2494 open. | -| #2517 turn_meta tail relocation | Mergeable | Already in high-priority harvest list; review prompt/cache implications. | -| #2520 prompt base disk cache | Mergeable | Review after #2687 prompt architecture decision. | -| #2522 hard compaction preserving system segment | Mergeable | Review after #2687 prompt architecture decision. | +| #2517 turn_meta tail relocation | Mergeable | Manually harvested on the v0.9 branch; close/comment after branch is public. | +| #2520 prompt base disk cache | Mergeable | Defer. Review found unused prompt-cache infrastructure with no runtime wiring, cache keys that still require building the prompt first, real-home cache writes in tests, and a contract that depends on the deferred #2687 prompt split. | +| #2522 hard compaction preserving system segment | Mergeable | Defer. Review found a dormant hard path that would duplicate/cache summaries into the mutable system prompt if wired through current engine flow, and a simple tail split that can break tool-call pair and pinning invariants. 
| | #2526 shell tool availability docs | Draft/conflicting | Likely superseded by tool-surface docs; verify before closing. | | #2528 background completion wait | Draft/conflicting | Defer unless failing tests prove need. | | #2529 workspace shell opt-in | Draft/conflicting | Review with permissions/sandbox stabilization. | @@ -96,7 +99,7 @@ harvest/stewardship commits: | #2631 estimated_input_tokens cache | Mergeable | Already harvested into the 22-commit stack. | | #2632 tool-catalog JSON cache | Mergeable | Already harvested into the 22-commit stack. | | #2633 capacity reverse scans | Mergeable | Already harvested into the 22-commit stack. | -| #2634 HarmonyOS port | Draft/mergeable | Defer broad port. Review found global TLS/provider-install risk, OHOS clipboard/test cfg issues, and major sandbox/process-security degradations. | +| #2634 HarmonyOS port | Draft/blocked | Keep as active HarmonyOS/MatePad Edge lane. Do not merge wholesale until upstream Nix/dependency clearance, OHOS target checks, and sandbox/TLS/keyring/clipboard/browser/self-update review are complete. | | #2635 output rows cache | Mergeable | Already harvested into the 22-commit stack. | | #2636 project-context cache | Conflicting | Defer/harvest only after cache correctness fixes. | | #2639 POST /v1/sessions endpoint | Mergeable | Defer; app-server contract needs focused review. | @@ -125,9 +128,7 @@ Issue count should drop through evidence-backed consolidation, not bulk closing. ## Immediate Next Actions -1. Review #2517, #2520, and #2522 for prompt/cache implications after #2687 - was deferred. -2. Prepare public comments for #2708, #2502, #2513, #2530, #2576, #2581, #2627, +1. Prepare public comments for #2708, #2502, #2513, #2530, #2576, #2581, #2627, #2634, #2636, #2687, and already-harvested performance PRs. -3. Start file decomposition Phase 1 only after the PR harvest table has no +2. 
Start file decomposition Phase 1 only after the PR harvest table has no unknown high-priority provider/prompt/cache branches.