fix(client): keep mutation-tool result confirmations off wire dedup (#1695)

Co-Authored-By: LeoLin990405 <126708262+LeoLin990405@users.noreply.github.com>
This commit is contained in:
Zhongyue Lin
2026-05-21 16:21:42 +08:00
committed by GitHub
parent faf5e07adb
commit a5501e608a
2 changed files with 181 additions and 8 deletions
+174 -8
View File
@@ -922,6 +922,18 @@ fn turn_meta_budget_json(turn_meta: &TurnMetaBudget) -> Value {
})
}
/// Mutating/write tools whose result body is a *confirmation* (it embeds
/// the unified diff + summary of what was just written), not retrievable
/// reference data. Two identical large `write_file` calls must each keep
/// their full confirmation inline: collapsing the later one to a
/// `<TOOL_RESULT_REF sha="..." />` makes the model lose the write-success
/// context and behave as if the file is missing (issue #1695). Read-style
/// tools (`read_file`, `grep_files`, `exec_shell`, …) are unaffected and
/// still dedup normally.
fn is_mutation_tool(tool_name: &str) -> bool {
matches!(tool_name, "write_file" | "edit_file" | "apply_patch")
}
fn compact_tool_result_for_wire(
tool_name: &str,
input: &Value,
@@ -932,10 +944,26 @@ fn compact_tool_result_for_wire(
let original_chars = content.chars().count();
let sha = sha256_hex(content.as_bytes());
// Two independent size-and-kind predicates, deliberately decoupled:
//
// * `persist_eligible` — size only. Any large result (including a
// mutation tool's big diff) is written to the SHA-addressed store
// so that, if it gets truncated below, the elided middle stays
// retrievable via `retrieve_tool_result`. Mutation tools must NOT
// be excluded here: a >12k-char `write_file` diff that we truncate
// without persisting would leave the model unable to recover it.
// * `dedup_eligible` — size AND non-mutation. Only this predicate
// gates collapsing a later identical result to a
// `<TOOL_RESULT_REF>`. Mutation-tool results are write
// *confirmations*, never dedup-eligible (#1695): two identical
// large `write_file` calls must each keep their full confirmation
// inline.
//
// Below the threshold, repeating the content is safer than asking
// the model to chase a reference. Above it, persist a SHA-addressed
// copy before any later message can point at that SHA.
let dedup_eligible = original_chars >= TOOL_RESULT_DEDUP_MIN_CHARS;
// the model to chase a reference, and there's no retrieval burden to
// satisfy, so both predicates are false.
let persist_eligible = original_chars >= TOOL_RESULT_DEDUP_MIN_CHARS;
let dedup_eligible = persist_eligible && !is_mutation_tool(tool_name);
if dedup_eligible && let Some(previous) = seen_tool_results.get(&sha) {
// Re-check persistence before emitting a ref. If the file is
@@ -966,11 +994,17 @@ fn compact_tool_result_for_wire(
};
}
if dedup_eligible {
// Persist before registering the content as dedupable. If the
// write fails, later occurrences stay inline instead of pointing
// at a file that was never created.
if persist_tool_result_for_sha(&sha, content) {
if persist_eligible {
// Persist any large result so a later truncation below stays
// retrievable by SHA — this includes mutation tools, whose big
// diffs are NOT dedup-eligible but still must be recoverable
// when elided. Only register the SHA as dedup-able (eligible to
// be replaced by a back-reference later) when `dedup_eligible`:
// if the write fails, skip registration so later occurrences
// stay inline instead of pointing at a file that was never
// created.
let persisted = persist_tool_result_for_sha(&sha, content);
if persisted && dedup_eligible {
seen_tool_results.insert(
sha.clone(),
SeenToolResult {
@@ -2768,6 +2802,138 @@ mod stream_decoder_tests {
);
}
#[test]
fn request_builder_never_dedups_large_identical_write_file_confirmations() {
// A `write_file` result embeds the unified diff + summary; it is a
// confirmation, not retrievable data. Two identical >1024-char
// write_file results must BOTH stay inline — collapsing the second
// to a SHA ref makes the model lose write-success context and
// report the file as missing (#1695).
let output = "A".repeat(2_000);
let messages = vec![
tool_use_message("tool-1", "write_file", json!({"path": "big.txt"})),
tool_result_message("tool-1", &output),
tool_use_message("tool-2", "write_file", json!({"path": "big.txt"})),
tool_result_message("tool-2", &output),
];
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
let first = tool_message_content(&built, 0);
let second = tool_message_content(&built, 1);
assert_eq!(first, output);
assert_eq!(second, output);
assert!(!second.contains("<TOOL_RESULT_REF"), "got: {second}");
// Non-mutation tools still dedup: an identical large read_file
// result collapses to a retrievable SHA ref.
let read_messages = vec![
tool_use_message("read-1", "read_file", json!({"path": "README.md"})),
tool_result_message("read-1", &output),
tool_use_message("read-2", "read_file", json!({"path": "README.md"})),
tool_result_message("read-2", &output),
];
let read_built = build_chat_messages(None, &read_messages, "deepseek-v4-flash");
let read_first = tool_message_content(&read_built, 0);
let read_second = tool_message_content(&read_built, 1);
assert_eq!(read_first, output);
assert!(
read_second.starts_with("<TOOL_RESULT_REF sha=\""),
"got: {read_second}"
);
}
#[test]
fn large_write_file_result_stays_inline_but_is_persisted_for_retrieval() {
// Decoupling regression (#1695 follow-up): a SINGLE very large
// `write_file` result must (a) never collapse to a
// `<TOOL_RESULT_REF>` (mutation confirmations stay inline) yet
// (b) still be persisted to the SHA store so the content elided
// by truncation remains retrievable via `retrieve_tool_result`.
// Before the fix, folding `!is_mutation_tool` into the single
// `dedup_eligible` gate also disabled persistence, so a >12k
// mutation diff was truncated AND unrecoverable.
let _guard = crate::tools::truncate::TEST_SPILLOVER_GUARD
.lock()
.unwrap_or_else(|err| err.into_inner());
let tmp = tempfile::tempdir().expect("tempdir");
let prior = crate::tools::truncate::set_test_spillover_root(Some(
tmp.path().join(".deepseek").join("tool_outputs"),
));
struct Restore(Option<std::path::PathBuf>);
impl Drop for Restore {
fn drop(&mut self) {
crate::tools::truncate::set_test_spillover_root(self.0.take());
}
}
let _restore = Restore(prior);
// > TOOL_RESULT_SENT_CHAR_BUDGET (12_000) so the wire path
// truncates and would need a SHA to recover the middle.
let big_diff = "D".repeat(20_000);
let sha = sha256_hex(big_diff.as_bytes());
let messages = vec![
tool_use_message("w-1", "write_file", json!({"path": "huge.rs"})),
tool_result_message("w-1", &big_diff),
tool_use_message("w-2", "write_file", json!({"path": "huge.rs"})),
tool_result_message("w-2", &big_diff),
];
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
let first = tool_message_content(&built, 0);
let second = tool_message_content(&built, 1);
// (a) Both confirmations stay inline — truncated, never a ref.
assert!(
first.contains("[TOOL_RESULT_TRUNCATED]"),
"first should be truncated, got: {first}"
);
assert!(
!first.contains("<TOOL_RESULT_REF"),
"first must not be a dedup ref, got: {first}"
);
assert!(
!second.contains("<TOOL_RESULT_REF"),
"second identical write_file must stay inline (#1695), got: {second}"
);
assert!(
second.contains("[TOOL_RESULT_TRUNCATED]"),
"second should also be inline-truncated, got: {second}"
);
assert!(
first.contains(&format!("sha256: {sha}")),
"truncation block should advertise the recovery SHA, got: {first}"
);
// (b) The full content was persisted to the SHA store and is
// retrievable — the seam `persist_tool_result_for_sha` writes
// to and `retrieve_tool_result ref=sha:` reads back from.
let path = crate::tools::truncate::sha_spillover_path(&sha)
.expect("sha spillover path resolvable under test root");
assert!(path.exists(), "large write_file output not persisted: {path:?}");
let persisted = std::fs::read_to_string(&path).expect("read persisted spillover");
assert_eq!(
persisted, big_diff,
"persisted content must match the original write_file result verbatim"
);
// Sanity: a large NON-mutation result still dedups (back-ref on
// the second sighting) — decoupling didn't regress #1695's
// preserved read-path behavior.
let read_messages = vec![
tool_use_message("r-1", "read_file", json!({"path": "huge.rs"})),
tool_result_message("r-1", &big_diff),
tool_use_message("r-2", "read_file", json!({"path": "huge.rs"})),
tool_result_message("r-2", &big_diff),
];
let read_built = build_chat_messages(None, &read_messages, "deepseek-v4-flash");
let read_second = tool_message_content(&read_built, 1);
assert!(
read_second.starts_with("<TOOL_RESULT_REF sha=\""),
"large read_file must still dedup to a ref, got: {read_second}"
);
}
#[test]
fn tool_result_budget_is_wire_only_and_does_not_mutate_session_message() {
let long_output = format!("{}{}", "A".repeat(7_000), "Z".repeat(7_000));
+7
View File
@@ -641,6 +641,13 @@ mod tests {
#[test]
fn cache_inspect_displays_tool_result_budget_metadata() {
// Wire dedup persists to the process-global SHA spillover root.
// Serialize through the same guard other tests use to override
// that root, so a parallel test pointing it at a temp dir can't
// make this test's second-sighting dedup silently fail.
let _spill_guard = crate::tools::truncate::TEST_SPILLOVER_GUARD
.lock()
.unwrap_or_else(|err| err.into_inner());
let mut app = create_test_app();
let long_output = format!("{}{}", "A".repeat(7_000), "Z".repeat(7_000));
app.api_messages.push(Message {