fix(client): keep mutation-tool result confirmations off wire dedup (#1695)
Co-Authored-By: LeoLin990405 <126708262+LeoLin990405@users.noreply.github.com>
This commit is contained in:
@@ -922,6 +922,18 @@ fn turn_meta_budget_json(turn_meta: &TurnMetaBudget) -> Value {
|
||||
})
|
||||
}
|
||||
|
||||
/// Mutating/write tools whose result body is a *confirmation* (it embeds
|
||||
/// the unified diff + summary of what was just written), not retrievable
|
||||
/// reference data. Two identical large `write_file` calls must each keep
|
||||
/// their full confirmation inline: collapsing the later one to a
|
||||
/// `<TOOL_RESULT_REF sha="..." />` makes the model lose the write-success
|
||||
/// context and behave as if the file is missing (issue #1695). Read-style
|
||||
/// tools (`read_file`, `grep_files`, `exec_shell`, …) are unaffected and
|
||||
/// still dedup normally.
|
||||
fn is_mutation_tool(tool_name: &str) -> bool {
|
||||
matches!(tool_name, "write_file" | "edit_file" | "apply_patch")
|
||||
}
|
||||
|
||||
fn compact_tool_result_for_wire(
|
||||
tool_name: &str,
|
||||
input: &Value,
|
||||
@@ -932,10 +944,26 @@ fn compact_tool_result_for_wire(
|
||||
let original_chars = content.chars().count();
|
||||
let sha = sha256_hex(content.as_bytes());
|
||||
|
||||
// Two independent size-and-kind predicates, deliberately decoupled:
|
||||
//
|
||||
// * `persist_eligible` — size only. Any large result (including a
|
||||
// mutation tool's big diff) is written to the SHA-addressed store
|
||||
// so that, if it gets truncated below, the elided middle stays
|
||||
// retrievable via `retrieve_tool_result`. Mutation tools must NOT
|
||||
// be excluded here: a >12k-char `write_file` diff that we truncate
|
||||
// without persisting would leave the model unable to recover it.
|
||||
// * `dedup_eligible` — size AND non-mutation. Only this predicate
|
||||
// gates collapsing a later identical result to a
|
||||
// `<TOOL_RESULT_REF>`. Mutation-tool results are write
|
||||
// *confirmations*, never dedup-eligible (#1695): two identical
|
||||
// large `write_file` calls must each keep their full confirmation
|
||||
// inline.
|
||||
//
|
||||
// Below the threshold, repeating the content is safer than asking
|
||||
// the model to chase a reference. Above it, persist a SHA-addressed
|
||||
// copy before any later message can point at that SHA.
|
||||
let dedup_eligible = original_chars >= TOOL_RESULT_DEDUP_MIN_CHARS;
|
||||
// the model to chase a reference, and there's no retrieval burden to
|
||||
// satisfy, so both predicates are false.
|
||||
let persist_eligible = original_chars >= TOOL_RESULT_DEDUP_MIN_CHARS;
|
||||
let dedup_eligible = persist_eligible && !is_mutation_tool(tool_name);
|
||||
|
||||
if dedup_eligible && let Some(previous) = seen_tool_results.get(&sha) {
|
||||
// Re-check persistence before emitting a ref. If the file is
|
||||
@@ -966,11 +994,17 @@ fn compact_tool_result_for_wire(
|
||||
};
|
||||
}
|
||||
|
||||
if dedup_eligible {
|
||||
// Persist before registering the content as dedupable. If the
|
||||
// write fails, later occurrences stay inline instead of pointing
|
||||
// at a file that was never created.
|
||||
if persist_tool_result_for_sha(&sha, content) {
|
||||
if persist_eligible {
|
||||
// Persist any large result so a later truncation below stays
|
||||
// retrievable by SHA — this includes mutation tools, whose big
|
||||
// diffs are NOT dedup-eligible but still must be recoverable
|
||||
// when elided. Only register the SHA as dedup-able (eligible to
|
||||
// be replaced by a back-reference later) when `dedup_eligible`:
|
||||
// if the write fails, skip registration so later occurrences
|
||||
// stay inline instead of pointing at a file that was never
|
||||
// created.
|
||||
let persisted = persist_tool_result_for_sha(&sha, content);
|
||||
if persisted && dedup_eligible {
|
||||
seen_tool_results.insert(
|
||||
sha.clone(),
|
||||
SeenToolResult {
|
||||
@@ -2768,6 +2802,138 @@ mod stream_decoder_tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn request_builder_never_dedups_large_identical_write_file_confirmations() {
|
||||
// A `write_file` result embeds the unified diff + summary; it is a
|
||||
// confirmation, not retrievable data. Two identical >1024-char
|
||||
// write_file results must BOTH stay inline — collapsing the second
|
||||
// to a SHA ref makes the model lose write-success context and
|
||||
// report the file as missing (#1695).
|
||||
let output = "A".repeat(2_000);
|
||||
let messages = vec![
|
||||
tool_use_message("tool-1", "write_file", json!({"path": "big.txt"})),
|
||||
tool_result_message("tool-1", &output),
|
||||
tool_use_message("tool-2", "write_file", json!({"path": "big.txt"})),
|
||||
tool_result_message("tool-2", &output),
|
||||
];
|
||||
|
||||
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
|
||||
let first = tool_message_content(&built, 0);
|
||||
let second = tool_message_content(&built, 1);
|
||||
|
||||
assert_eq!(first, output);
|
||||
assert_eq!(second, output);
|
||||
assert!(!second.contains("<TOOL_RESULT_REF"), "got: {second}");
|
||||
|
||||
// Non-mutation tools still dedup: an identical large read_file
|
||||
// result collapses to a retrievable SHA ref.
|
||||
let read_messages = vec![
|
||||
tool_use_message("read-1", "read_file", json!({"path": "README.md"})),
|
||||
tool_result_message("read-1", &output),
|
||||
tool_use_message("read-2", "read_file", json!({"path": "README.md"})),
|
||||
tool_result_message("read-2", &output),
|
||||
];
|
||||
let read_built = build_chat_messages(None, &read_messages, "deepseek-v4-flash");
|
||||
let read_first = tool_message_content(&read_built, 0);
|
||||
let read_second = tool_message_content(&read_built, 1);
|
||||
assert_eq!(read_first, output);
|
||||
assert!(
|
||||
read_second.starts_with("<TOOL_RESULT_REF sha=\""),
|
||||
"got: {read_second}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn large_write_file_result_stays_inline_but_is_persisted_for_retrieval() {
|
||||
// Decoupling regression (#1695 follow-up): a SINGLE very large
|
||||
// `write_file` result must (a) never collapse to a
|
||||
// `<TOOL_RESULT_REF>` (mutation confirmations stay inline) yet
|
||||
// (b) still be persisted to the SHA store so the content elided
|
||||
// by truncation remains retrievable via `retrieve_tool_result`.
|
||||
// Before the fix, folding `!is_mutation_tool` into the single
|
||||
// `dedup_eligible` gate also disabled persistence, so a >12k
|
||||
// mutation diff was truncated AND unrecoverable.
|
||||
let _guard = crate::tools::truncate::TEST_SPILLOVER_GUARD
|
||||
.lock()
|
||||
.unwrap_or_else(|err| err.into_inner());
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
let prior = crate::tools::truncate::set_test_spillover_root(Some(
|
||||
tmp.path().join(".deepseek").join("tool_outputs"),
|
||||
));
|
||||
struct Restore(Option<std::path::PathBuf>);
|
||||
impl Drop for Restore {
|
||||
fn drop(&mut self) {
|
||||
crate::tools::truncate::set_test_spillover_root(self.0.take());
|
||||
}
|
||||
}
|
||||
let _restore = Restore(prior);
|
||||
|
||||
// > TOOL_RESULT_SENT_CHAR_BUDGET (12_000) so the wire path
|
||||
// truncates and would need a SHA to recover the middle.
|
||||
let big_diff = "D".repeat(20_000);
|
||||
let sha = sha256_hex(big_diff.as_bytes());
|
||||
|
||||
let messages = vec![
|
||||
tool_use_message("w-1", "write_file", json!({"path": "huge.rs"})),
|
||||
tool_result_message("w-1", &big_diff),
|
||||
tool_use_message("w-2", "write_file", json!({"path": "huge.rs"})),
|
||||
tool_result_message("w-2", &big_diff),
|
||||
];
|
||||
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
|
||||
let first = tool_message_content(&built, 0);
|
||||
let second = tool_message_content(&built, 1);
|
||||
|
||||
// (a) Both confirmations stay inline — truncated, never a ref.
|
||||
assert!(
|
||||
first.contains("[TOOL_RESULT_TRUNCATED]"),
|
||||
"first should be truncated, got: {first}"
|
||||
);
|
||||
assert!(
|
||||
!first.contains("<TOOL_RESULT_REF"),
|
||||
"first must not be a dedup ref, got: {first}"
|
||||
);
|
||||
assert!(
|
||||
!second.contains("<TOOL_RESULT_REF"),
|
||||
"second identical write_file must stay inline (#1695), got: {second}"
|
||||
);
|
||||
assert!(
|
||||
second.contains("[TOOL_RESULT_TRUNCATED]"),
|
||||
"second should also be inline-truncated, got: {second}"
|
||||
);
|
||||
assert!(
|
||||
first.contains(&format!("sha256: {sha}")),
|
||||
"truncation block should advertise the recovery SHA, got: {first}"
|
||||
);
|
||||
|
||||
// (b) The full content was persisted to the SHA store and is
|
||||
// retrievable — the seam `persist_tool_result_for_sha` writes
|
||||
// to and `retrieve_tool_result ref=sha:` reads back from.
|
||||
let path = crate::tools::truncate::sha_spillover_path(&sha)
|
||||
.expect("sha spillover path resolvable under test root");
|
||||
assert!(path.exists(), "large write_file output not persisted: {path:?}");
|
||||
let persisted = std::fs::read_to_string(&path).expect("read persisted spillover");
|
||||
assert_eq!(
|
||||
persisted, big_diff,
|
||||
"persisted content must match the original write_file result verbatim"
|
||||
);
|
||||
|
||||
// Sanity: a large NON-mutation result still dedups (back-ref on
|
||||
// the second sighting) — decoupling didn't regress #1695's
|
||||
// preserved read-path behavior.
|
||||
let read_messages = vec![
|
||||
tool_use_message("r-1", "read_file", json!({"path": "huge.rs"})),
|
||||
tool_result_message("r-1", &big_diff),
|
||||
tool_use_message("r-2", "read_file", json!({"path": "huge.rs"})),
|
||||
tool_result_message("r-2", &big_diff),
|
||||
];
|
||||
let read_built = build_chat_messages(None, &read_messages, "deepseek-v4-flash");
|
||||
let read_second = tool_message_content(&read_built, 1);
|
||||
assert!(
|
||||
read_second.starts_with("<TOOL_RESULT_REF sha=\""),
|
||||
"large read_file must still dedup to a ref, got: {read_second}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tool_result_budget_is_wire_only_and_does_not_mutate_session_message() {
|
||||
let long_output = format!("{}{}", "A".repeat(7_000), "Z".repeat(7_000));
|
||||
|
||||
@@ -641,6 +641,13 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn cache_inspect_displays_tool_result_budget_metadata() {
|
||||
// Wire dedup persists to the process-global SHA spillover root.
|
||||
// Serialize through the same guard other tests use to override
|
||||
// that root, so a parallel test pointing it at a temp dir can't
|
||||
// make this test's second-sighting dedup silently fail.
|
||||
let _spill_guard = crate::tools::truncate::TEST_SPILLOVER_GUARD
|
||||
.lock()
|
||||
.unwrap_or_else(|err| err.into_inner());
|
||||
let mut app = create_test_app();
|
||||
let long_output = format!("{}{}", "A".repeat(7_000), "Z".repeat(7_000));
|
||||
app.api_messages.push(Message {
|
||||
|
||||
Reference in New Issue
Block a user