fix(tools): unify tool-result retrieval references

2026-05-12 12:09:40 -05:00
parent 33822424d8
commit 1372d72627
4 changed files with 543 additions and 67 deletions
@@ -204,6 +204,9 @@ impl From<&ArtifactRecord> for TranscriptArtifactRef {

 #[must_use]
 pub fn render_transcript_artifact_ref(reference: &TranscriptArtifactRef) -> String {
+    // The model sees several identifiers in this block. Keep a literal
+    // retrieve command next to them so it does not have to infer which
+    // field is accepted by `retrieve_tool_result`.
    format!(
        "[artifact: {tool}]\n\
         id:           {id}\n\
@@ -211,7 +214,8 @@ pub fn render_transcript_artifact_ref(reference: &TranscriptArtifactRef) -> Stri
         tool_call_id: {tool_call_id}\n\
         size:         {size}\n\
         path:         {path}\n\
-         preview:      {preview}",
+         preview:      {preview}\n\
+         retrieve:     retrieve_tool_result ref={id}",
        tool = reference.tool_name,
        id = reference.artifact_id,
        tool_call_id = reference.tool_call_id,
@@ -273,6 +277,10 @@ mod tests {
        let rendered = render_transcript_artifact_ref(&reference);

        assert!(rendered.contains("path:         artifacts/art_call-big.txt"));
+        assert!(
+            rendered.contains("retrieve:     retrieve_tool_result ref=art_call-big"),
+            "rendered block must embed the literal retrieve command: {rendered}"
+        );
    }

    #[test]
@@ -494,6 +494,16 @@ pub(crate) const CACHE_WARMUP_USER_TAIL: &str = "请只回复 OK";
 const TOOL_RESULT_SENT_CHAR_BUDGET: usize = 12_000;
 const TOOL_RESULT_HEAD_CHARS: usize = 4_000;
 const TOOL_RESULT_TAIL_CHARS: usize = 4_000;
+/// Tool results shorter than this stay inline even when repeated. The
+/// extra prompt bytes are cheaper than forcing the model through an
+/// unnecessary retrieval hop for tiny command outputs.
+const TOOL_RESULT_DEDUP_MIN_CHARS: usize = 1_024;
+/// Tool results shorter than this are also exempt from disk persistence —
+/// no SHA file is written. The wire-dedup path won't fire for them
+/// anyway (see `TOOL_RESULT_DEDUP_MIN_CHARS`), so there's no retrieval
+/// burden to satisfy. Keeps `~/.deepseek/tool_outputs/` from filling
+/// up with tiny `gh auth status` and `cat package.json` files.
+const TOOL_RESULT_SHA_PERSIST_MIN_CHARS: usize = 1_024;

 #[derive(Debug, Clone, PartialEq, Eq)]
 pub(crate) struct PromptInspection {
@@ -777,6 +787,34 @@ fn sha256_hex(bytes: &[u8]) -> String {
    format!("{:x}", hasher.finalize())
 }

+/// Persist a SHA-addressed copy of `content` to
+/// `~/.deepseek/tool_outputs/sha_<sha>.txt` so the model can retrieve
+/// the original bytes after the wire-dedup compactor has replaced
+/// later occurrences with a `<TOOL_RESULT_REF sha="..." />` block.
+///
+/// Returns `true` when the persist succeeded (or the content is
+/// below `TOOL_RESULT_SHA_PERSIST_MIN_CHARS` — there's no retrieval
+/// need to satisfy). Returns `false` when the write failed and the
+/// caller MUST skip dedup, because emitting a SHA ref the model
+/// can't retrieve is worse than inlining the content twice. The
+/// no-home-dir edge case (InvalidInput) is treated as a real
+/// failure: we can't promise retrieval works without a writable
+/// store.
+fn persist_tool_result_for_sha(sha: &str, content: &str) -> bool {
+    if content.chars().count() < TOOL_RESULT_SHA_PERSIST_MIN_CHARS {
+        return true;
+    }
+    match crate::tools::truncate::write_sha_spillover(sha, content) {
+        Ok(_) => true,
+        Err(err) => {
+            logging::warn(format!(
+                "tool-result SHA spillover write failed for sha={sha}: {err} — dedup skipped"
+            ));
+            false
+        }
+    }
+}
+
 #[derive(Clone)]
 struct PendingToolCallInfo {
    tool_name: String,
@@ -819,8 +857,9 @@ fn render_turn_meta_for_wire(
        .as_ref()
        .is_some_and(|previous| previous.sha256 == sha)
    {
-        let rendered =
-            format!("<TURN_META_REF sha=\"{sha}\" original_chars=\"{original_chars}\" />");
+        // Keep the repeated metadata slot short without surfacing an
+        // opaque hash the model cannot resolve.
+        let rendered = "<turn_meta_unchanged />".to_string();
        let budget = TurnMetaBudget {
            original_chars,
            sent_chars: rendered.chars().count(),
@@ -867,10 +906,30 @@ fn compact_tool_result_for_wire(
    let original_chars = content.chars().count();
    let sha = sha256_hex(content.as_bytes());

-    if let Some(previous) = seen_tool_results.get(&sha) {
+    // Below the threshold, repeating the content is safer than asking
+    // the model to chase a reference. Above it, persist a SHA-addressed
+    // copy before any later message can point at that SHA.
+    let dedup_eligible = original_chars >= TOOL_RESULT_DEDUP_MIN_CHARS;
+
+    if dedup_eligible && let Some(previous) = seen_tool_results.get(&sha) {
+        // Re-check persistence before emitting a ref. If the file is
+        // already present this is a cheap no-op; if the write now fails,
+        // inline the content rather than producing an orphan reference.
+        if !persist_tool_result_for_sha(&sha, content) {
+            return WireToolResult {
+                content: content.to_string(),
+                original_chars,
+                sent_chars: original_chars,
+                truncated: false,
+                deduplicated: false,
+            };
+        }
        let content = format!(
-            "<TOOL_RESULT_REF sha=\"{}\" original_message=\"{}\" chars=\"{}\" />",
-            sha, previous.message_label, previous.original_chars
+            "<TOOL_RESULT_REF sha=\"{sha}\" original_message=\"{label}\" chars=\"{chars}\">\n\
+             retrieve: retrieve_tool_result ref=sha:{sha}\n\
+             </TOOL_RESULT_REF>",
+            label = previous.message_label,
+            chars = previous.original_chars,
        );
        return WireToolResult {
            sent_chars: content.chars().count(),
@@ -881,13 +940,20 @@ fn compact_tool_result_for_wire(
        };
    }

-    seen_tool_results.insert(
-        sha.clone(),
-        SeenToolResult {
-            message_label: message_label.to_string(),
-            original_chars,
-        },
-    );
+    if dedup_eligible {
+        // Persist before registering the content as dedupable. If the
+        // write fails, later occurrences stay inline instead of pointing
+        // at a file that was never created.
+        if persist_tool_result_for_sha(&sha, content) {
+            seen_tool_results.insert(
+                sha.clone(),
+                SeenToolResult {
+                    message_label: message_label.to_string(),
+                    original_chars,
+                },
+            );
+        }
+    }

    if original_chars <= TOOL_RESULT_SENT_CHAR_BUDGET {
        return WireToolResult {
@@ -2402,14 +2468,10 @@ mod stream_decoder_tests {
        let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
        let first = user_message_content(&built, 0);
        let second = user_message_content(&built, 1);
-        let expected_sha = sha256_hex(turn_meta.as_bytes());
-        let expected_ref = format!(
-            "<TURN_META_REF sha=\"{expected_sha}\" original_chars=\"{}\" />",
-            turn_meta.chars().count()
-        );
+        let expected_ref = "<turn_meta_unchanged />";

        assert!(first.starts_with(turn_meta), "got: {first}");
-        assert!(second.starts_with(&expected_ref), "got: {second}");
+        assert!(second.starts_with(expected_ref), "got: {second}");
        assert!(second.ends_with("second task"), "got: {second}");
        assert_eq!(
            second,
@@ -2447,7 +2509,7 @@ mod stream_decoder_tests {

        let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
        assert!(
-            user_message_content(&built, 1).starts_with("<TURN_META_REF"),
+            user_message_content(&built, 1).starts_with("<turn_meta_unchanged />"),
            "got: {}",
            user_message_content(&built, 1)
        );
@@ -2536,7 +2598,7 @@ mod stream_decoder_tests {
    }

    #[test]
-    fn request_builder_deduplicates_identical_tool_results_for_wire() {
+    fn request_builder_does_not_dedup_short_tool_results_for_wire() {
        let output = "same tool output";
        let messages = vec![
            tool_use_message("tool-1", "read_file", json!({"path": "README.md"})),
@@ -2549,6 +2611,25 @@ mod stream_decoder_tests {
        let first = tool_message_content(&built, 0);
        let second = tool_message_content(&built, 1);

+        assert_eq!(first, output);
+        assert_eq!(second, output);
+        assert!(!second.contains("<TOOL_RESULT_REF"), "got: {second}");
+    }
+
+    #[test]
+    fn request_builder_deduplicates_large_identical_tool_results_with_retrieval_hint() {
+        let output = "A".repeat(2_000);
+        let messages = vec![
+            tool_use_message("tool-1", "read_file", json!({"path": "README.md"})),
+            tool_result_message("tool-1", &output),
+            tool_use_message("tool-2", "read_file", json!({"path": "README.md"})),
+            tool_result_message("tool-2", &output),
+        ];
+
+        let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
+        let first = tool_message_content(&built, 0);
+        let second = tool_message_content(&built, 1);
+
        assert_eq!(first, output);
        assert!(
            second.starts_with("<TOOL_RESULT_REF sha=\""),
@@ -2558,7 +2639,11 @@ mod stream_decoder_tests {
            second.contains("original_message=\"Message #1\""),
            "got: {second}"
        );
-        assert!(second.contains("chars=\"16\""), "got: {second}");
+        assert!(second.contains("chars=\"2000\""), "got: {second}");
+        assert!(
+            second.contains("retrieve: retrieve_tool_result ref=sha:"),
+            "got: {second}"
+        );
    }

    #[test]
@@ -2619,7 +2704,13 @@ mod stream_decoder_tests {
        assert!(tool_layers[0].truncated);
        assert!(!tool_layers[0].deduplicated);
        assert_eq!(tool_layers[1].original_chars, 14_000);
-        assert!(tool_layers[1].sent_chars < 200);
+        // Keep the reference far smaller than the original 14K output
+        // even with a copyable retrieval hint included.
+        assert!(
+            tool_layers[1].sent_chars < 300,
+            "deduplicated ref grew unexpectedly large: {}",
+            tool_layers[1].sent_chars
+        );
        assert!(!tool_layers[1].truncated);
        assert!(tool_layers[1].deduplicated);
    }
@@ -36,7 +36,7 @@ impl ToolSpec for RetrieveToolResultTool {
    }

    fn description(&self) -> &'static str {
-        "Retrieve a previously spilled large tool result from ~/.deepseek/tool_outputs by tool call id, filename, or spillover path. Supports summary, head, tail, lines, and query modes so you can fetch only the needed historical output."
+        "Retrieve a previously spilled large tool result. Accepts a tool_call_id (`call_abc123`), artifact id (`art_call_abc123`), SHA reference (`sha:<64-hex>` or bare 64-hex from `<TOOL_RESULT_REF>`), relative filename (`call_abc123.txt`, `artifacts/art_call_abc123.txt`), or absolute path under ~/.deepseek. Modes: summary, head, tail, lines, query."
    }

    fn input_schema(&self) -> Value {
@@ -45,7 +45,7 @@ impl ToolSpec for RetrieveToolResultTool {
            "properties": {
                "ref": {
                    "type": "string",
-                    "description": "Tool call id, tool_result:<id>, spillover filename, or absolute spillover path."
+                    "description": "Tool call id, artifact id (`art_<id>`), SHA ref (`sha:<64-hex>`), spillover filename, or absolute path under ~/.deepseek."
                },
                "mode": {
                    "type": "string",
@@ -97,7 +97,7 @@ impl ToolSpec for RetrieveToolResultTool {
        true
    }

-    async fn execute(&self, input: Value, _context: &ToolContext) -> Result<ToolResult, ToolError> {
+    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
        let reference = required_str(&input, "ref")?.trim();
        if reference.is_empty() {
            return Err(ToolError::invalid_input("ref cannot be empty"));
@@ -112,7 +112,7 @@ impl ToolSpec for RetrieveToolResultTool {
            1,
            HARD_MAX_BYTES,
        );
-        let path = resolve_spillover_reference(reference)?;
+        let path = resolve_spillover_reference(reference, &context.state_namespace)?;
        let content = fs::read_to_string(&path).map_err(|err| {
            ToolError::execution_failed(format!("failed to read {}: {err}", path.display()))
        })?;
@@ -139,53 +139,222 @@ impl ToolSpec for RetrieveToolResultTool {
    }
 }

-fn resolve_spillover_reference(reference: &str) -> Result<PathBuf, ToolError> {
+/// Resolve a tool-result ref to a concrete file path.
+///
+/// Accepts six shapes:
+/// 1. `tool_call_id` — legacy spillover form, `<id>.txt` under `tool_outputs/`.
+/// 2. `art_<id>` — current artifact id, written by `apply_spillover_with_artifact`.
+///    Tries the session artifact directory first, falls back to `<id>.txt`
+///    (stripping the `art_` prefix) so old + new naming both work.
+/// 3. `sha:<64-hex>` or bare 64-hex — content-addressed wire dedup, `sha_<hex>.txt`.
+/// 4. `tool_result:<x>` — `<x>` is any of the above after the prefix.
+/// 5. `artifacts/<file>.txt` or `<file>.txt` — relative paths.
+/// 6. Absolute paths under `~/.deepseek/`.
+///
+/// The error message on a miss enumerates which forms were tried so the
+/// model can correct course without a second blind guess.
+fn resolve_spillover_reference(reference: &str, session_id: &str) -> Result<PathBuf, ToolError> {
    let root = crate::tools::truncate::spillover_root()
        .ok_or_else(|| ToolError::execution_failed("could not resolve ~/.deepseek/tool_outputs"))?;
-    let root_canonical = root.canonicalize().map_err(|err| {
-        ToolError::execution_failed(format!(
-            "spillover directory {} is not readable: {err}",
-            root.display()
-        ))
-    })?;
+    let root_canonical = root.canonicalize().ok();
+
+    // Resolve the session's `artifacts/` directory.
+    // `session_artifact_absolute_path(sid, p)` returns
+    // `~/.deepseek/sessions/<sid>/<p>` — so passing the literal
+    // `ARTIFACTS_DIR_NAME` ("artifacts") gets us the real artifacts
+    // root. An earlier draft passed `Path::new(".")` and took
+    // `.parent()`, which landed one directory too high (`<sid>` instead
+    // of `<sid>/artifacts`) and silently broke every bare `art_<id>`
+    // ref — only the legacy-spillover fallback survived. The test
+    // `resolves_art_prefix_to_legacy_spillover_id` masked it because
+    // it ONLY wrote a legacy spillover file. The new test
+    // `resolves_art_prefix_via_session_artifacts` exercises the real
+    // path.
+    let session_artifacts_root = if !session_id.is_empty() {
+        crate::artifacts::session_artifact_absolute_path(
+            session_id,
+            std::path::Path::new(crate::artifacts::ARTIFACTS_DIR_NAME),
+        )
+    } else {
+        None
+    };
+    let session_artifacts_root_canonical = session_artifacts_root
+        .as_ref()
+        .and_then(|p| p.canonicalize().ok());

    let trimmed = reference.trim();
-    let stripped = trimmed.strip_prefix("tool_result:").unwrap_or(trimmed);
-    let raw_path = PathBuf::from(stripped);
+    let stripped = trimmed
+        .strip_prefix("tool_result:")
+        .unwrap_or(trimmed)
+        .trim();

-    let candidate = if raw_path.is_absolute() {
-        raw_path
-    } else if stripped.ends_with(".txt")
-        || stripped.contains('/')
-        || (std::path::MAIN_SEPARATOR != '/' && stripped.contains(std::path::MAIN_SEPARATOR))
-    {
-        root.join(stripped)
-    } else {
-        crate::tools::truncate::spillover_path(stripped).ok_or_else(|| {
-            ToolError::invalid_input(format!("invalid spilled tool-result ref `{reference}`"))
-        })?
+    let mut tried: Vec<PathBuf> = Vec::new();
+    let try_path = |candidate: PathBuf, tried: &mut Vec<PathBuf>| -> Option<PathBuf> {
+        // Always record what we tried so the `not_found` diagnostic
+        // can enumerate every candidate, even ones whose
+        // `canonicalize` returns ENOENT. Models otherwise saw the
+        // useless "(no valid candidates derived from ref)" line.
+        tried.push(candidate.clone());
+
+        // Reject symlinks at the leaf BEFORE canonicalizing so an
+        // attacker who can write under `<sid>/artifacts/` cannot
+        // plant a symlink to `/etc/passwd` and read it back through
+        // `retrieve_tool_result`. canonicalize() would happily
+        // follow such a link and then pass the `starts_with(root)`
+        // check because of the resolved-then-compare order. The
+        // legacy `~/.deepseek/tool_outputs/` dir is engine-only and
+        // never carried this concern; session artifact dirs hold
+        // arbitrary tool output and need the guard.
+        if let Ok(meta) = std::fs::symlink_metadata(&candidate)
+            && meta.file_type().is_symlink()
+        {
+            return None;
+        }
+
+        let canonical = candidate.canonicalize().ok()?;
+        if !canonical.is_file() {
+            return None;
+        }
+        let inside_legacy = root_canonical
+            .as_ref()
+            .is_some_and(|root| canonical.starts_with(root));
+        let inside_session = session_artifacts_root_canonical
+            .as_ref()
+            .is_some_and(|root| canonical.starts_with(root));
+        if inside_legacy || inside_session {
+            Some(canonical)
+        } else {
+            None
+        }
    };

-    let canonical = candidate.canonicalize().map_err(|err| {
-        ToolError::execution_failed(format!(
-            "spilled tool result `{reference}` was not found at {}: {err}",
-            candidate.display()
-        ))
-    })?;
-
-    if !canonical.starts_with(&root_canonical) {
-        return Err(ToolError::invalid_input(format!(
-            "ref `{reference}` does not point inside {}",
-            root_canonical.display()
-        )));
-    }
-    if !canonical.is_file() {
-        return Err(ToolError::invalid_input(format!(
-            "ref `{reference}` does not point to a spillover file"
-        )));
+    // Form 1/3: absolute path. Validate it lives under one of the allowed roots.
+    let raw_path = PathBuf::from(stripped);
+    if raw_path.is_absolute() {
+        if let Some(found) = try_path(raw_path.clone(), &mut tried) {
+            return Ok(found);
+        }
+        return Err(not_found(
+            reference,
+            &tried,
+            &root,
+            session_artifacts_root.as_deref(),
+        ));
    }

-    Ok(canonical)
+    // Form 4: `sha:<hex>` prefix or bare 64-hex SHA → SHA-addressed file.
+    let sha_candidate = stripped
+        .strip_prefix("sha:")
+        .or_else(|| stripped.strip_prefix("sha_"))
+        .unwrap_or(stripped)
+        .trim();
+    if crate::tools::truncate::is_valid_sha256(&sha_candidate.to_ascii_lowercase())
+        && let Some(p) = crate::tools::truncate::sha_spillover_path(sha_candidate)
+        && let Some(found) = try_path(p, &mut tried)
+    {
+        return Ok(found);
+    }
+
+    // Form 5: relative path with separator or `.txt` suffix.
+    let looks_like_path = stripped.ends_with(".txt")
+        || stripped.contains('/')
+        || (std::path::MAIN_SEPARATOR != '/' && stripped.contains(std::path::MAIN_SEPARATOR));
+    if looks_like_path {
+        // Try legacy spillover root.
+        if let Some(found) = try_path(root.join(stripped), &mut tried) {
+            return Ok(found);
+        }
+        // Session artifact roots point directly at `<sid>/artifacts/`.
+        // Strip an optional leading `artifacts/` segment from transcript
+        // paths before joining.
+        if let Some(sa_root) = session_artifacts_root.as_ref() {
+            let rel = stripped.strip_prefix("artifacts/").unwrap_or(stripped);
+            if let Some(found) = try_path(sa_root.join(rel), &mut tried) {
+                return Ok(found);
+            }
+        }
+        return Err(not_found(
+            reference,
+            &tried,
+            &root,
+            session_artifacts_root.as_deref(),
+        ));
+    }
+
+    // Form 1: bare id → legacy `tool_outputs/<id>.txt`.
+    if let Some(p) = crate::tools::truncate::spillover_path(stripped)
+        && let Some(found) = try_path(p, &mut tried)
+    {
+        return Ok(found);
+    }
+    // Form 2: `art_<id>` → strip prefix and try both:
+    //   a) session artifacts dir at `artifacts/art_<id>.txt`
+    //   b) legacy spillover at `<id>.txt`
+    if let Some(stripped_art) = stripped.strip_prefix("art_") {
+        if let Some(sa_root) = session_artifacts_root.as_ref() {
+            let session_file = sa_root.join(format!("art_{stripped_art}.txt"));
+            if let Some(found) = try_path(session_file, &mut tried) {
+                return Ok(found);
+            }
+        }
+        if let Some(p) = crate::tools::truncate::spillover_path(stripped_art)
+            && let Some(found) = try_path(p, &mut tried)
+        {
+            return Ok(found);
+        }
+    }
+    // Form 2b: maybe the model passed the bare id but the artifact lives
+    // under the session artifacts dir. Try `artifacts/art_<id>.txt`.
+    if let Some(sa_root) = session_artifacts_root.as_ref() {
+        let session_file = sa_root.join(format!("art_{stripped}.txt"));
+        if let Some(found) = try_path(session_file, &mut tried) {
+            return Ok(found);
+        }
+    }
+
+    Err(not_found(
+        reference,
+        &tried,
+        &root,
+        session_artifacts_root.as_deref(),
+    ))
+}
+
+/// Format a "ref didn't resolve" error with enough detail for the
+/// caller to choose a valid reference form on the next attempt.
+fn not_found(
+    reference: &str,
+    tried: &[PathBuf],
+    legacy_root: &std::path::Path,
+    session_artifacts_root: Option<&std::path::Path>,
+) -> ToolError {
+    let tried_list = if tried.is_empty() {
+        "(no valid candidates derived from ref)".to_string()
+    } else {
+        tried
+            .iter()
+            .map(|p| format!("  - {}", p.display()))
+            .collect::<Vec<_>>()
+            .join("\n")
+    };
+    let session_hint = session_artifacts_root
+        .map(|p| format!("\nsession artifacts root: {}", p.display()))
+        .unwrap_or_default();
+    ToolError::execution_failed(format!(
+        "spilled tool result `{reference}` not found. Tried:\n{tried_list}\n\
+         spillover root: {legacy}{session}\n\
+         Accepted ref forms: \
+         (a) `<tool_call_id>` for legacy spillover, \
+         (b) `art_<tool_call_id>` for session artifacts, \
+         (c) `sha:<64-hex>` or bare 64-hex from a <TOOL_RESULT_REF> block, \
+         (d) `artifacts/art_<id>.txt` or `<id>.txt` relative paths. \
+         If the source was a `<TOOL_RESULT_REF sha=\"...\" />` block, copy the \
+         sha value and pass it as `ref=sha:<value>`. \
+         If the source was an [artifact ...] block, pass the `id:` field \
+         (the `art_<id>` form) directly.",
+        legacy = legacy_root.display(),
+        session = session_hint,
+    ))
 }

 fn build_summary_payload(
@@ -619,9 +788,170 @@ mod tests {

        let err = execute_tool(json!({"ref": outside.display().to_string()})).unwrap_err();

+        // The new resolver classifies anything that fails to live under
+        // an approved root as "not found" so we don't accidentally
+        // leak whether an outside path exists on disk.
+        let msg = err.to_string();
        assert!(
-            err.to_string().contains("does not point inside"),
-            "unexpected error: {err}"
+            msg.contains("not found"),
+            "expected `not found` diagnostic, got: {msg}"
        );
    }
+
+    #[test]
+    fn resolves_sha_reference_from_wire_dedup() {
+        // A SHA-keyed lookup — emulates what happens when the model
+        // sees a `<TOOL_RESULT_REF sha="..." />` block and passes the
+        // SHA to retrieve_tool_result.
+        let _lock = test_lock();
+        let tmp = tempdir().unwrap();
+        let _guard = set_spillover_root(tmp.path().join("tool_outputs"));
+        let body = "checking crate ... error[E0425]: cannot find value\n".repeat(80);
+        let sha = {
+            use sha2::{Digest, Sha256};
+            let mut hasher = Sha256::new();
+            hasher.update(body.as_bytes());
+            format!("{:x}", hasher.finalize())
+        };
+        crate::tools::truncate::write_sha_spillover(&sha, &body).unwrap();
+
+        // Form: `sha:<hex>`
+        let result = execute_tool(json!({"ref": format!("sha:{sha}")})).unwrap();
+        assert!(result.success, "sha:<hex> form should resolve");
+
+        // Form: bare 64-hex
+        let result = execute_tool(json!({"ref": &sha})).unwrap();
+        assert!(result.success, "bare 64-hex form should resolve");
+    }
+
+    #[test]
+    fn resolves_art_prefix_to_legacy_spillover_id() {
+        // The model commonly sees `id: art_call_xyz` in artifact
+        // ref blocks. retrieve_tool_result should strip the `art_`
+        // prefix and find the legacy `<id>.txt` file if no
+        // session-artifact equivalent exists.
+        let _lock = test_lock();
+        let tmp = tempdir().unwrap();
+        let _guard = set_spillover_root(tmp.path().join("tool_outputs"));
+        crate::tools::truncate::write_spillover("call_xyz", "line1\nline2\nline3").unwrap();
+
+        let result = execute_tool(json!({"ref": "art_call_xyz"})).unwrap();
+        assert!(result.success, "art_ prefix should resolve to legacy id");
+    }
+
+    #[test]
+    fn not_found_error_lists_tried_candidates_and_accepted_forms() {
+        let _lock = test_lock();
+        let tmp = tempdir().unwrap();
+        let _guard = set_spillover_root(tmp.path().join("tool_outputs"));
+        fs::create_dir_all(tmp.path().join("tool_outputs")).unwrap();
+
+        let err = execute_tool(json!({"ref": "definitely_missing_id"})).unwrap_err();
+        let msg = err.to_string();
+        assert!(msg.contains("not found"), "got: {msg}");
+        assert!(
+            msg.contains("sha:"),
+            "diagnostic should mention sha form: {msg}"
+        );
+        assert!(
+            msg.contains("art_<tool_call_id>"),
+            "diagnostic should mention art form: {msg}"
+        );
+        assert!(
+            msg.contains("tool_outputs"),
+            "tried list should include the legacy spillover candidate: {msg}"
+        );
+        assert!(
+            !msg.contains("(no valid candidates derived from ref)"),
+            "tried list should not be empty: {msg}"
+        );
+    }
+
+    #[test]
+    fn resolves_art_prefix_via_session_artifacts() {
+        let _lock = test_lock();
+        let tmp = tempdir().unwrap();
+        let _spill_guard = set_spillover_root(tmp.path().join("tool_outputs"));
+        let _art_guard = {
+            let prior = crate::artifacts::set_test_artifact_sessions_root(Some(
+                tmp.path().join("sessions"),
+            ));
+            scopeguard_for_test(prior)
+        };
+        let session_id = "session-abc";
+        let body = "this is the canonical session artifact body, not a legacy file";
+        crate::artifacts::write_session_artifact(session_id, "art_call_real", body).unwrap();
+
+        let runtime = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .unwrap();
+        let workspace_tmp = tempdir().unwrap();
+        let ctx = ToolContext::new(workspace_tmp.path()).with_state_namespace(session_id);
+        let result = runtime
+            .block_on(RetrieveToolResultTool.execute(json!({"ref": "art_call_real"}), &ctx))
+            .expect("art_<id> should resolve via session artifacts");
+        assert!(result.success);
+        let payload: Value = serde_json::from_str(&result.content).unwrap();
+        assert!(
+            payload
+                .to_string()
+                .contains("canonical session artifact body"),
+            "summary should pull from session artifact, got: {}",
+            payload
+        );
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn rejects_symlink_inside_session_artifacts() {
+        let _lock = test_lock();
+        let tmp = tempdir().unwrap();
+        let _spill_guard = set_spillover_root(tmp.path().join("tool_outputs"));
+        let _art_guard = {
+            let prior = crate::artifacts::set_test_artifact_sessions_root(Some(
+                tmp.path().join("sessions"),
+            ));
+            scopeguard_for_test(prior)
+        };
+        let session_id = "session-xyz";
+        // Plant a sensitive file outside the artifact dir.
+        let secret = tmp.path().join("secret.txt");
+        fs::write(&secret, "do not leak").unwrap();
+        // Create the artifact dir, then drop a symlink inside it
+        // pointing at the secret.
+        let art_dir = tmp
+            .path()
+            .join("sessions")
+            .join(session_id)
+            .join("artifacts");
+        fs::create_dir_all(&art_dir).unwrap();
+        std::os::unix::fs::symlink(&secret, art_dir.join("art_evil.txt")).unwrap();
+
+        let runtime = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .unwrap();
+        let workspace_tmp = tempdir().unwrap();
+        let ctx = ToolContext::new(workspace_tmp.path()).with_state_namespace(session_id);
+        let result =
+            runtime.block_on(RetrieveToolResultTool.execute(json!({"ref": "art_evil"}), &ctx));
+        let err = result.expect_err("symlink artifact must not resolve");
+        assert!(
+            err.to_string().contains("not found"),
+            "expected `not found`, got: {err}"
+        );
+    }
+
+    struct ArtifactRootGuard {
+        prior: Option<PathBuf>,
+    }
+    impl Drop for ArtifactRootGuard {
+        fn drop(&mut self) {
+            crate::artifacts::set_test_artifact_sessions_root(self.prior.take());
+        }
+    }
+    fn scopeguard_for_test(prior: Option<PathBuf>) -> ArtifactRootGuard {
+        ArtifactRootGuard { prior }
+    }
 }
@@ -103,6 +103,53 @@ pub fn spillover_path(id: &str) -> Option<PathBuf> {
    Some(spillover_root()?.join(format!("{sanitised}.txt")))
 }

+/// Resolve the spillover-file path for a SHA256 content hash. Separate
+/// namespace (`sha_<hex>.txt`) from the tool-call-id files so the two
+/// reference systems (engine-side spillover + wire-side dedup) can
+/// co-exist in one directory without collisions. `sha` must be the
+/// raw 64-char lowercase hex digest — case-insensitive matching is
+/// done by the caller.
+#[must_use]
+pub fn sha_spillover_path(sha: &str) -> Option<PathBuf> {
+    let sha = sha.trim().to_ascii_lowercase();
+    if !is_valid_sha256(&sha) {
+        return None;
+    }
+    Some(spillover_root()?.join(format!("sha_{sha}.txt")))
+}
+
+/// True when `s` is a 64-character lowercase ASCII hex string. Used
+/// to detect bare SHA refs the model might pass to retrieval and to
+/// validate input to [`sha_spillover_path`].
+#[must_use]
+pub fn is_valid_sha256(s: &str) -> bool {
+    s.len() == 64
+        && s.chars()
+            .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())
+}
+
+/// Write content to the SHA-addressed spillover file. Idempotent —
+/// the same hash always maps to the same path, and the file's bytes
+/// are a function of the hash. Skips the write if the file already
+/// exists (which is the common case for the wire dedup, since the
+/// second sighting writes the same content that the first did).
+pub fn write_sha_spillover(sha: &str, content: &str) -> io::Result<PathBuf> {
+    let path = sha_spillover_path(sha).ok_or_else(|| {
+        io::Error::new(
+            io::ErrorKind::InvalidInput,
+            "sha must be a 64-char lowercase hex digest",
+        )
+    })?;
+    if path.exists() {
+        return Ok(path);
+    }
+    if let Some(parent) = path.parent() {
+        fs::create_dir_all(parent)?;
+    }
+    crate::utils::write_atomic(&path, content.as_bytes())?;
+    Ok(path)
+}
+
 /// Write `content` to the spillover file for `id`. Creates the
 /// parent directory if needed. Returns the resolved path on success.
 ///