diff --git a/crates/tui/src/artifacts.rs b/crates/tui/src/artifacts.rs index 620f8018..adb2647b 100644 --- a/crates/tui/src/artifacts.rs +++ b/crates/tui/src/artifacts.rs @@ -204,6 +204,9 @@ impl From<&ArtifactRecord> for TranscriptArtifactRef { #[must_use] pub fn render_transcript_artifact_ref(reference: &TranscriptArtifactRef) -> String { + // The model sees several identifiers in this block. Keep a literal + // retrieve command next to them so it does not have to infer which + // field is accepted by `retrieve_tool_result`. format!( "[artifact: {tool}]\n\ id: {id}\n\ @@ -211,7 +214,8 @@ pub fn render_transcript_artifact_ref(reference: &TranscriptArtifactRef) -> Stri tool_call_id: {tool_call_id}\n\ size: {size}\n\ path: {path}\n\ - preview: {preview}", + preview: {preview}\n\ + retrieve: retrieve_tool_result ref={id}", tool = reference.tool_name, id = reference.artifact_id, tool_call_id = reference.tool_call_id, @@ -273,6 +277,10 @@ mod tests { let rendered = render_transcript_artifact_ref(&reference); assert!(rendered.contains("path: artifacts/art_call-big.txt")); + assert!( + rendered.contains("retrieve: retrieve_tool_result ref=art_call-big"), + "rendered block must embed the literal retrieve command: {rendered}" + ); } #[test] diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 3eecbf7a..c9e51a01 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -494,6 +494,16 @@ pub(crate) const CACHE_WARMUP_USER_TAIL: &str = "请只回复 OK"; const TOOL_RESULT_SENT_CHAR_BUDGET: usize = 12_000; const TOOL_RESULT_HEAD_CHARS: usize = 4_000; const TOOL_RESULT_TAIL_CHARS: usize = 4_000; +/// Tool results shorter than this stay inline even when repeated. The +/// extra prompt bytes are cheaper than forcing the model through an +/// unnecessary retrieval hop for tiny command outputs. +const TOOL_RESULT_DEDUP_MIN_CHARS: usize = 1_024; +/// Tool results shorter than this are also exempt from disk persistence — +/// no SHA file is written. The wire-dedup path won't fire for them +/// anyway (see `TOOL_RESULT_DEDUP_MIN_CHARS`), so there's no retrieval +/// burden to satisfy. Keeps `~/.deepseek/tool_outputs/` from filling +/// up with tiny `gh auth status` and `cat package.json` files. +const TOOL_RESULT_SHA_PERSIST_MIN_CHARS: usize = 1_024; #[derive(Debug, Clone, PartialEq, Eq)] pub(crate) struct PromptInspection { @@ -777,6 +787,34 @@ fn sha256_hex(bytes: &[u8]) -> String { format!("{:x}", hasher.finalize()) } +/// Persist a SHA-addressed copy of `content` to +/// `~/.deepseek/tool_outputs/sha_.txt` so the model can retrieve +/// the original bytes after the wire-dedup compactor has replaced +/// later occurrences with a `` block. +/// +/// Returns `true` when the persist succeeded (or the content is +/// below `TOOL_RESULT_SHA_PERSIST_MIN_CHARS` — there's no retrieval +/// need to satisfy). Returns `false` when the write failed and the +/// caller MUST skip dedup, because emitting a SHA ref the model +/// can't retrieve is worse than inlining the content twice. The +/// no-home-dir edge case (InvalidInput) is treated as a real +/// failure: we can't promise retrieval works without a writable +/// store. +fn persist_tool_result_for_sha(sha: &str, content: &str) -> bool { + if content.chars().count() < TOOL_RESULT_SHA_PERSIST_MIN_CHARS { + return true; + } + match crate::tools::truncate::write_sha_spillover(sha, content) { + Ok(_) => true, + Err(err) => { + logging::warn(format!( + "tool-result SHA spillover write failed for sha={sha}: {err} — dedup skipped" + )); + false + } + } +} + #[derive(Clone)] struct PendingToolCallInfo { tool_name: String, @@ -819,8 +857,9 @@ fn render_turn_meta_for_wire( .as_ref() .is_some_and(|previous| previous.sha256 == sha) { - let rendered = - format!(""); + // Keep the repeated metadata slot short without surfacing an + // opaque hash the model cannot resolve. + let rendered = "".to_string(); let budget = TurnMetaBudget { original_chars, sent_chars: rendered.chars().count(), @@ -867,10 +906,30 @@ fn compact_tool_result_for_wire( let original_chars = content.chars().count(); let sha = sha256_hex(content.as_bytes()); - if let Some(previous) = seen_tool_results.get(&sha) { + // Below the threshold, repeating the content is safer than asking + // the model to chase a reference. Above it, persist a SHA-addressed + // copy before any later message can point at that SHA. + let dedup_eligible = original_chars >= TOOL_RESULT_DEDUP_MIN_CHARS; + + if dedup_eligible && let Some(previous) = seen_tool_results.get(&sha) { + // Re-check persistence before emitting a ref. If the file is + // already present this is a cheap no-op; if the write now fails, + // inline the content rather than producing an orphan reference. + if !persist_tool_result_for_sha(&sha, content) { + return WireToolResult { + content: content.to_string(), + original_chars, + sent_chars: original_chars, + truncated: false, + deduplicated: false, + }; + } let content = format!( - "", - sha, previous.message_label, previous.original_chars + "\n\ + retrieve: retrieve_tool_result ref=sha:{sha}\n\ + ", + label = previous.message_label, + chars = previous.original_chars, ); return WireToolResult { sent_chars: content.chars().count(), @@ -881,13 +940,20 @@ fn compact_tool_result_for_wire( }; } - seen_tool_results.insert( - sha.clone(), - SeenToolResult { - message_label: message_label.to_string(), - original_chars, - }, - ); + if dedup_eligible { + // Persist before registering the content as dedupable. If the + // write fails, later occurrences stay inline instead of pointing + // at a file that was never created. + if persist_tool_result_for_sha(&sha, content) { + seen_tool_results.insert( + sha.clone(), + SeenToolResult { + message_label: message_label.to_string(), + original_chars, + }, + ); + } + } if original_chars <= TOOL_RESULT_SENT_CHAR_BUDGET { return WireToolResult { @@ -2402,14 +2468,10 @@ mod stream_decoder_tests { let built = build_chat_messages(None, &messages, "deepseek-v4-flash"); let first = user_message_content(&built, 0); let second = user_message_content(&built, 1); - let expected_sha = sha256_hex(turn_meta.as_bytes()); - let expected_ref = format!( - "", - turn_meta.chars().count() - ); + let expected_ref = ""; assert!(first.starts_with(turn_meta), "got: {first}"); - assert!(second.starts_with(&expected_ref), "got: {second}"); + assert!(second.starts_with(expected_ref), "got: {second}"); assert!(second.ends_with("second task"), "got: {second}"); assert_eq!( second, @@ -2447,7 +2509,7 @@ mod stream_decoder_tests { let built = build_chat_messages(None, &messages, "deepseek-v4-flash"); assert!( - user_message_content(&built, 1).starts_with(""), "got: {}", user_message_content(&built, 1) ); @@ -2536,7 +2598,7 @@ mod stream_decoder_tests { } #[test] - fn request_builder_deduplicates_identical_tool_results_for_wire() { + fn request_builder_does_not_dedup_short_tool_results_for_wire() { let output = "same tool output"; let messages = vec![ tool_use_message("tool-1", "read_file", json!({"path": "README.md"})), @@ -2549,6 +2611,25 @@ mod stream_decoder_tests { let first = tool_message_content(&built, 0); let second = tool_message_content(&built, 1); + assert_eq!(first, output); + assert_eq!(second, output); + assert!(!second.contains(" &'static str { - "Retrieve a previously spilled large tool result from ~/.deepseek/tool_outputs by tool call id, filename, or spillover path. Supports summary, head, tail, lines, and query modes so you can fetch only the needed historical output." + "Retrieve a previously spilled large tool result. Accepts a tool_call_id (`call_abc123`), artifact id (`art_call_abc123`), SHA reference (`sha:<64-hex>` or bare 64-hex from ``), relative filename (`call_abc123.txt`, `artifacts/art_call_abc123.txt`), or absolute path under ~/.deepseek. Modes: summary, head, tail, lines, query." } fn input_schema(&self) -> Value { @@ -45,7 +45,7 @@ impl ToolSpec for RetrieveToolResultTool { "properties": { "ref": { "type": "string", - "description": "Tool call id, tool_result:, spillover filename, or absolute spillover path." + "description": "Tool call id, artifact id (`art_`), SHA ref (`sha:<64-hex>`), spillover filename, or absolute path under ~/.deepseek." }, "mode": { "type": "string", @@ -97,7 +97,7 @@ impl ToolSpec for RetrieveToolResultTool { true } - async fn execute(&self, input: Value, _context: &ToolContext) -> Result { + async fn execute(&self, input: Value, context: &ToolContext) -> Result { let reference = required_str(&input, "ref")?.trim(); if reference.is_empty() { return Err(ToolError::invalid_input("ref cannot be empty")); @@ -112,7 +112,7 @@ impl ToolSpec for RetrieveToolResultTool { 1, HARD_MAX_BYTES, ); - let path = resolve_spillover_reference(reference)?; + let path = resolve_spillover_reference(reference, &context.state_namespace)?; let content = fs::read_to_string(&path).map_err(|err| { ToolError::execution_failed(format!("failed to read {}: {err}", path.display())) })?; @@ -139,53 +139,222 @@ impl ToolSpec for RetrieveToolResultTool { } } -fn resolve_spillover_reference(reference: &str) -> Result { +/// Resolve a tool-result ref to a concrete file path. +/// +/// Accepts six shapes: +/// 1. `tool_call_id` — legacy spillover form, `.txt` under `tool_outputs/`. +/// 2. `art_` — current artifact id, written by `apply_spillover_with_artifact`. +/// Tries the session artifact directory first, falls back to `.txt` +/// (stripping the `art_` prefix) so old + new naming both work. +/// 3. `sha:<64-hex>` or bare 64-hex — content-addressed wire dedup, `sha_.txt`. +/// 4. `tool_result:` — `` is any of the above after the prefix. +/// 5. `artifacts/.txt` or `.txt` — relative paths. +/// 6. Absolute paths under `~/.deepseek/`. +/// +/// The error message on a miss enumerates which forms were tried so the +/// model can correct course without a second blind guess. +fn resolve_spillover_reference(reference: &str, session_id: &str) -> Result { let root = crate::tools::truncate::spillover_root() .ok_or_else(|| ToolError::execution_failed("could not resolve ~/.deepseek/tool_outputs"))?; - let root_canonical = root.canonicalize().map_err(|err| { - ToolError::execution_failed(format!( - "spillover directory {} is not readable: {err}", - root.display() - )) - })?; + let root_canonical = root.canonicalize().ok(); + + // Resolve the session's `artifacts/` directory. + // `session_artifact_absolute_path(sid, p)` returns + // `~/.deepseek/sessions//

` — so passing the literal + // `ARTIFACTS_DIR_NAME` ("artifacts") gets us the real artifacts + // root. An earlier draft passed `Path::new(".")` and took + // `.parent()`, which landed one directory too high (`` instead + // of `/artifacts`) and silently broke every bare `art_` + // ref — only the legacy-spillover fallback survived. The test + // `resolves_art_prefix_to_legacy_spillover_id` masked it because + // it ONLY wrote a legacy spillover file. The new test + // `resolves_art_prefix_via_session_artifacts` exercises the real + // path. + let session_artifacts_root = if !session_id.is_empty() { + crate::artifacts::session_artifact_absolute_path( + session_id, + std::path::Path::new(crate::artifacts::ARTIFACTS_DIR_NAME), + ) + } else { + None + }; + let session_artifacts_root_canonical = session_artifacts_root + .as_ref() + .and_then(|p| p.canonicalize().ok()); let trimmed = reference.trim(); - let stripped = trimmed.strip_prefix("tool_result:").unwrap_or(trimmed); - let raw_path = PathBuf::from(stripped); + let stripped = trimmed + .strip_prefix("tool_result:") + .unwrap_or(trimmed) + .trim(); - let candidate = if raw_path.is_absolute() { - raw_path - } else if stripped.ends_with(".txt") - || stripped.contains('/') - || (std::path::MAIN_SEPARATOR != '/' && stripped.contains(std::path::MAIN_SEPARATOR)) - { - root.join(stripped) - } else { - crate::tools::truncate::spillover_path(stripped).ok_or_else(|| { - ToolError::invalid_input(format!("invalid spilled tool-result ref `{reference}`")) - })? + let mut tried: Vec = Vec::new(); + let try_path = |candidate: PathBuf, tried: &mut Vec| -> Option { + // Always record what we tried so the `not_found` diagnostic + // can enumerate every candidate, even ones whose + // `canonicalize` returns ENOENT. Models otherwise saw the + // useless "(no valid candidates derived from ref)" line. + tried.push(candidate.clone()); + + // Reject symlinks at the leaf BEFORE canonicalizing so an + // attacker who can write under `/artifacts/` cannot + // plant a symlink to `/etc/passwd` and read it back through + // `retrieve_tool_result`. canonicalize() would happily + // follow such a link and then pass the `starts_with(root)` + // check because of the resolved-then-compare order. The + // legacy `~/.deepseek/tool_outputs/` dir is engine-only and + // never carried this concern; session artifact dirs hold + // arbitrary tool output and need the guard. + if let Ok(meta) = std::fs::symlink_metadata(&candidate) + && meta.file_type().is_symlink() + { + return None; + } + + let canonical = candidate.canonicalize().ok()?; + if !canonical.is_file() { + return None; + } + let inside_legacy = root_canonical + .as_ref() + .is_some_and(|root| canonical.starts_with(root)); + let inside_session = session_artifacts_root_canonical + .as_ref() + .is_some_and(|root| canonical.starts_with(root)); + if inside_legacy || inside_session { + Some(canonical) + } else { + None + } }; - let canonical = candidate.canonicalize().map_err(|err| { - ToolError::execution_failed(format!( - "spilled tool result `{reference}` was not found at {}: {err}", - candidate.display() - )) - })?; - - if !canonical.starts_with(&root_canonical) { - return Err(ToolError::invalid_input(format!( - "ref `{reference}` does not point inside {}", - root_canonical.display() - ))); - } - if !canonical.is_file() { - return Err(ToolError::invalid_input(format!( - "ref `{reference}` does not point to a spillover file" - ))); + // Form 1/3: absolute path. Validate it lives under one of the allowed roots. + let raw_path = PathBuf::from(stripped); + if raw_path.is_absolute() { + if let Some(found) = try_path(raw_path.clone(), &mut tried) { + return Ok(found); + } + return Err(not_found( + reference, + &tried, + &root, + session_artifacts_root.as_deref(), + )); } - Ok(canonical) + // Form 4: `sha:` prefix or bare 64-hex SHA → SHA-addressed file. + let sha_candidate = stripped + .strip_prefix("sha:") + .or_else(|| stripped.strip_prefix("sha_")) + .unwrap_or(stripped) + .trim(); + if crate::tools::truncate::is_valid_sha256(&sha_candidate.to_ascii_lowercase()) + && let Some(p) = crate::tools::truncate::sha_spillover_path(sha_candidate) + && let Some(found) = try_path(p, &mut tried) + { + return Ok(found); + } + + // Form 5: relative path with separator or `.txt` suffix. + let looks_like_path = stripped.ends_with(".txt") + || stripped.contains('/') + || (std::path::MAIN_SEPARATOR != '/' && stripped.contains(std::path::MAIN_SEPARATOR)); + if looks_like_path { + // Try legacy spillover root. + if let Some(found) = try_path(root.join(stripped), &mut tried) { + return Ok(found); + } + // Session artifact roots point directly at `/artifacts/`. + // Strip an optional leading `artifacts/` segment from transcript + // paths before joining. + if let Some(sa_root) = session_artifacts_root.as_ref() { + let rel = stripped.strip_prefix("artifacts/").unwrap_or(stripped); + if let Some(found) = try_path(sa_root.join(rel), &mut tried) { + return Ok(found); + } + } + return Err(not_found( + reference, + &tried, + &root, + session_artifacts_root.as_deref(), + )); + } + + // Form 1: bare id → legacy `tool_outputs/.txt`. + if let Some(p) = crate::tools::truncate::spillover_path(stripped) + && let Some(found) = try_path(p, &mut tried) + { + return Ok(found); + } + // Form 2: `art_` → strip prefix and try both: + // a) session artifacts dir at `artifacts/art_.txt` + // b) legacy spillover at `.txt` + if let Some(stripped_art) = stripped.strip_prefix("art_") { + if let Some(sa_root) = session_artifacts_root.as_ref() { + let session_file = sa_root.join(format!("art_{stripped_art}.txt")); + if let Some(found) = try_path(session_file, &mut tried) { + return Ok(found); + } + } + if let Some(p) = crate::tools::truncate::spillover_path(stripped_art) + && let Some(found) = try_path(p, &mut tried) + { + return Ok(found); + } + } + // Form 2b: maybe the model passed the bare id but the artifact lives + // under the session artifacts dir. Try `artifacts/art_.txt`. + if let Some(sa_root) = session_artifacts_root.as_ref() { + let session_file = sa_root.join(format!("art_{stripped}.txt")); + if let Some(found) = try_path(session_file, &mut tried) { + return Ok(found); + } + } + + Err(not_found( + reference, + &tried, + &root, + session_artifacts_root.as_deref(), + )) +} + +/// Format a "ref didn't resolve" error with enough detail for the +/// caller to choose a valid reference form on the next attempt. +fn not_found( + reference: &str, + tried: &[PathBuf], + legacy_root: &std::path::Path, + session_artifacts_root: Option<&std::path::Path>, +) -> ToolError { + let tried_list = if tried.is_empty() { + "(no valid candidates derived from ref)".to_string() + } else { + tried + .iter() + .map(|p| format!(" - {}", p.display())) + .collect::>() + .join("\n") + }; + let session_hint = session_artifacts_root + .map(|p| format!("\nsession artifacts root: {}", p.display())) + .unwrap_or_default(); + ToolError::execution_failed(format!( + "spilled tool result `{reference}` not found. Tried:\n{tried_list}\n\ + spillover root: {legacy}{session}\n\ + Accepted ref forms: \ + (a) `` for legacy spillover, \ + (b) `art_` for session artifacts, \ + (c) `sha:<64-hex>` or bare 64-hex from a block, \ + (d) `artifacts/art_.txt` or `.txt` relative paths. \ + If the source was a `` block, copy the \ + sha value and pass it as `ref=sha:`. \ + If the source was an [artifact ...] block, pass the `id:` field \ + (the `art_` form) directly.", + legacy = legacy_root.display(), + session = session_hint, + )) } fn build_summary_payload( @@ -619,9 +788,170 @@ mod tests { let err = execute_tool(json!({"ref": outside.display().to_string()})).unwrap_err(); + // The new resolver classifies anything that fails to live under + // an approved root as "not found" so we don't accidentally + // leak whether an outside path exists on disk. + let msg = err.to_string(); assert!( - err.to_string().contains("does not point inside"), - "unexpected error: {err}" + msg.contains("not found"), + "expected `not found` diagnostic, got: {msg}" ); } + + #[test] + fn resolves_sha_reference_from_wire_dedup() { + // A SHA-keyed lookup — emulates what happens when the model + // sees a `` block and passes the + // SHA to retrieve_tool_result. + let _lock = test_lock(); + let tmp = tempdir().unwrap(); + let _guard = set_spillover_root(tmp.path().join("tool_outputs")); + let body = "checking crate ... error[E0425]: cannot find value\n".repeat(80); + let sha = { + use sha2::{Digest, Sha256}; + let mut hasher = Sha256::new(); + hasher.update(body.as_bytes()); + format!("{:x}", hasher.finalize()) + }; + crate::tools::truncate::write_sha_spillover(&sha, &body).unwrap(); + + // Form: `sha:` + let result = execute_tool(json!({"ref": format!("sha:{sha}")})).unwrap(); + assert!(result.success, "sha: form should resolve"); + + // Form: bare 64-hex + let result = execute_tool(json!({"ref": &sha})).unwrap(); + assert!(result.success, "bare 64-hex form should resolve"); + } + + #[test] + fn resolves_art_prefix_to_legacy_spillover_id() { + // The model commonly sees `id: art_call_xyz` in artifact + // ref blocks. retrieve_tool_result should strip the `art_` + // prefix and find the legacy `.txt` file if no + // session-artifact equivalent exists. + let _lock = test_lock(); + let tmp = tempdir().unwrap(); + let _guard = set_spillover_root(tmp.path().join("tool_outputs")); + crate::tools::truncate::write_spillover("call_xyz", "line1\nline2\nline3").unwrap(); + + let result = execute_tool(json!({"ref": "art_call_xyz"})).unwrap(); + assert!(result.success, "art_ prefix should resolve to legacy id"); + } + + #[test] + fn not_found_error_lists_tried_candidates_and_accepted_forms() { + let _lock = test_lock(); + let tmp = tempdir().unwrap(); + let _guard = set_spillover_root(tmp.path().join("tool_outputs")); + fs::create_dir_all(tmp.path().join("tool_outputs")).unwrap(); + + let err = execute_tool(json!({"ref": "definitely_missing_id"})).unwrap_err(); + let msg = err.to_string(); + assert!(msg.contains("not found"), "got: {msg}"); + assert!( + msg.contains("sha:"), + "diagnostic should mention sha form: {msg}" + ); + assert!( + msg.contains("art_"), + "diagnostic should mention art form: {msg}" + ); + assert!( + msg.contains("tool_outputs"), + "tried list should include the legacy spillover candidate: {msg}" + ); + assert!( + !msg.contains("(no valid candidates derived from ref)"), + "tried list should not be empty: {msg}" + ); + } + + #[test] + fn resolves_art_prefix_via_session_artifacts() { + let _lock = test_lock(); + let tmp = tempdir().unwrap(); + let _spill_guard = set_spillover_root(tmp.path().join("tool_outputs")); + let _art_guard = { + let prior = crate::artifacts::set_test_artifact_sessions_root(Some( + tmp.path().join("sessions"), + )); + scopeguard_for_test(prior) + }; + let session_id = "session-abc"; + let body = "this is the canonical session artifact body, not a legacy file"; + crate::artifacts::write_session_artifact(session_id, "art_call_real", body).unwrap(); + + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + let workspace_tmp = tempdir().unwrap(); + let ctx = ToolContext::new(workspace_tmp.path()).with_state_namespace(session_id); + let result = runtime + .block_on(RetrieveToolResultTool.execute(json!({"ref": "art_call_real"}), &ctx)) + .expect("art_ should resolve via session artifacts"); + assert!(result.success); + let payload: Value = serde_json::from_str(&result.content).unwrap(); + assert!( + payload + .to_string() + .contains("canonical session artifact body"), + "summary should pull from session artifact, got: {}", + payload + ); + } + + #[cfg(unix)] + #[test] + fn rejects_symlink_inside_session_artifacts() { + let _lock = test_lock(); + let tmp = tempdir().unwrap(); + let _spill_guard = set_spillover_root(tmp.path().join("tool_outputs")); + let _art_guard = { + let prior = crate::artifacts::set_test_artifact_sessions_root(Some( + tmp.path().join("sessions"), + )); + scopeguard_for_test(prior) + }; + let session_id = "session-xyz"; + // Plant a sensitive file outside the artifact dir. + let secret = tmp.path().join("secret.txt"); + fs::write(&secret, "do not leak").unwrap(); + // Create the artifact dir, then drop a symlink inside it + // pointing at the secret. + let art_dir = tmp + .path() + .join("sessions") + .join(session_id) + .join("artifacts"); + fs::create_dir_all(&art_dir).unwrap(); + std::os::unix::fs::symlink(&secret, art_dir.join("art_evil.txt")).unwrap(); + + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + let workspace_tmp = tempdir().unwrap(); + let ctx = ToolContext::new(workspace_tmp.path()).with_state_namespace(session_id); + let result = + runtime.block_on(RetrieveToolResultTool.execute(json!({"ref": "art_evil"}), &ctx)); + let err = result.expect_err("symlink artifact must not resolve"); + assert!( + err.to_string().contains("not found"), + "expected `not found`, got: {err}" + ); + } + + struct ArtifactRootGuard { + prior: Option, + } + impl Drop for ArtifactRootGuard { + fn drop(&mut self) { + crate::artifacts::set_test_artifact_sessions_root(self.prior.take()); + } + } + fn scopeguard_for_test(prior: Option) -> ArtifactRootGuard { + ArtifactRootGuard { prior } + } } diff --git a/crates/tui/src/tools/truncate.rs b/crates/tui/src/tools/truncate.rs index dd73d2aa..e0cadcae 100644 --- a/crates/tui/src/tools/truncate.rs +++ b/crates/tui/src/tools/truncate.rs @@ -103,6 +103,53 @@ pub fn spillover_path(id: &str) -> Option { Some(spillover_root()?.join(format!("{sanitised}.txt"))) } +/// Resolve the spillover-file path for a SHA256 content hash. Separate +/// namespace (`sha_.txt`) from the tool-call-id files so the two +/// reference systems (engine-side spillover + wire-side dedup) can +/// co-exist in one directory without collisions. `sha` must be the +/// raw 64-char lowercase hex digest — case-insensitive matching is +/// done by the caller. +#[must_use] +pub fn sha_spillover_path(sha: &str) -> Option { + let sha = sha.trim().to_ascii_lowercase(); + if !is_valid_sha256(&sha) { + return None; + } + Some(spillover_root()?.join(format!("sha_{sha}.txt"))) +} + +/// True when `s` is a 64-character lowercase ASCII hex string. Used +/// to detect bare SHA refs the model might pass to retrieval and to +/// validate input to [`sha_spillover_path`]. +#[must_use] +pub fn is_valid_sha256(s: &str) -> bool { + s.len() == 64 + && s.chars() + .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()) +} + +/// Write content to the SHA-addressed spillover file. Idempotent — +/// the same hash always maps to the same path, and the file's bytes +/// are a function of the hash. Skips the write if the file already +/// exists (which is the common case for the wire dedup, since the +/// second sighting writes the same content that the first did). +pub fn write_sha_spillover(sha: &str, content: &str) -> io::Result { + let path = sha_spillover_path(sha).ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "sha must be a 64-char lowercase hex digest", + ) + })?; + if path.exists() { + return Ok(path); + } + if let Some(parent) = path.parent() { + fs::create_dir_all(parent)?; + } + crate::utils::write_atomic(&path, content.as_bytes())?; + Ok(path) +} + /// Write `content` to the spillover file for `id`. Creates the /// parent directory if needed. Returns the resolved path on success. ///