fix(tools): unify tool-result retrieval references
This commit is contained in:
@@ -204,6 +204,9 @@ impl From<&ArtifactRecord> for TranscriptArtifactRef {
|
||||
|
||||
#[must_use]
|
||||
pub fn render_transcript_artifact_ref(reference: &TranscriptArtifactRef) -> String {
|
||||
// The model sees several identifiers in this block. Keep a literal
|
||||
// retrieve command next to them so it does not have to infer which
|
||||
// field is accepted by `retrieve_tool_result`.
|
||||
format!(
|
||||
"[artifact: {tool}]\n\
|
||||
id: {id}\n\
|
||||
@@ -211,7 +214,8 @@ pub fn render_transcript_artifact_ref(reference: &TranscriptArtifactRef) -> Stri
|
||||
tool_call_id: {tool_call_id}\n\
|
||||
size: {size}\n\
|
||||
path: {path}\n\
|
||||
preview: {preview}",
|
||||
preview: {preview}\n\
|
||||
retrieve: retrieve_tool_result ref={id}",
|
||||
tool = reference.tool_name,
|
||||
id = reference.artifact_id,
|
||||
tool_call_id = reference.tool_call_id,
|
||||
@@ -273,6 +277,10 @@ mod tests {
|
||||
let rendered = render_transcript_artifact_ref(&reference);
|
||||
|
||||
assert!(rendered.contains("path: artifacts/art_call-big.txt"));
|
||||
assert!(
|
||||
rendered.contains("retrieve: retrieve_tool_result ref=art_call-big"),
|
||||
"rendered block must embed the literal retrieve command: {rendered}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
+113
-22
@@ -494,6 +494,16 @@ pub(crate) const CACHE_WARMUP_USER_TAIL: &str = "请只回复 OK";
|
||||
const TOOL_RESULT_SENT_CHAR_BUDGET: usize = 12_000;
|
||||
const TOOL_RESULT_HEAD_CHARS: usize = 4_000;
|
||||
const TOOL_RESULT_TAIL_CHARS: usize = 4_000;
|
||||
/// Tool results shorter than this stay inline even when repeated. The
|
||||
/// extra prompt bytes are cheaper than forcing the model through an
|
||||
/// unnecessary retrieval hop for tiny command outputs.
|
||||
const TOOL_RESULT_DEDUP_MIN_CHARS: usize = 1_024;
|
||||
/// Tool results shorter than this are also exempt from disk persistence —
|
||||
/// no SHA file is written. The wire-dedup path won't fire for them
|
||||
/// anyway (see `TOOL_RESULT_DEDUP_MIN_CHARS`), so there's no retrieval
|
||||
/// burden to satisfy. Keeps `~/.deepseek/tool_outputs/` from filling
|
||||
/// up with tiny `gh auth status` and `cat package.json` files.
|
||||
const TOOL_RESULT_SHA_PERSIST_MIN_CHARS: usize = 1_024;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct PromptInspection {
|
||||
@@ -777,6 +787,34 @@ fn sha256_hex(bytes: &[u8]) -> String {
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
/// Persist a SHA-addressed copy of `content` to
|
||||
/// `~/.deepseek/tool_outputs/sha_<sha>.txt` so the model can retrieve
|
||||
/// the original bytes after the wire-dedup compactor has replaced
|
||||
/// later occurrences with a `<TOOL_RESULT_REF sha="..." />` block.
|
||||
///
|
||||
/// Returns `true` when the persist succeeded (or the content is
|
||||
/// below `TOOL_RESULT_SHA_PERSIST_MIN_CHARS` — there's no retrieval
|
||||
/// need to satisfy). Returns `false` when the write failed and the
|
||||
/// caller MUST skip dedup, because emitting a SHA ref the model
|
||||
/// can't retrieve is worse than inlining the content twice. The
|
||||
/// no-home-dir edge case (InvalidInput) is treated as a real
|
||||
/// failure: we can't promise retrieval works without a writable
|
||||
/// store.
|
||||
fn persist_tool_result_for_sha(sha: &str, content: &str) -> bool {
|
||||
if content.chars().count() < TOOL_RESULT_SHA_PERSIST_MIN_CHARS {
|
||||
return true;
|
||||
}
|
||||
match crate::tools::truncate::write_sha_spillover(sha, content) {
|
||||
Ok(_) => true,
|
||||
Err(err) => {
|
||||
logging::warn(format!(
|
||||
"tool-result SHA spillover write failed for sha={sha}: {err} — dedup skipped"
|
||||
));
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct PendingToolCallInfo {
|
||||
tool_name: String,
|
||||
@@ -819,8 +857,9 @@ fn render_turn_meta_for_wire(
|
||||
.as_ref()
|
||||
.is_some_and(|previous| previous.sha256 == sha)
|
||||
{
|
||||
let rendered =
|
||||
format!("<TURN_META_REF sha=\"{sha}\" original_chars=\"{original_chars}\" />");
|
||||
// Keep the repeated metadata slot short without surfacing an
|
||||
// opaque hash the model cannot resolve.
|
||||
let rendered = "<turn_meta_unchanged />".to_string();
|
||||
let budget = TurnMetaBudget {
|
||||
original_chars,
|
||||
sent_chars: rendered.chars().count(),
|
||||
@@ -867,10 +906,30 @@ fn compact_tool_result_for_wire(
|
||||
let original_chars = content.chars().count();
|
||||
let sha = sha256_hex(content.as_bytes());
|
||||
|
||||
if let Some(previous) = seen_tool_results.get(&sha) {
|
||||
// Below the threshold, repeating the content is safer than asking
|
||||
// the model to chase a reference. Above it, persist a SHA-addressed
|
||||
// copy before any later message can point at that SHA.
|
||||
let dedup_eligible = original_chars >= TOOL_RESULT_DEDUP_MIN_CHARS;
|
||||
|
||||
if dedup_eligible && let Some(previous) = seen_tool_results.get(&sha) {
|
||||
// Re-check persistence before emitting a ref. If the file is
|
||||
// already present this is a cheap no-op; if the write now fails,
|
||||
// inline the content rather than producing an orphan reference.
|
||||
if !persist_tool_result_for_sha(&sha, content) {
|
||||
return WireToolResult {
|
||||
content: content.to_string(),
|
||||
original_chars,
|
||||
sent_chars: original_chars,
|
||||
truncated: false,
|
||||
deduplicated: false,
|
||||
};
|
||||
}
|
||||
let content = format!(
|
||||
"<TOOL_RESULT_REF sha=\"{}\" original_message=\"{}\" chars=\"{}\" />",
|
||||
sha, previous.message_label, previous.original_chars
|
||||
"<TOOL_RESULT_REF sha=\"{sha}\" original_message=\"{label}\" chars=\"{chars}\">\n\
|
||||
retrieve: retrieve_tool_result ref=sha:{sha}\n\
|
||||
</TOOL_RESULT_REF>",
|
||||
label = previous.message_label,
|
||||
chars = previous.original_chars,
|
||||
);
|
||||
return WireToolResult {
|
||||
sent_chars: content.chars().count(),
|
||||
@@ -881,13 +940,20 @@ fn compact_tool_result_for_wire(
|
||||
};
|
||||
}
|
||||
|
||||
seen_tool_results.insert(
|
||||
sha.clone(),
|
||||
SeenToolResult {
|
||||
message_label: message_label.to_string(),
|
||||
original_chars,
|
||||
},
|
||||
);
|
||||
if dedup_eligible {
|
||||
// Persist before registering the content as dedupable. If the
|
||||
// write fails, later occurrences stay inline instead of pointing
|
||||
// at a file that was never created.
|
||||
if persist_tool_result_for_sha(&sha, content) {
|
||||
seen_tool_results.insert(
|
||||
sha.clone(),
|
||||
SeenToolResult {
|
||||
message_label: message_label.to_string(),
|
||||
original_chars,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if original_chars <= TOOL_RESULT_SENT_CHAR_BUDGET {
|
||||
return WireToolResult {
|
||||
@@ -2402,14 +2468,10 @@ mod stream_decoder_tests {
|
||||
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
|
||||
let first = user_message_content(&built, 0);
|
||||
let second = user_message_content(&built, 1);
|
||||
let expected_sha = sha256_hex(turn_meta.as_bytes());
|
||||
let expected_ref = format!(
|
||||
"<TURN_META_REF sha=\"{expected_sha}\" original_chars=\"{}\" />",
|
||||
turn_meta.chars().count()
|
||||
);
|
||||
let expected_ref = "<turn_meta_unchanged />";
|
||||
|
||||
assert!(first.starts_with(turn_meta), "got: {first}");
|
||||
assert!(second.starts_with(&expected_ref), "got: {second}");
|
||||
assert!(second.starts_with(expected_ref), "got: {second}");
|
||||
assert!(second.ends_with("second task"), "got: {second}");
|
||||
assert_eq!(
|
||||
second,
|
||||
@@ -2447,7 +2509,7 @@ mod stream_decoder_tests {
|
||||
|
||||
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
|
||||
assert!(
|
||||
user_message_content(&built, 1).starts_with("<TURN_META_REF"),
|
||||
user_message_content(&built, 1).starts_with("<turn_meta_unchanged />"),
|
||||
"got: {}",
|
||||
user_message_content(&built, 1)
|
||||
);
|
||||
@@ -2536,7 +2598,7 @@ mod stream_decoder_tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn request_builder_deduplicates_identical_tool_results_for_wire() {
|
||||
fn request_builder_does_not_dedup_short_tool_results_for_wire() {
|
||||
let output = "same tool output";
|
||||
let messages = vec![
|
||||
tool_use_message("tool-1", "read_file", json!({"path": "README.md"})),
|
||||
@@ -2549,6 +2611,25 @@ mod stream_decoder_tests {
|
||||
let first = tool_message_content(&built, 0);
|
||||
let second = tool_message_content(&built, 1);
|
||||
|
||||
assert_eq!(first, output);
|
||||
assert_eq!(second, output);
|
||||
assert!(!second.contains("<TOOL_RESULT_REF"), "got: {second}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn request_builder_deduplicates_large_identical_tool_results_with_retrieval_hint() {
|
||||
let output = "A".repeat(2_000);
|
||||
let messages = vec![
|
||||
tool_use_message("tool-1", "read_file", json!({"path": "README.md"})),
|
||||
tool_result_message("tool-1", &output),
|
||||
tool_use_message("tool-2", "read_file", json!({"path": "README.md"})),
|
||||
tool_result_message("tool-2", &output),
|
||||
];
|
||||
|
||||
let built = build_chat_messages(None, &messages, "deepseek-v4-flash");
|
||||
let first = tool_message_content(&built, 0);
|
||||
let second = tool_message_content(&built, 1);
|
||||
|
||||
assert_eq!(first, output);
|
||||
assert!(
|
||||
second.starts_with("<TOOL_RESULT_REF sha=\""),
|
||||
@@ -2558,7 +2639,11 @@ mod stream_decoder_tests {
|
||||
second.contains("original_message=\"Message #1\""),
|
||||
"got: {second}"
|
||||
);
|
||||
assert!(second.contains("chars=\"16\""), "got: {second}");
|
||||
assert!(second.contains("chars=\"2000\""), "got: {second}");
|
||||
assert!(
|
||||
second.contains("retrieve: retrieve_tool_result ref=sha:"),
|
||||
"got: {second}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -2619,7 +2704,13 @@ mod stream_decoder_tests {
|
||||
assert!(tool_layers[0].truncated);
|
||||
assert!(!tool_layers[0].deduplicated);
|
||||
assert_eq!(tool_layers[1].original_chars, 14_000);
|
||||
assert!(tool_layers[1].sent_chars < 200);
|
||||
// Keep the reference far smaller than the original 14K output
|
||||
// even with a copyable retrieval hint included.
|
||||
assert!(
|
||||
tool_layers[1].sent_chars < 300,
|
||||
"deduplicated ref grew unexpectedly large: {}",
|
||||
tool_layers[1].sent_chars
|
||||
);
|
||||
assert!(!tool_layers[1].truncated);
|
||||
assert!(tool_layers[1].deduplicated);
|
||||
}
|
||||
|
||||
@@ -36,7 +36,7 @@ impl ToolSpec for RetrieveToolResultTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Retrieve a previously spilled large tool result from ~/.deepseek/tool_outputs by tool call id, filename, or spillover path. Supports summary, head, tail, lines, and query modes so you can fetch only the needed historical output."
|
||||
"Retrieve a previously spilled large tool result. Accepts a tool_call_id (`call_abc123`), artifact id (`art_call_abc123`), SHA reference (`sha:<64-hex>` or bare 64-hex from `<TOOL_RESULT_REF>`), relative filename (`call_abc123.txt`, `artifacts/art_call_abc123.txt`), or absolute path under ~/.deepseek. Modes: summary, head, tail, lines, query."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
@@ -45,7 +45,7 @@ impl ToolSpec for RetrieveToolResultTool {
|
||||
"properties": {
|
||||
"ref": {
|
||||
"type": "string",
|
||||
"description": "Tool call id, tool_result:<id>, spillover filename, or absolute spillover path."
|
||||
"description": "Tool call id, artifact id (`art_<id>`), SHA ref (`sha:<64-hex>`), spillover filename, or absolute path under ~/.deepseek."
|
||||
},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
@@ -97,7 +97,7 @@ impl ToolSpec for RetrieveToolResultTool {
|
||||
true
|
||||
}
|
||||
|
||||
async fn execute(&self, input: Value, _context: &ToolContext) -> Result<ToolResult, ToolError> {
|
||||
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
|
||||
let reference = required_str(&input, "ref")?.trim();
|
||||
if reference.is_empty() {
|
||||
return Err(ToolError::invalid_input("ref cannot be empty"));
|
||||
@@ -112,7 +112,7 @@ impl ToolSpec for RetrieveToolResultTool {
|
||||
1,
|
||||
HARD_MAX_BYTES,
|
||||
);
|
||||
let path = resolve_spillover_reference(reference)?;
|
||||
let path = resolve_spillover_reference(reference, &context.state_namespace)?;
|
||||
let content = fs::read_to_string(&path).map_err(|err| {
|
||||
ToolError::execution_failed(format!("failed to read {}: {err}", path.display()))
|
||||
})?;
|
||||
@@ -139,53 +139,222 @@ impl ToolSpec for RetrieveToolResultTool {
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_spillover_reference(reference: &str) -> Result<PathBuf, ToolError> {
|
||||
/// Resolve a tool-result ref to a concrete file path.
|
||||
///
|
||||
/// Accepts six shapes:
|
||||
/// 1. `tool_call_id` — legacy spillover form, `<id>.txt` under `tool_outputs/`.
|
||||
/// 2. `art_<id>` — current artifact id, written by `apply_spillover_with_artifact`.
|
||||
/// Tries the session artifact directory first, falls back to `<id>.txt`
|
||||
/// (stripping the `art_` prefix) so old + new naming both work.
|
||||
/// 3. `sha:<64-hex>` or bare 64-hex — content-addressed wire dedup, `sha_<hex>.txt`.
|
||||
/// 4. `tool_result:<x>` — `<x>` is any of the above after the prefix.
|
||||
/// 5. `artifacts/<file>.txt` or `<file>.txt` — relative paths.
|
||||
/// 6. Absolute paths under `~/.deepseek/`.
|
||||
///
|
||||
/// The error message on a miss enumerates which forms were tried so the
|
||||
/// model can correct course without a second blind guess.
|
||||
fn resolve_spillover_reference(reference: &str, session_id: &str) -> Result<PathBuf, ToolError> {
|
||||
let root = crate::tools::truncate::spillover_root()
|
||||
.ok_or_else(|| ToolError::execution_failed("could not resolve ~/.deepseek/tool_outputs"))?;
|
||||
let root_canonical = root.canonicalize().map_err(|err| {
|
||||
ToolError::execution_failed(format!(
|
||||
"spillover directory {} is not readable: {err}",
|
||||
root.display()
|
||||
))
|
||||
})?;
|
||||
let root_canonical = root.canonicalize().ok();
|
||||
|
||||
// Resolve the session's `artifacts/` directory.
|
||||
// `session_artifact_absolute_path(sid, p)` returns
|
||||
// `~/.deepseek/sessions/<sid>/<p>` — so passing the literal
|
||||
// `ARTIFACTS_DIR_NAME` ("artifacts") gets us the real artifacts
|
||||
// root. An earlier draft passed `Path::new(".")` and took
|
||||
// `.parent()`, which landed one directory too high (`<sid>` instead
|
||||
// of `<sid>/artifacts`) and silently broke every bare `art_<id>`
|
||||
// ref — only the legacy-spillover fallback survived. The test
|
||||
// `resolves_art_prefix_to_legacy_spillover_id` masked it because
|
||||
// it ONLY wrote a legacy spillover file. The new test
|
||||
// `resolves_art_prefix_via_session_artifacts` exercises the real
|
||||
// path.
|
||||
let session_artifacts_root = if !session_id.is_empty() {
|
||||
crate::artifacts::session_artifact_absolute_path(
|
||||
session_id,
|
||||
std::path::Path::new(crate::artifacts::ARTIFACTS_DIR_NAME),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let session_artifacts_root_canonical = session_artifacts_root
|
||||
.as_ref()
|
||||
.and_then(|p| p.canonicalize().ok());
|
||||
|
||||
let trimmed = reference.trim();
|
||||
let stripped = trimmed.strip_prefix("tool_result:").unwrap_or(trimmed);
|
||||
let raw_path = PathBuf::from(stripped);
|
||||
let stripped = trimmed
|
||||
.strip_prefix("tool_result:")
|
||||
.unwrap_or(trimmed)
|
||||
.trim();
|
||||
|
||||
let candidate = if raw_path.is_absolute() {
|
||||
raw_path
|
||||
} else if stripped.ends_with(".txt")
|
||||
|| stripped.contains('/')
|
||||
|| (std::path::MAIN_SEPARATOR != '/' && stripped.contains(std::path::MAIN_SEPARATOR))
|
||||
{
|
||||
root.join(stripped)
|
||||
} else {
|
||||
crate::tools::truncate::spillover_path(stripped).ok_or_else(|| {
|
||||
ToolError::invalid_input(format!("invalid spilled tool-result ref `{reference}`"))
|
||||
})?
|
||||
let mut tried: Vec<PathBuf> = Vec::new();
|
||||
let try_path = |candidate: PathBuf, tried: &mut Vec<PathBuf>| -> Option<PathBuf> {
|
||||
// Always record what we tried so the `not_found` diagnostic
|
||||
// can enumerate every candidate, even ones whose
|
||||
// `canonicalize` returns ENOENT. Models otherwise saw the
|
||||
// useless "(no valid candidates derived from ref)" line.
|
||||
tried.push(candidate.clone());
|
||||
|
||||
// Reject symlinks at the leaf BEFORE canonicalizing so an
|
||||
// attacker who can write under `<sid>/artifacts/` cannot
|
||||
// plant a symlink to `/etc/passwd` and read it back through
|
||||
// `retrieve_tool_result`. canonicalize() would happily
|
||||
// follow such a link and then pass the `starts_with(root)`
|
||||
// check because of the resolved-then-compare order. The
|
||||
// legacy `~/.deepseek/tool_outputs/` dir is engine-only and
|
||||
// never carried this concern; session artifact dirs hold
|
||||
// arbitrary tool output and need the guard.
|
||||
if let Ok(meta) = std::fs::symlink_metadata(&candidate)
|
||||
&& meta.file_type().is_symlink()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
let canonical = candidate.canonicalize().ok()?;
|
||||
if !canonical.is_file() {
|
||||
return None;
|
||||
}
|
||||
let inside_legacy = root_canonical
|
||||
.as_ref()
|
||||
.is_some_and(|root| canonical.starts_with(root));
|
||||
let inside_session = session_artifacts_root_canonical
|
||||
.as_ref()
|
||||
.is_some_and(|root| canonical.starts_with(root));
|
||||
if inside_legacy || inside_session {
|
||||
Some(canonical)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let canonical = candidate.canonicalize().map_err(|err| {
|
||||
ToolError::execution_failed(format!(
|
||||
"spilled tool result `{reference}` was not found at {}: {err}",
|
||||
candidate.display()
|
||||
))
|
||||
})?;
|
||||
|
||||
if !canonical.starts_with(&root_canonical) {
|
||||
return Err(ToolError::invalid_input(format!(
|
||||
"ref `{reference}` does not point inside {}",
|
||||
root_canonical.display()
|
||||
)));
|
||||
}
|
||||
if !canonical.is_file() {
|
||||
return Err(ToolError::invalid_input(format!(
|
||||
"ref `{reference}` does not point to a spillover file"
|
||||
)));
|
||||
// Form 1/3: absolute path. Validate it lives under one of the allowed roots.
|
||||
let raw_path = PathBuf::from(stripped);
|
||||
if raw_path.is_absolute() {
|
||||
if let Some(found) = try_path(raw_path.clone(), &mut tried) {
|
||||
return Ok(found);
|
||||
}
|
||||
return Err(not_found(
|
||||
reference,
|
||||
&tried,
|
||||
&root,
|
||||
session_artifacts_root.as_deref(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(canonical)
|
||||
// Form 4: `sha:<hex>` prefix or bare 64-hex SHA → SHA-addressed file.
|
||||
let sha_candidate = stripped
|
||||
.strip_prefix("sha:")
|
||||
.or_else(|| stripped.strip_prefix("sha_"))
|
||||
.unwrap_or(stripped)
|
||||
.trim();
|
||||
if crate::tools::truncate::is_valid_sha256(&sha_candidate.to_ascii_lowercase())
|
||||
&& let Some(p) = crate::tools::truncate::sha_spillover_path(sha_candidate)
|
||||
&& let Some(found) = try_path(p, &mut tried)
|
||||
{
|
||||
return Ok(found);
|
||||
}
|
||||
|
||||
// Form 5: relative path with separator or `.txt` suffix.
|
||||
let looks_like_path = stripped.ends_with(".txt")
|
||||
|| stripped.contains('/')
|
||||
|| (std::path::MAIN_SEPARATOR != '/' && stripped.contains(std::path::MAIN_SEPARATOR));
|
||||
if looks_like_path {
|
||||
// Try legacy spillover root.
|
||||
if let Some(found) = try_path(root.join(stripped), &mut tried) {
|
||||
return Ok(found);
|
||||
}
|
||||
// Session artifact roots point directly at `<sid>/artifacts/`.
|
||||
// Strip an optional leading `artifacts/` segment from transcript
|
||||
// paths before joining.
|
||||
if let Some(sa_root) = session_artifacts_root.as_ref() {
|
||||
let rel = stripped.strip_prefix("artifacts/").unwrap_or(stripped);
|
||||
if let Some(found) = try_path(sa_root.join(rel), &mut tried) {
|
||||
return Ok(found);
|
||||
}
|
||||
}
|
||||
return Err(not_found(
|
||||
reference,
|
||||
&tried,
|
||||
&root,
|
||||
session_artifacts_root.as_deref(),
|
||||
));
|
||||
}
|
||||
|
||||
// Form 1: bare id → legacy `tool_outputs/<id>.txt`.
|
||||
if let Some(p) = crate::tools::truncate::spillover_path(stripped)
|
||||
&& let Some(found) = try_path(p, &mut tried)
|
||||
{
|
||||
return Ok(found);
|
||||
}
|
||||
// Form 2: `art_<id>` → strip prefix and try both:
|
||||
// a) session artifacts dir at `artifacts/art_<id>.txt`
|
||||
// b) legacy spillover at `<id>.txt`
|
||||
if let Some(stripped_art) = stripped.strip_prefix("art_") {
|
||||
if let Some(sa_root) = session_artifacts_root.as_ref() {
|
||||
let session_file = sa_root.join(format!("art_{stripped_art}.txt"));
|
||||
if let Some(found) = try_path(session_file, &mut tried) {
|
||||
return Ok(found);
|
||||
}
|
||||
}
|
||||
if let Some(p) = crate::tools::truncate::spillover_path(stripped_art)
|
||||
&& let Some(found) = try_path(p, &mut tried)
|
||||
{
|
||||
return Ok(found);
|
||||
}
|
||||
}
|
||||
// Form 2b: maybe the model passed the bare id but the artifact lives
|
||||
// under the session artifacts dir. Try `artifacts/art_<id>.txt`.
|
||||
if let Some(sa_root) = session_artifacts_root.as_ref() {
|
||||
let session_file = sa_root.join(format!("art_{stripped}.txt"));
|
||||
if let Some(found) = try_path(session_file, &mut tried) {
|
||||
return Ok(found);
|
||||
}
|
||||
}
|
||||
|
||||
Err(not_found(
|
||||
reference,
|
||||
&tried,
|
||||
&root,
|
||||
session_artifacts_root.as_deref(),
|
||||
))
|
||||
}
|
||||
|
||||
/// Format a "ref didn't resolve" error with enough detail for the
|
||||
/// caller to choose a valid reference form on the next attempt.
|
||||
fn not_found(
|
||||
reference: &str,
|
||||
tried: &[PathBuf],
|
||||
legacy_root: &std::path::Path,
|
||||
session_artifacts_root: Option<&std::path::Path>,
|
||||
) -> ToolError {
|
||||
let tried_list = if tried.is_empty() {
|
||||
"(no valid candidates derived from ref)".to_string()
|
||||
} else {
|
||||
tried
|
||||
.iter()
|
||||
.map(|p| format!(" - {}", p.display()))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
};
|
||||
let session_hint = session_artifacts_root
|
||||
.map(|p| format!("\nsession artifacts root: {}", p.display()))
|
||||
.unwrap_or_default();
|
||||
ToolError::execution_failed(format!(
|
||||
"spilled tool result `{reference}` not found. Tried:\n{tried_list}\n\
|
||||
spillover root: {legacy}{session}\n\
|
||||
Accepted ref forms: \
|
||||
(a) `<tool_call_id>` for legacy spillover, \
|
||||
(b) `art_<tool_call_id>` for session artifacts, \
|
||||
(c) `sha:<64-hex>` or bare 64-hex from a <TOOL_RESULT_REF> block, \
|
||||
(d) `artifacts/art_<id>.txt` or `<id>.txt` relative paths. \
|
||||
If the source was a `<TOOL_RESULT_REF sha=\"...\" />` block, copy the \
|
||||
sha value and pass it as `ref=sha:<value>`. \
|
||||
If the source was an [artifact ...] block, pass the `id:` field \
|
||||
(the `art_<id>` form) directly.",
|
||||
legacy = legacy_root.display(),
|
||||
session = session_hint,
|
||||
))
|
||||
}
|
||||
|
||||
fn build_summary_payload(
|
||||
@@ -619,9 +788,170 @@ mod tests {
|
||||
|
||||
let err = execute_tool(json!({"ref": outside.display().to_string()})).unwrap_err();
|
||||
|
||||
// The new resolver classifies anything that fails to live under
|
||||
// an approved root as "not found" so we don't accidentally
|
||||
// leak whether an outside path exists on disk.
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
err.to_string().contains("does not point inside"),
|
||||
"unexpected error: {err}"
|
||||
msg.contains("not found"),
|
||||
"expected `not found` diagnostic, got: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolves_sha_reference_from_wire_dedup() {
|
||||
// A SHA-keyed lookup — emulates what happens when the model
|
||||
// sees a `<TOOL_RESULT_REF sha="..." />` block and passes the
|
||||
// SHA to retrieve_tool_result.
|
||||
let _lock = test_lock();
|
||||
let tmp = tempdir().unwrap();
|
||||
let _guard = set_spillover_root(tmp.path().join("tool_outputs"));
|
||||
let body = "checking crate ... error[E0425]: cannot find value\n".repeat(80);
|
||||
let sha = {
|
||||
use sha2::{Digest, Sha256};
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(body.as_bytes());
|
||||
format!("{:x}", hasher.finalize())
|
||||
};
|
||||
crate::tools::truncate::write_sha_spillover(&sha, &body).unwrap();
|
||||
|
||||
// Form: `sha:<hex>`
|
||||
let result = execute_tool(json!({"ref": format!("sha:{sha}")})).unwrap();
|
||||
assert!(result.success, "sha:<hex> form should resolve");
|
||||
|
||||
// Form: bare 64-hex
|
||||
let result = execute_tool(json!({"ref": &sha})).unwrap();
|
||||
assert!(result.success, "bare 64-hex form should resolve");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolves_art_prefix_to_legacy_spillover_id() {
|
||||
// The model commonly sees `id: art_call_xyz` in artifact
|
||||
// ref blocks. retrieve_tool_result should strip the `art_`
|
||||
// prefix and find the legacy `<id>.txt` file if no
|
||||
// session-artifact equivalent exists.
|
||||
let _lock = test_lock();
|
||||
let tmp = tempdir().unwrap();
|
||||
let _guard = set_spillover_root(tmp.path().join("tool_outputs"));
|
||||
crate::tools::truncate::write_spillover("call_xyz", "line1\nline2\nline3").unwrap();
|
||||
|
||||
let result = execute_tool(json!({"ref": "art_call_xyz"})).unwrap();
|
||||
assert!(result.success, "art_ prefix should resolve to legacy id");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn not_found_error_lists_tried_candidates_and_accepted_forms() {
|
||||
let _lock = test_lock();
|
||||
let tmp = tempdir().unwrap();
|
||||
let _guard = set_spillover_root(tmp.path().join("tool_outputs"));
|
||||
fs::create_dir_all(tmp.path().join("tool_outputs")).unwrap();
|
||||
|
||||
let err = execute_tool(json!({"ref": "definitely_missing_id"})).unwrap_err();
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains("not found"), "got: {msg}");
|
||||
assert!(
|
||||
msg.contains("sha:"),
|
||||
"diagnostic should mention sha form: {msg}"
|
||||
);
|
||||
assert!(
|
||||
msg.contains("art_<tool_call_id>"),
|
||||
"diagnostic should mention art form: {msg}"
|
||||
);
|
||||
assert!(
|
||||
msg.contains("tool_outputs"),
|
||||
"tried list should include the legacy spillover candidate: {msg}"
|
||||
);
|
||||
assert!(
|
||||
!msg.contains("(no valid candidates derived from ref)"),
|
||||
"tried list should not be empty: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolves_art_prefix_via_session_artifacts() {
|
||||
let _lock = test_lock();
|
||||
let tmp = tempdir().unwrap();
|
||||
let _spill_guard = set_spillover_root(tmp.path().join("tool_outputs"));
|
||||
let _art_guard = {
|
||||
let prior = crate::artifacts::set_test_artifact_sessions_root(Some(
|
||||
tmp.path().join("sessions"),
|
||||
));
|
||||
scopeguard_for_test(prior)
|
||||
};
|
||||
let session_id = "session-abc";
|
||||
let body = "this is the canonical session artifact body, not a legacy file";
|
||||
crate::artifacts::write_session_artifact(session_id, "art_call_real", body).unwrap();
|
||||
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
let workspace_tmp = tempdir().unwrap();
|
||||
let ctx = ToolContext::new(workspace_tmp.path()).with_state_namespace(session_id);
|
||||
let result = runtime
|
||||
.block_on(RetrieveToolResultTool.execute(json!({"ref": "art_call_real"}), &ctx))
|
||||
.expect("art_<id> should resolve via session artifacts");
|
||||
assert!(result.success);
|
||||
let payload: Value = serde_json::from_str(&result.content).unwrap();
|
||||
assert!(
|
||||
payload
|
||||
.to_string()
|
||||
.contains("canonical session artifact body"),
|
||||
"summary should pull from session artifact, got: {}",
|
||||
payload
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[test]
|
||||
fn rejects_symlink_inside_session_artifacts() {
|
||||
let _lock = test_lock();
|
||||
let tmp = tempdir().unwrap();
|
||||
let _spill_guard = set_spillover_root(tmp.path().join("tool_outputs"));
|
||||
let _art_guard = {
|
||||
let prior = crate::artifacts::set_test_artifact_sessions_root(Some(
|
||||
tmp.path().join("sessions"),
|
||||
));
|
||||
scopeguard_for_test(prior)
|
||||
};
|
||||
let session_id = "session-xyz";
|
||||
// Plant a sensitive file outside the artifact dir.
|
||||
let secret = tmp.path().join("secret.txt");
|
||||
fs::write(&secret, "do not leak").unwrap();
|
||||
// Create the artifact dir, then drop a symlink inside it
|
||||
// pointing at the secret.
|
||||
let art_dir = tmp
|
||||
.path()
|
||||
.join("sessions")
|
||||
.join(session_id)
|
||||
.join("artifacts");
|
||||
fs::create_dir_all(&art_dir).unwrap();
|
||||
std::os::unix::fs::symlink(&secret, art_dir.join("art_evil.txt")).unwrap();
|
||||
|
||||
let runtime = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.unwrap();
|
||||
let workspace_tmp = tempdir().unwrap();
|
||||
let ctx = ToolContext::new(workspace_tmp.path()).with_state_namespace(session_id);
|
||||
let result =
|
||||
runtime.block_on(RetrieveToolResultTool.execute(json!({"ref": "art_evil"}), &ctx));
|
||||
let err = result.expect_err("symlink artifact must not resolve");
|
||||
assert!(
|
||||
err.to_string().contains("not found"),
|
||||
"expected `not found`, got: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
struct ArtifactRootGuard {
|
||||
prior: Option<PathBuf>,
|
||||
}
|
||||
impl Drop for ArtifactRootGuard {
|
||||
fn drop(&mut self) {
|
||||
crate::artifacts::set_test_artifact_sessions_root(self.prior.take());
|
||||
}
|
||||
}
|
||||
fn scopeguard_for_test(prior: Option<PathBuf>) -> ArtifactRootGuard {
|
||||
ArtifactRootGuard { prior }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,6 +103,53 @@ pub fn spillover_path(id: &str) -> Option<PathBuf> {
|
||||
Some(spillover_root()?.join(format!("{sanitised}.txt")))
|
||||
}
|
||||
|
||||
/// Resolve the spillover-file path for a SHA256 content hash. Separate
|
||||
/// namespace (`sha_<hex>.txt`) from the tool-call-id files so the two
|
||||
/// reference systems (engine-side spillover + wire-side dedup) can
|
||||
/// co-exist in one directory without collisions. `sha` must be the
|
||||
/// raw 64-char lowercase hex digest — case-insensitive matching is
|
||||
/// done by the caller.
|
||||
#[must_use]
|
||||
pub fn sha_spillover_path(sha: &str) -> Option<PathBuf> {
|
||||
let sha = sha.trim().to_ascii_lowercase();
|
||||
if !is_valid_sha256(&sha) {
|
||||
return None;
|
||||
}
|
||||
Some(spillover_root()?.join(format!("sha_{sha}.txt")))
|
||||
}
|
||||
|
||||
/// True when `s` is a 64-character lowercase ASCII hex string. Used
|
||||
/// to detect bare SHA refs the model might pass to retrieval and to
|
||||
/// validate input to [`sha_spillover_path`].
|
||||
#[must_use]
|
||||
pub fn is_valid_sha256(s: &str) -> bool {
|
||||
s.len() == 64
|
||||
&& s.chars()
|
||||
.all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase())
|
||||
}
|
||||
|
||||
/// Write content to the SHA-addressed spillover file. Idempotent —
|
||||
/// the same hash always maps to the same path, and the file's bytes
|
||||
/// are a function of the hash. Skips the write if the file already
|
||||
/// exists (which is the common case for the wire dedup, since the
|
||||
/// second sighting writes the same content that the first did).
|
||||
pub fn write_sha_spillover(sha: &str, content: &str) -> io::Result<PathBuf> {
|
||||
let path = sha_spillover_path(sha).ok_or_else(|| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"sha must be a 64-char lowercase hex digest",
|
||||
)
|
||||
})?;
|
||||
if path.exists() {
|
||||
return Ok(path);
|
||||
}
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
crate::utils::write_atomic(&path, content.as_bytes())?;
|
||||
Ok(path)
|
||||
}
|
||||
|
||||
/// Write `content` to the spillover file for `id`. Creates the
|
||||
/// parent directory if needed. Returns the resolved path on success.
|
||||
///
|
||||
|
||||
Reference in New Issue
Block a user