fix(tools): hydrate deferred schemas before execution
Return a schema hydration result on first deferred tool use so the model can retry with visible parameters instead of executing guessed arguments. Add edit_file coverage for old_string/new_string aliases. (cherry picked from commit 91be171cc15dd895170bd1a486445f5e05356b57)
This commit is contained in:
@@ -2049,10 +2049,12 @@ use self::tool_catalog::{
|
||||
CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
|
||||
active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling,
|
||||
execute_code_execution_tool, execute_tool_search, initial_active_tools, is_tool_search_tool,
|
||||
maybe_activate_requested_deferred_tool, missing_tool_error_message,
|
||||
maybe_hydrate_requested_deferred_tool, missing_tool_error_message,
|
||||
};
|
||||
#[cfg(test)]
|
||||
use self::tool_catalog::{TOOL_SEARCH_BM25_NAME, should_default_defer_tool};
|
||||
use self::tool_catalog::{
|
||||
TOOL_SEARCH_BM25_NAME, maybe_activate_requested_deferred_tool, should_default_defer_tool,
|
||||
};
|
||||
use self::tool_execution::emit_tool_audit;
|
||||
use self::tool_setup::sandbox_policy_for_mode;
|
||||
|
||||
|
||||
@@ -366,6 +366,79 @@ fn model_tool_catalog_applies_native_and_mcp_deferral() {
|
||||
assert_eq!(defer_loading("mcp_server_write"), Some(true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deferred_edit_file_first_use_hydrates_schema_without_execution() {
|
||||
let mut edit = api_tool("edit_file");
|
||||
edit.defer_loading = Some(true);
|
||||
edit.input_schema = json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": { "type": "string" },
|
||||
"search": { "type": "string" },
|
||||
"replace": { "type": "string" }
|
||||
},
|
||||
"required": ["path", "search", "replace"]
|
||||
});
|
||||
|
||||
let catalog = vec![edit];
|
||||
let active_at_batch_start = HashSet::new();
|
||||
let mut hydrated_this_batch = HashSet::new();
|
||||
let result = maybe_hydrate_requested_deferred_tool(
|
||||
"edit_file",
|
||||
&json!({
|
||||
"path": "src/foo.rs",
|
||||
"old_string": "before",
|
||||
"new_string": "after"
|
||||
}),
|
||||
&catalog,
|
||||
&active_at_batch_start,
|
||||
&mut hydrated_this_batch,
|
||||
)
|
||||
.expect("first deferred use should hydrate");
|
||||
|
||||
assert!(!active_at_batch_start.contains("edit_file"));
|
||||
assert!(hydrated_this_batch.contains("edit_file"));
|
||||
assert!(result.success);
|
||||
assert!(result.content.contains("Tool `edit_file` was deferred"));
|
||||
assert!(result.content.contains("path: string"));
|
||||
assert!(result.content.contains("search: string"));
|
||||
assert!(result.content.contains("replace: string"));
|
||||
assert!(result.content.contains("old_string -> search"));
|
||||
assert!(result.content.contains("new_string -> replace"));
|
||||
assert!(result.content.contains("The tool was not executed"));
|
||||
|
||||
let metadata = result.metadata.expect("metadata");
|
||||
assert_eq!(metadata["event"], "tool.schema_hydrated");
|
||||
assert_eq!(metadata["executed"], false);
|
||||
assert_eq!(metadata["retry_required"], true);
|
||||
|
||||
let second_result = maybe_hydrate_requested_deferred_tool(
|
||||
"edit_file",
|
||||
&json!({"path": "src/bar.rs", "old_string": "before", "new_string": "after"}),
|
||||
&catalog,
|
||||
&active_at_batch_start,
|
||||
&mut hydrated_this_batch,
|
||||
)
|
||||
.expect("later calls in the same batch should hydrate instead of executing");
|
||||
assert_eq!(second_result.metadata.unwrap()["executed"], false);
|
||||
assert_eq!(hydrated_this_batch.len(), 1);
|
||||
|
||||
let mut active_next_batch = active_at_batch_start.clone();
|
||||
active_next_batch.extend(hydrated_this_batch);
|
||||
let mut hydrated_next_batch = HashSet::new();
|
||||
assert!(
|
||||
maybe_hydrate_requested_deferred_tool(
|
||||
"edit_file",
|
||||
&json!({"path": "src/foo.rs", "search": "before", "replace": "after"}),
|
||||
&catalog,
|
||||
&active_next_batch,
|
||||
&mut hydrated_next_batch,
|
||||
)
|
||||
.is_none(),
|
||||
"tools hydrated in a previous batch should execute normally"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_tool_catalog_keeps_everything_loaded_in_yolo_mode() {
|
||||
let catalog = build_model_tool_catalog(
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
use serde_json::json;
|
||||
use serde_json::{Value, json};
|
||||
|
||||
use crate::models::Tool;
|
||||
use crate::tools::spec::{ToolError, ToolResult, required_str};
|
||||
@@ -390,6 +390,7 @@ pub(super) fn missing_tool_error_message(tool_name: &str, catalog: &[Tool]) -> S
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) fn maybe_activate_requested_deferred_tool(
|
||||
tool_name: &str,
|
||||
catalog: &[Tool],
|
||||
@@ -406,6 +407,144 @@ pub(super) fn maybe_activate_requested_deferred_tool(
|
||||
active_tools.insert(tool_name.to_string())
|
||||
}
|
||||
|
||||
pub(super) fn maybe_hydrate_requested_deferred_tool(
|
||||
tool_name: &str,
|
||||
tool_input: &Value,
|
||||
catalog: &[Tool],
|
||||
active_tools_at_batch_start: &HashSet<String>,
|
||||
hydrated_tools_this_batch: &mut HashSet<String>,
|
||||
) -> Option<ToolResult> {
|
||||
let def = catalog.iter().find(|def| def.name == tool_name)?;
|
||||
|
||||
if !def.defer_loading.unwrap_or(false) || active_tools_at_batch_start.contains(tool_name) {
|
||||
return None;
|
||||
}
|
||||
|
||||
hydrated_tools_this_batch.insert(tool_name.to_string());
|
||||
Some(deferred_tool_schema_hydration_result(def, tool_input))
|
||||
}
|
||||
|
||||
fn deferred_tool_schema_hydration_result(tool: &Tool, tool_input: &Value) -> ToolResult {
|
||||
let expected = schema_field_lines(tool);
|
||||
let received = received_field_names(tool_input);
|
||||
let corrections = likely_field_corrections(&tool.name, &received);
|
||||
|
||||
let expected_text = if expected.is_empty() {
|
||||
" (no declared fields)".to_string()
|
||||
} else {
|
||||
expected
|
||||
.iter()
|
||||
.map(|field| format!(" {field}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
};
|
||||
let received_text = if received.is_empty() {
|
||||
" (none)".to_string()
|
||||
} else {
|
||||
format!(" {}", received.join(", "))
|
||||
};
|
||||
let correction_text = if corrections.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(
|
||||
"\n\nLikely correction:\n{}",
|
||||
corrections
|
||||
.iter()
|
||||
.map(|field| format!(" {field}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
)
|
||||
};
|
||||
|
||||
ToolResult::success(format!(
|
||||
"Tool `{}` was deferred and has now been loaded.\n\nExpected schema:\n{}\n\nReceived fields:\n{}{}\n\nThe tool was not executed. Retry the same operation with the loaded schema.",
|
||||
tool.name, expected_text, received_text, correction_text
|
||||
))
|
||||
.with_metadata(json!({
|
||||
"event": "tool.schema_hydrated",
|
||||
"tool": tool.name,
|
||||
"executed": false,
|
||||
"retry_required": true,
|
||||
"reason": "deferred_tool_first_use",
|
||||
}))
|
||||
}
|
||||
|
||||
fn schema_field_lines(tool: &Tool) -> Vec<String> {
|
||||
let mut required = Vec::new();
|
||||
if let Some(items) = tool.input_schema.get("required").and_then(Value::as_array) {
|
||||
for item in items {
|
||||
if let Some(field) = item.as_str() {
|
||||
required.push(field.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let Some(properties) = tool
|
||||
.input_schema
|
||||
.get("properties")
|
||||
.and_then(Value::as_object)
|
||||
else {
|
||||
return required;
|
||||
};
|
||||
|
||||
let mut fields = Vec::new();
|
||||
let mut seen = HashSet::new();
|
||||
for field in &required {
|
||||
if let Some(schema) = properties.get(field) {
|
||||
fields.push(format!("{field}: {}", schema_type_label(schema)));
|
||||
seen.insert(field.as_str());
|
||||
} else {
|
||||
fields.push(field.clone());
|
||||
}
|
||||
}
|
||||
for (field, schema) in properties {
|
||||
if seen.contains(field.as_str()) {
|
||||
continue;
|
||||
}
|
||||
fields.push(format!("{field}: {} (optional)", schema_type_label(schema)));
|
||||
}
|
||||
fields
|
||||
}
|
||||
|
||||
fn schema_type_label(schema: &Value) -> String {
|
||||
schema
|
||||
.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("value")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn received_field_names(input: &Value) -> Vec<String> {
|
||||
let mut fields = input
|
||||
.as_object()
|
||||
.map(|object| object.keys().cloned().collect::<Vec<_>>())
|
||||
.unwrap_or_default();
|
||||
fields.sort();
|
||||
fields
|
||||
}
|
||||
|
||||
fn likely_field_corrections(tool_name: &str, received: &[String]) -> Vec<String> {
|
||||
if tool_name != "edit_file" {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let has = |name: &str| received.iter().any(|field| field == name);
|
||||
let mut corrections = Vec::new();
|
||||
if has("old_string") {
|
||||
corrections.push("old_string -> search".to_string());
|
||||
} else if has("old_str") {
|
||||
corrections.push("old_str -> search".to_string());
|
||||
}
|
||||
if has("new_string") {
|
||||
corrections.push("new_string -> replace".to_string());
|
||||
} else if has("new_str") {
|
||||
corrections.push("new_str -> replace".to_string());
|
||||
} else if has("replacement") {
|
||||
corrections.push("replacement -> replace".to_string());
|
||||
}
|
||||
corrections
|
||||
}
|
||||
|
||||
pub(super) fn execute_tool_search(
|
||||
tool_name: &str,
|
||||
input: &serde_json::Value,
|
||||
|
||||
@@ -1021,6 +1021,9 @@ impl Engine {
|
||||
None
|
||||
};
|
||||
|
||||
let active_tools_at_batch_start = active_tool_names.clone();
|
||||
let mut deferred_tools_hydrated_this_batch: std::collections::HashSet<String> =
|
||||
std::collections::HashSet::new();
|
||||
let mut plans: Vec<ToolExecutionPlan> = Vec::with_capacity(tool_uses.len());
|
||||
for (index, tool) in tool_uses.iter_mut().enumerate() {
|
||||
let tool_id = tool.id.clone();
|
||||
@@ -1062,18 +1065,7 @@ impl Engine {
|
||||
)));
|
||||
}
|
||||
|
||||
if maybe_activate_requested_deferred_tool(
|
||||
&tool_name,
|
||||
&tool_catalog,
|
||||
&mut active_tool_names,
|
||||
) {
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::status(format!(
|
||||
"Auto-loaded deferred tool '{tool_name}' after model request."
|
||||
)))
|
||||
.await;
|
||||
}
|
||||
let requested_tool_name = tool_name.clone();
|
||||
let mut tool_def = tool_catalog.iter().find(|def| def.name == tool_name);
|
||||
|
||||
// Resolve hallucinated tool names when the model emits a
|
||||
@@ -1092,21 +1084,6 @@ impl Engine {
|
||||
// Update the tool_uses entry so the result is
|
||||
// attributed to the canonical name.
|
||||
tool.name = tool_name.clone();
|
||||
// Re-run the deferred-activation check with the
|
||||
// canonical name.
|
||||
if maybe_activate_requested_deferred_tool(
|
||||
&tool_name,
|
||||
&tool_catalog,
|
||||
&mut active_tool_names,
|
||||
) {
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::status(format!(
|
||||
"Auto-loaded deferred tool '{}' after resolving '{}'.",
|
||||
tool_name, tool_name
|
||||
)))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1154,7 +1131,33 @@ impl Engine {
|
||||
read_only = true;
|
||||
}
|
||||
|
||||
let should_emit_hydration_status =
|
||||
!deferred_tools_hydrated_this_batch.contains(&tool_name);
|
||||
if blocked_error.is_none()
|
||||
&& let Some(result) = maybe_hydrate_requested_deferred_tool(
|
||||
&tool_name,
|
||||
&tool_input,
|
||||
&tool_catalog,
|
||||
&active_tools_at_batch_start,
|
||||
&mut deferred_tools_hydrated_this_batch,
|
||||
)
|
||||
{
|
||||
if should_emit_hydration_status {
|
||||
let status = if requested_tool_name == tool_name {
|
||||
format!("Auto-loaded deferred tool '{tool_name}' after model request.")
|
||||
} else {
|
||||
format!(
|
||||
"Auto-loaded deferred tool '{}' after resolving '{}'.",
|
||||
tool_name, requested_tool_name
|
||||
)
|
||||
};
|
||||
let _ = self.tx_event.send(Event::status(status)).await;
|
||||
}
|
||||
guard_result = Some(result);
|
||||
}
|
||||
|
||||
if blocked_error.is_none()
|
||||
&& guard_result.is_none()
|
||||
&& let AttemptDecision::Block(message) =
|
||||
loop_guard.record_attempt(&tool_name, &tool_input)
|
||||
{
|
||||
@@ -1180,6 +1183,7 @@ impl Engine {
|
||||
guard_result,
|
||||
});
|
||||
}
|
||||
active_tool_names.extend(deferred_tools_hydrated_this_batch);
|
||||
|
||||
let parallel_allowed = should_parallelize_tool_batch(&plans);
|
||||
if parallel_allowed && plans.len() > 1 {
|
||||
@@ -1656,6 +1660,12 @@ impl Engine {
|
||||
&outcome.name,
|
||||
&output,
|
||||
);
|
||||
let tool_was_executed = output
|
||||
.metadata
|
||||
.as_ref()
|
||||
.and_then(|metadata| metadata.get("executed"))
|
||||
.and_then(serde_json::Value::as_bool)
|
||||
.unwrap_or(true);
|
||||
let output_content = output.content;
|
||||
|
||||
tool_call.set_result(output_content.clone(), duration);
|
||||
@@ -1670,7 +1680,7 @@ impl Engine {
|
||||
// this on success — failed edits leave the file
|
||||
// untouched, so polling for diagnostics would just
|
||||
// surface stale state.
|
||||
if output.success {
|
||||
if output.success && tool_was_executed {
|
||||
self.run_post_edit_lsp_hook(&outcome.name, &tool_input)
|
||||
.await;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user