fix(tools): hydrate deferred schemas before execution

Return a schema hydration result on first deferred tool use so the model can retry with visible parameters instead of executing guessed arguments. Add edit_file coverage for old_string/new_string aliases. (cherry picked from commit 91be171cc15dd895170bd1a486445f5e05356b57)
2026-05-11 13:34:04 +08:00
parent cd3767c75a
commit 9279d92f4e
4 changed files with 255 additions and 31 deletions
@@ -2049,10 +2049,12 @@ use self::tool_catalog::{
    CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
    active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling,
    execute_code_execution_tool, execute_tool_search, initial_active_tools, is_tool_search_tool,
-    maybe_activate_requested_deferred_tool, missing_tool_error_message,
+    maybe_hydrate_requested_deferred_tool, missing_tool_error_message,
 };
 #[cfg(test)]
-use self::tool_catalog::{TOOL_SEARCH_BM25_NAME, should_default_defer_tool};
+use self::tool_catalog::{
+    TOOL_SEARCH_BM25_NAME, maybe_activate_requested_deferred_tool, should_default_defer_tool,
+};
 use self::tool_execution::emit_tool_audit;
 use self::tool_setup::sandbox_policy_for_mode;

@@ -366,6 +366,79 @@ fn model_tool_catalog_applies_native_and_mcp_deferral() {
    assert_eq!(defer_loading("mcp_server_write"), Some(true));
 }

+#[test]
+fn deferred_edit_file_first_use_hydrates_schema_without_execution() {
+    let mut edit = api_tool("edit_file");
+    edit.defer_loading = Some(true);
+    edit.input_schema = json!({
+        "type": "object",
+        "properties": {
+            "path": { "type": "string" },
+            "search": { "type": "string" },
+            "replace": { "type": "string" }
+        },
+        "required": ["path", "search", "replace"]
+    });
+
+    let catalog = vec![edit];
+    let active_at_batch_start = HashSet::new();
+    let mut hydrated_this_batch = HashSet::new();
+    let result = maybe_hydrate_requested_deferred_tool(
+        "edit_file",
+        &json!({
+            "path": "src/foo.rs",
+            "old_string": "before",
+            "new_string": "after"
+        }),
+        &catalog,
+        &active_at_batch_start,
+        &mut hydrated_this_batch,
+    )
+    .expect("first deferred use should hydrate");
+
+    assert!(!active_at_batch_start.contains("edit_file"));
+    assert!(hydrated_this_batch.contains("edit_file"));
+    assert!(result.success);
+    assert!(result.content.contains("Tool `edit_file` was deferred"));
+    assert!(result.content.contains("path: string"));
+    assert!(result.content.contains("search: string"));
+    assert!(result.content.contains("replace: string"));
+    assert!(result.content.contains("old_string -> search"));
+    assert!(result.content.contains("new_string -> replace"));
+    assert!(result.content.contains("The tool was not executed"));
+
+    let metadata = result.metadata.expect("metadata");
+    assert_eq!(metadata["event"], "tool.schema_hydrated");
+    assert_eq!(metadata["executed"], false);
+    assert_eq!(metadata["retry_required"], true);
+
+    let second_result = maybe_hydrate_requested_deferred_tool(
+        "edit_file",
+        &json!({"path": "src/bar.rs", "old_string": "before", "new_string": "after"}),
+        &catalog,
+        &active_at_batch_start,
+        &mut hydrated_this_batch,
+    )
+    .expect("later calls in the same batch should hydrate instead of executing");
+    assert_eq!(second_result.metadata.unwrap()["executed"], false);
+    assert_eq!(hydrated_this_batch.len(), 1);
+
+    let mut active_next_batch = active_at_batch_start.clone();
+    active_next_batch.extend(hydrated_this_batch);
+    let mut hydrated_next_batch = HashSet::new();
+    assert!(
+        maybe_hydrate_requested_deferred_tool(
+            "edit_file",
+            &json!({"path": "src/foo.rs", "search": "before", "replace": "after"}),
+            &catalog,
+            &active_next_batch,
+            &mut hydrated_next_batch,
+        )
+        .is_none(),
+        "tools hydrated in a previous batch should execute normally"
+    );
+}
+
 #[test]
 fn model_tool_catalog_keeps_everything_loaded_in_yolo_mode() {
    let catalog = build_model_tool_catalog(
@@ -9,7 +9,7 @@ use std::collections::HashSet;
 use std::path::Path;
 use std::time::Duration;

-use serde_json::json;
+use serde_json::{Value, json};

 use crate::models::Tool;
 use crate::tools::spec::{ToolError, ToolResult, required_str};
@@ -390,6 +390,7 @@ pub(super) fn missing_tool_error_message(tool_name: &str, catalog: &[Tool]) -> S
    )
 }

+#[cfg(test)]
 pub(super) fn maybe_activate_requested_deferred_tool(
    tool_name: &str,
    catalog: &[Tool],
@@ -406,6 +407,144 @@ pub(super) fn maybe_activate_requested_deferred_tool(
    active_tools.insert(tool_name.to_string())
 }

+pub(super) fn maybe_hydrate_requested_deferred_tool(
+    tool_name: &str,
+    tool_input: &Value,
+    catalog: &[Tool],
+    active_tools_at_batch_start: &HashSet<String>,
+    hydrated_tools_this_batch: &mut HashSet<String>,
+) -> Option<ToolResult> {
+    let def = catalog.iter().find(|def| def.name == tool_name)?;
+
+    if !def.defer_loading.unwrap_or(false) || active_tools_at_batch_start.contains(tool_name) {
+        return None;
+    }
+
+    hydrated_tools_this_batch.insert(tool_name.to_string());
+    Some(deferred_tool_schema_hydration_result(def, tool_input))
+}
+
+fn deferred_tool_schema_hydration_result(tool: &Tool, tool_input: &Value) -> ToolResult {
+    let expected = schema_field_lines(tool);
+    let received = received_field_names(tool_input);
+    let corrections = likely_field_corrections(&tool.name, &received);
+
+    let expected_text = if expected.is_empty() {
+        "  (no declared fields)".to_string()
+    } else {
+        expected
+            .iter()
+            .map(|field| format!("  {field}"))
+            .collect::<Vec<_>>()
+            .join("\n")
+    };
+    let received_text = if received.is_empty() {
+        "  (none)".to_string()
+    } else {
+        format!("  {}", received.join(", "))
+    };
+    let correction_text = if corrections.is_empty() {
+        String::new()
+    } else {
+        format!(
+            "\n\nLikely correction:\n{}",
+            corrections
+                .iter()
+                .map(|field| format!("  {field}"))
+                .collect::<Vec<_>>()
+                .join("\n")
+        )
+    };
+
+    ToolResult::success(format!(
+        "Tool `{}` was deferred and has now been loaded.\n\nExpected schema:\n{}\n\nReceived fields:\n{}{}\n\nThe tool was not executed. Retry the same operation with the loaded schema.",
+        tool.name, expected_text, received_text, correction_text
+    ))
+    .with_metadata(json!({
+        "event": "tool.schema_hydrated",
+        "tool": tool.name,
+        "executed": false,
+        "retry_required": true,
+        "reason": "deferred_tool_first_use",
+    }))
+}
+
+fn schema_field_lines(tool: &Tool) -> Vec<String> {
+    let mut required = Vec::new();
+    if let Some(items) = tool.input_schema.get("required").and_then(Value::as_array) {
+        for item in items {
+            if let Some(field) = item.as_str() {
+                required.push(field.to_string());
+            }
+        }
+    }
+
+    let Some(properties) = tool
+        .input_schema
+        .get("properties")
+        .and_then(Value::as_object)
+    else {
+        return required;
+    };
+
+    let mut fields = Vec::new();
+    let mut seen = HashSet::new();
+    for field in &required {
+        if let Some(schema) = properties.get(field) {
+            fields.push(format!("{field}: {}", schema_type_label(schema)));
+            seen.insert(field.as_str());
+        } else {
+            fields.push(field.clone());
+        }
+    }
+    for (field, schema) in properties {
+        if seen.contains(field.as_str()) {
+            continue;
+        }
+        fields.push(format!("{field}: {} (optional)", schema_type_label(schema)));
+    }
+    fields
+}
+
+fn schema_type_label(schema: &Value) -> String {
+    schema
+        .get("type")
+        .and_then(Value::as_str)
+        .unwrap_or("value")
+        .to_string()
+}
+
+fn received_field_names(input: &Value) -> Vec<String> {
+    let mut fields = input
+        .as_object()
+        .map(|object| object.keys().cloned().collect::<Vec<_>>())
+        .unwrap_or_default();
+    fields.sort();
+    fields
+}
+
+fn likely_field_corrections(tool_name: &str, received: &[String]) -> Vec<String> {
+    if tool_name != "edit_file" {
+        return Vec::new();
+    }
+
+    let has = |name: &str| received.iter().any(|field| field == name);
+    let mut corrections = Vec::new();
+    if has("old_string") {
+        corrections.push("old_string -> search".to_string());
+    } else if has("old_str") {
+        corrections.push("old_str -> search".to_string());
+    }
+    if has("new_string") {
+        corrections.push("new_string -> replace".to_string());
+    } else if has("new_str") {
+        corrections.push("new_str -> replace".to_string());
+    } else if has("replacement") {
+        corrections.push("replacement -> replace".to_string());
+    }
+    corrections
+}
+
 pub(super) fn execute_tool_search(
    tool_name: &str,
    input: &serde_json::Value,
@@ -1021,6 +1021,9 @@ impl Engine {
                None
            };

+            let active_tools_at_batch_start = active_tool_names.clone();
+            let mut deferred_tools_hydrated_this_batch: std::collections::HashSet<String> =
+                std::collections::HashSet::new();
            let mut plans: Vec<ToolExecutionPlan> = Vec::with_capacity(tool_uses.len());
            for (index, tool) in tool_uses.iter_mut().enumerate() {
                let tool_id = tool.id.clone();
@@ -1062,18 +1065,7 @@ impl Engine {
                    )));
                }

-                if maybe_activate_requested_deferred_tool(
-                    &tool_name,
-                    &tool_catalog,
-                    &mut active_tool_names,
-                ) {
-                    let _ = self
-                        .tx_event
-                        .send(Event::status(format!(
-                            "Auto-loaded deferred tool '{tool_name}' after model request."
-                        )))
-                        .await;
-                }
+                let requested_tool_name = tool_name.clone();
                let mut tool_def = tool_catalog.iter().find(|def| def.name == tool_name);

                // Resolve hallucinated tool names when the model emits a
@@ -1092,21 +1084,6 @@ impl Engine {
                        // Update the tool_uses entry so the result is
                        // attributed to the canonical name.
                        tool.name = tool_name.clone();
-                        // Re-run the deferred-activation check with the
-                        // canonical name.
-                        if maybe_activate_requested_deferred_tool(
-                            &tool_name,
-                            &tool_catalog,
-                            &mut active_tool_names,
-                        ) {
-                            let _ = self
-                                .tx_event
-                                .send(Event::status(format!(
-                                    "Auto-loaded deferred tool '{}' after resolving '{}'.",
-                                    tool_name, tool_name
-                                )))
-                                .await;
-                        }
                    }
                }

@@ -1154,7 +1131,33 @@ impl Engine {
                    read_only = true;
                }

+                let should_emit_hydration_status =
+                    !deferred_tools_hydrated_this_batch.contains(&tool_name);
                if blocked_error.is_none()
+                    && let Some(result) = maybe_hydrate_requested_deferred_tool(
+                        &tool_name,
+                        &tool_input,
+                        &tool_catalog,
+                        &active_tools_at_batch_start,
+                        &mut deferred_tools_hydrated_this_batch,
+                    )
+                {
+                    if should_emit_hydration_status {
+                        let status = if requested_tool_name == tool_name {
+                            format!("Auto-loaded deferred tool '{tool_name}' after model request.")
+                        } else {
+                            format!(
+                                "Auto-loaded deferred tool '{}' after resolving '{}'.",
+                                tool_name, requested_tool_name
+                            )
+                        };
+                        let _ = self.tx_event.send(Event::status(status)).await;
+                    }
+                    guard_result = Some(result);
+                }
+
+                if blocked_error.is_none()
+                    && guard_result.is_none()
                    && let AttemptDecision::Block(message) =
                        loop_guard.record_attempt(&tool_name, &tool_input)
                {
@@ -1180,6 +1183,7 @@ impl Engine {
                    guard_result,
                });
            }
+            active_tool_names.extend(deferred_tools_hydrated_this_batch);

            let parallel_allowed = should_parallelize_tool_batch(&plans);
            if parallel_allowed && plans.len() > 1 {
@@ -1656,6 +1660,12 @@ impl Engine {
                            &outcome.name,
                            &output,
                        );
+                        let tool_was_executed = output
+                            .metadata
+                            .as_ref()
+                            .and_then(|metadata| metadata.get("executed"))
+                            .and_then(serde_json::Value::as_bool)
+                            .unwrap_or(true);
                        let output_content = output.content;

                        tool_call.set_result(output_content.clone(), duration);
@@ -1670,7 +1680,7 @@ impl Engine {
                        // this on success — failed edits leave the file
                        // untouched, so polling for diagnostics would just
                        // surface stale state.
-                        if output.success {
+                        if output.success && tool_was_executed {
                            self.run_post_edit_lsp_hook(&outcome.name, &tool_input)
                                .await;
                        }