Merge branch 'codex/v0.8.53-tool-deferred-ux' into codex/v0.8.53
This commit is contained in:
@@ -470,8 +470,9 @@ fn core_native_tools_stay_loaded_in_yolo_mode() {
|
||||
AppMode::Yolo,
|
||||
&always_load
|
||||
));
|
||||
// git_blame remains deferred (read-only git history beyond log/show/diff).
|
||||
assert!(should_default_defer_tool(
|
||||
"git_show",
|
||||
"git_blame",
|
||||
AppMode::Yolo,
|
||||
&always_load
|
||||
));
|
||||
@@ -505,6 +506,17 @@ fn non_yolo_mode_retains_default_defer_policy() {
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
// #2654: read-only git history joins the active set.
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_log",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_show",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_status",
|
||||
AppMode::Agent,
|
||||
@@ -559,7 +571,7 @@ fn non_yolo_mode_retains_default_defer_policy() {
|
||||
&always_load
|
||||
));
|
||||
assert!(should_default_defer_tool(
|
||||
"git_show",
|
||||
"git_blame",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
@@ -762,9 +774,9 @@ fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() {
|
||||
|
||||
#[test]
|
||||
fn tools_always_load_overrides_default_native_deferral() {
|
||||
let always_load = HashSet::from(["git_show".to_string()]);
|
||||
let always_load = HashSet::from(["git_blame".to_string()]);
|
||||
assert!(!should_default_defer_tool(
|
||||
"git_show",
|
||||
"git_blame",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
@@ -1041,6 +1053,68 @@ fn deferred_tool_preflight_loads_edit_schema_without_executing_bad_aliases() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deferred_tool_preflight_guides_rlm_open_misnamed_source_fields() {
|
||||
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
|
||||
let registry = engine
|
||||
.build_turn_tool_registry_builder(
|
||||
AppMode::Agent,
|
||||
engine.config.todos.clone(),
|
||||
engine.config.plan_state.clone(),
|
||||
)
|
||||
.build(engine.build_tool_context(AppMode::Agent, false));
|
||||
let always_load = HashSet::new();
|
||||
let mut catalog = build_model_tool_catalog(
|
||||
registry.to_api_tools_with_cache(true),
|
||||
vec![],
|
||||
AppMode::Agent,
|
||||
&always_load,
|
||||
);
|
||||
catalog
|
||||
.iter_mut()
|
||||
.find(|tool| tool.name == "rlm_open")
|
||||
.expect("rlm_open registered")
|
||||
.defer_loading = Some(true);
|
||||
let mut active = initial_active_tools(&catalog);
|
||||
assert!(!active.contains("rlm_open"));
|
||||
|
||||
let result = preflight_requested_deferred_tool(
|
||||
"rlm_open",
|
||||
&json!({
|
||||
"name": "active_prompt",
|
||||
"prompt": "inspect this",
|
||||
"path": "src/lib.rs"
|
||||
}),
|
||||
&catalog,
|
||||
&mut active,
|
||||
)
|
||||
.expect("deferred rlm_open should preflight");
|
||||
|
||||
assert!(active.contains("rlm_open"));
|
||||
assert!(result.success);
|
||||
assert!(result.content.contains("Tool `rlm_open` was deferred"));
|
||||
assert!(result.content.contains("The tool was not executed"));
|
||||
assert!(result.content.contains("session_object: string"));
|
||||
assert!(
|
||||
result.content.contains(
|
||||
"prompt -> file_path (local file), content (inline text), url, or session_object"
|
||||
),
|
||||
"prompt correction includes session_object: {}",
|
||||
result.content
|
||||
);
|
||||
assert!(
|
||||
result.content.contains(
|
||||
"path -> file_path (local file), content (inline text), url, or session_object"
|
||||
),
|
||||
"path correction includes session_object: {}",
|
||||
result.content
|
||||
);
|
||||
assert_eq!(
|
||||
result.metadata.as_ref().unwrap()["deferred_tool_loaded"],
|
||||
json!(true)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deferred_tool_preflight_guides_checklist_update_list_replacement() {
|
||||
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
|
||||
|
||||
@@ -49,6 +49,8 @@ pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[
|
||||
"fetch_url",
|
||||
"file_search",
|
||||
"git_diff",
|
||||
"git_log",
|
||||
"git_show",
|
||||
"git_status",
|
||||
"grep_files",
|
||||
"list_dir",
|
||||
@@ -772,6 +774,29 @@ fn likely_field_corrections(
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
// RLM source fields are easy to misname (#2659). rlm_open takes exactly one
|
||||
// of file_path / content / url / session_object; nudge common wrong names
|
||||
// toward those.
|
||||
if tool_name == "rlm_open" {
|
||||
for wrong in [
|
||||
"prompt",
|
||||
"resident_file",
|
||||
"text",
|
||||
"body",
|
||||
"path",
|
||||
"file",
|
||||
"source",
|
||||
] {
|
||||
if has_received(wrong)
|
||||
&& !has_received("file_path")
|
||||
&& !has_received("content")
|
||||
&& !has_received("url")
|
||||
&& !has_received("session_object")
|
||||
{
|
||||
corrections.push(format!("{wrong} -> file_path (local file), content (inline text), url, or session_object"));
|
||||
}
|
||||
}
|
||||
}
|
||||
corrections
|
||||
}
|
||||
|
||||
|
||||
+71
-10
@@ -97,10 +97,11 @@ impl ToolSpec for RlmOpenTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Open a persistent RLM context. Loads `file_path`, `content`, or `url` \
|
||||
into a named Python kernel and returns only metadata: name, length, \
|
||||
preview, and sha256. Use this for large or unfamiliar inputs so the \
|
||||
parent transcript holds a handle, not the body."
|
||||
"Open a persistent RLM context. Loads `file_path`, `content`, `url`, \
|
||||
or `session_object` into a named Python kernel and returns only \
|
||||
metadata: name, length, preview, and sha256. Use this for large or \
|
||||
unfamiliar inputs so the parent transcript holds a handle, not the \
|
||||
body."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
@@ -146,9 +147,30 @@ impl ToolSpec for RlmOpenTool {
|
||||
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
|
||||
let source_count = rlm_open_source_count(&input);
|
||||
if source_count != 1 {
|
||||
return Err(ToolError::invalid_input(
|
||||
"rlm_open: provide exactly one of `file_path`, `content`, or `url`",
|
||||
));
|
||||
let mut msg = String::from(
|
||||
"rlm_open: provide exactly one of `file_path` (local file), `content` (inline text), `url`, or `session_object`",
|
||||
);
|
||||
// "did you mean" for common misnamings (#2655).
|
||||
if let Some(obj) = input.as_object() {
|
||||
let seen: Vec<&str> = [
|
||||
"prompt",
|
||||
"resident_file",
|
||||
"text",
|
||||
"body",
|
||||
"path",
|
||||
"file",
|
||||
"source",
|
||||
]
|
||||
.into_iter()
|
||||
.filter(|k| obj.contains_key(*k))
|
||||
.collect();
|
||||
if !seen.is_empty() {
|
||||
msg.push_str(&format!(
|
||||
". Saw {seen:?} — did you mean file_path/content/url/session_object? (to evaluate against an existing context, pass its name to rlm_eval, or use `session_object`)"
|
||||
));
|
||||
}
|
||||
}
|
||||
return Err(ToolError::invalid_input(msg));
|
||||
}
|
||||
|
||||
let (body, source_type, source_hint) = load_source(&input, context).await?;
|
||||
@@ -234,8 +256,8 @@ impl ToolSpec for RlmEvalTool {
|
||||
"type": "object",
|
||||
"required": ["name", "code"],
|
||||
"properties": {
|
||||
"name": { "type": "string", "description": "RLM context name from rlm_open." },
|
||||
"code": { "type": "string", "description": "Python code to execute. Do not include markdown fences." }
|
||||
"name": { "type": "string", "description": "RLM context name returned by rlm_open." },
|
||||
"code": { "type": "string", "description": "Raw Python executed against the context (no markdown fences). The loaded source is in scope; call FINAL(value)/finalize(...) to return a result handle. Example: print(len(SOURCE))." }
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -250,7 +272,12 @@ impl ToolSpec for RlmEvalTool {
|
||||
|
||||
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
|
||||
let name = required_non_empty_str(&input, "name")?;
|
||||
let code = required_non_empty_str(&input, "code")?;
|
||||
let code = required_non_empty_str(&input, "code").map_err(|_| {
|
||||
ToolError::invalid_input(
|
||||
"rlm_eval: `code` is required and runs raw Python against the RLM context (no markdown fences). \
|
||||
Example: {\"name\": \"<ctx>\", \"code\": \"print(len(SOURCE))\"}; call FINAL(value) to return a result handle.",
|
||||
)
|
||||
})?;
|
||||
let session = get_session(context, name).await?;
|
||||
let mut session = session.lock().await;
|
||||
let config = session.config.clone();
|
||||
@@ -789,6 +816,40 @@ mod tests {
|
||||
.expect("close");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn rlm_open_misnamed_source_field_gets_did_you_mean_hint() {
|
||||
// #2655: a wrong source field name yields actionable guidance, not just
|
||||
// the canonical "provide exactly one" message.
|
||||
let ctx = ctx();
|
||||
let err = RlmOpenTool
|
||||
.execute(json!({"name": "doc", "prompt": "summarize this"}), &ctx)
|
||||
.await
|
||||
.expect_err("misnamed source field should fail");
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains("file_path"), "names the real fields: {msg}");
|
||||
assert!(
|
||||
msg.contains("`url`, or `session_object`"),
|
||||
"names session_object in the valid source field list: {msg}"
|
||||
);
|
||||
assert!(msg.contains("prompt"), "echoes the wrong field: {msg}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn rlm_eval_missing_code_explains_raw_python() {
|
||||
// #2655: the missing-code error should teach the tool, with an example.
|
||||
let ctx = ctx();
|
||||
let err = RlmEvalTool::new(None)
|
||||
.execute(json!({"name": "doc"}), &ctx)
|
||||
.await
|
||||
.expect_err("missing code should fail");
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains("raw Python"), "explains it runs Python: {msg}");
|
||||
assert!(
|
||||
msg.contains("print(len(SOURCE))") || msg.contains("FINAL"),
|
||||
"includes an example: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn rlm_session_open_eval_close_lifecycle() {
|
||||
let ctx = ctx();
|
||||
|
||||
Reference in New Issue
Block a user