fix(tools): activate read-only git history + actionable RLM/field errors

v0.8.53 tool/deferred/error UX (PR group 4), low-risk subset:

- #2654: add git_log and git_show to DEFAULT_ACTIVE_NATIVE_TOOLS so read-only
  git history joins git_diff/git_status in the active partition (kept
  alphabetical → prefix-cache head stays sorted/byte-stable). git_blame and
  other history tools remain deferred.
- #2655: rlm_open's source-count error now echoes common misnamed fields with a
  "did you mean file_path/content/url" hint; rlm_eval's missing-`code` error
  explains it runs raw Python and shows an example. Schema descriptions for
  rlm_eval name/code sharpened.
- #2659: likely_field_corrections gains RLM source-field rename hints (the
  role/type vocabulary change itself lives in the WS3 PR #2684 to avoid a
  double-edit of normalize_role_alias).

Deferred to the medium-risk batch: #2648 (render deferred-tool hydration
distinctly from "done") — needs a ToolStatus/cell-build change with wider
render blast radius than this low-risk PR.

Verification: cargo test -p codewhale-tui --bins → 3944 passed, 0 failed
(incl. prefix-cache sort invariant); cargo clippy clean.
Targets codex/v0.8.53.
This commit is contained in:
Hunter Bown
2026-06-03 11:31:33 -07:00
parent 03d1bba538
commit 7bbc6b78e4
3 changed files with 84 additions and 10 deletions
+16 -4
View File
@@ -470,8 +470,9 @@ fn core_native_tools_stay_loaded_in_yolo_mode() {
AppMode::Yolo,
&always_load
));
// git_blame remains deferred (read-only git history beyond log/show/diff).
assert!(should_default_defer_tool(
"git_show",
"git_blame",
AppMode::Yolo,
&always_load
));
@@ -505,6 +506,17 @@ fn non_yolo_mode_retains_default_defer_policy() {
AppMode::Agent,
&always_load
));
// #2654: read-only git history joins the active set.
assert!(!should_default_defer_tool(
"git_log",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"git_show",
AppMode::Agent,
&always_load
));
assert!(!should_default_defer_tool(
"git_status",
AppMode::Agent,
@@ -546,7 +558,7 @@ fn non_yolo_mode_retains_default_defer_policy() {
&always_load
));
assert!(should_default_defer_tool(
"git_show",
"git_blame",
AppMode::Agent,
&always_load
));
@@ -749,9 +761,9 @@ fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() {
#[test]
fn tools_always_load_overrides_default_native_deferral() {
let always_load = HashSet::from(["git_show".to_string()]);
let always_load = HashSet::from(["git_blame".to_string()]);
assert!(!should_default_defer_tool(
"git_show",
"git_blame",
AppMode::Agent,
&always_load
));
@@ -47,6 +47,8 @@ pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[
"fetch_url",
"file_search",
"git_diff",
"git_log",
"git_show",
"git_status",
"grep_files",
"list_dir",
@@ -770,6 +772,17 @@ fn likely_field_corrections(
.to_string(),
);
}
// RLM source fields are easy to misname (#2659). rlm_open takes exactly one
// of file_path / content / url; nudge common wrong names toward those.
if tool_name == "rlm_open" {
for wrong in ["prompt", "resident_file", "text", "body", "path", "file", "source"] {
if has_received(wrong) && !has_received("file_path") && !has_received("content")
&& !has_received("url")
{
corrections.push(format!("{wrong} -> file_path (local file), content (inline text), or url"));
}
}
}
corrections
}
+55 -6
View File
@@ -146,9 +146,23 @@ impl ToolSpec for RlmOpenTool {
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
let source_count = rlm_open_source_count(&input);
if source_count != 1 {
return Err(ToolError::invalid_input(
"rlm_open: provide exactly one of `file_path`, `content`, or `url`",
));
let mut msg = String::from(
"rlm_open: provide exactly one of `file_path` (local file), `content` (inline text), or `url`",
);
// "did you mean" for common misnamings (#2655).
if let Some(obj) = input.as_object() {
let seen: Vec<&str> =
["prompt", "resident_file", "text", "body", "path", "file", "source"]
.into_iter()
.filter(|k| obj.contains_key(*k))
.collect();
if !seen.is_empty() {
msg.push_str(&format!(
". Saw {seen:?} — did you mean file_path/content/url? (to evaluate against an existing context, pass its name to rlm_eval, or use `session_object`)"
));
}
}
return Err(ToolError::invalid_input(msg));
}
let (body, source_type, source_hint) = load_source(&input, context).await?;
@@ -234,8 +248,8 @@ impl ToolSpec for RlmEvalTool {
"type": "object",
"required": ["name", "code"],
"properties": {
"name": { "type": "string", "description": "RLM context name from rlm_open." },
"code": { "type": "string", "description": "Python code to execute. Do not include markdown fences." }
"name": { "type": "string", "description": "RLM context name returned by rlm_open." },
"code": { "type": "string", "description": "Raw Python executed against the context (no markdown fences). The loaded source is in scope; call FINAL(value)/finalize(...) to return a result handle. Example: print(len(SOURCE))." }
}
})
}
@@ -250,7 +264,12 @@ impl ToolSpec for RlmEvalTool {
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
let name = required_non_empty_str(&input, "name")?;
let code = required_non_empty_str(&input, "code")?;
let code = required_non_empty_str(&input, "code").map_err(|_| {
ToolError::invalid_input(
"rlm_eval: `code` is required and runs raw Python against the RLM context (no markdown fences). \
Example: {\"name\": \"<ctx>\", \"code\": \"print(len(SOURCE))\"}; call FINAL(value) to return a result handle.",
)
})?;
let session = get_session(context, name).await?;
let mut session = session.lock().await;
let config = session.config.clone();
@@ -789,6 +808,36 @@ mod tests {
.expect("close");
}
#[tokio::test]
async fn rlm_open_misnamed_source_field_gets_did_you_mean_hint() {
// #2655: a wrong source field name yields actionable guidance, not just
// the canonical "provide exactly one" message.
let ctx = ctx();
let err = RlmOpenTool
.execute(json!({"name": "doc", "prompt": "summarize this"}), &ctx)
.await
.expect_err("misnamed source field should fail");
let msg = err.to_string();
assert!(msg.contains("file_path"), "names the real fields: {msg}");
assert!(msg.contains("prompt"), "echoes the wrong field: {msg}");
}
#[tokio::test]
async fn rlm_eval_missing_code_explains_raw_python() {
// #2655: the missing-code error should teach the tool, with an example.
let ctx = ctx();
let err = RlmEvalTool::new(None)
.execute(json!({"name": "doc"}), &ctx)
.await
.expect_err("missing code should fail");
let msg = err.to_string();
assert!(msg.contains("raw Python"), "explains it runs Python: {msg}");
assert!(
msg.contains("print(len(SOURCE))") || msg.contains("FINAL"),
"includes an example: {msg}"
);
}
#[tokio::test]
async fn rlm_session_open_eval_close_lifecycle() {
let ctx = ctx();