From f7a602cd204b6b6773b75de0b40ee4947366cd93 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Wed, 3 Jun 2026 19:20:23 -0700 Subject: [PATCH] feat(tools): hide todo_* aliases from model catalog, add deprecation metadata (#2682) - Add model_visible() hook to ToolSpec trait (default true) - Override model_visible() -> false on todo_write, todo_add, todo_update, todo_list - Checklist variants remain model-visible as the canonical surface - Legacy todo_* calls still work for saved transcript replay - Return _deprecation metadata with use_instead and removed_in=0.9.0 - Update prompts to recommend checklist_* only - Update TOOL_SURFACE.md with v0.9.0 deprecation notes - Add tests for hidden catalog, compat alias behavior, and metadata Verification: cargo test -p codewhale-tui -- todo, cargo clippy -D warnings --- crates/tui/src/prompts/base.md | 2 +- crates/tui/src/prompts/base.txt | 2 +- crates/tui/src/tools/registry.rs | 38 +++++++++++ crates/tui/src/tools/spec.rs | 8 +++ crates/tui/src/tools/todo.rs | 109 +++++++++++++++++++++++++++++-- docs/TOOL_SURFACE.md | 7 +- 6 files changed, 158 insertions(+), 8 deletions(-) diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index abbfc0c9..fd194628 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -242,7 +242,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise ## Toolbox (fast reference — tool descriptions are authoritative) -- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory). +- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory). - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs. - **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait. - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools. diff --git a/crates/tui/src/prompts/base.txt b/crates/tui/src/prompts/base.txt index 7346f127..291e98d0 100644 --- a/crates/tui/src/prompts/base.txt +++ b/crates/tui/src/prompts/base.txt @@ -37,7 +37,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking` ## Toolbox (fast reference — tool descriptions are authoritative) -- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory). +- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory). - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs. - **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools. diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs index cb3ca0bf..b1dd5bd2 100644 --- a/crates/tui/src/tools/registry.rs +++ b/crates/tui/src/tools/registry.rs @@ -222,6 +222,7 @@ impl ToolRegistry { tools.sort_by(|a, b| a.name().cmp(b.name())); tools .into_iter() + .filter(|tool| tool.model_visible()) .map(|tool| { let mut schema = tool.input_schema(); schema_sanitize::sanitize(&mut schema); @@ -1202,6 +1203,43 @@ mod tests { assert_eq!(registry.len(), 1); } + #[test] + fn todo_aliases_stay_callable_but_hidden_from_model_catalog() { + let tmp = tempdir().expect("tempdir"); + let ctx = ToolContext::new(tmp.path().to_path_buf()); + let registry = ToolRegistryBuilder::new() + .with_todo_tool(crate::tools::todo::new_shared_todo_list()) + .build(ctx); + + for alias in ["todo_write", "todo_add", "todo_update", "todo_list"] { + assert!(registry.contains(alias), "{alias} should remain callable"); + } + + let api_names = registry + .to_api_tools() + .into_iter() + .map(|tool| tool.name) + .collect::>(); + + for canonical in [ + "checklist_write", + "checklist_add", + "checklist_update", + "checklist_list", + ] { + assert!( + api_names.iter().any(|name| name == canonical), + "{canonical} should stay model-visible" + ); + } + for alias in ["todo_write", "todo_add", "todo_update", "todo_list"] { + assert!( + api_names.iter().all(|name| name != alias), + "{alias} should be hidden from the model catalog" + ); + } + } + #[test] fn apply_overrides_removes_original_when_replacement_is_missing() { let tmp = tempdir().expect("tempdir"); diff --git a/crates/tui/src/tools/spec.rs b/crates/tui/src/tools/spec.rs index 6a66c37f..52553cdf 100644 --- a/crates/tui/src/tools/spec.rs +++ b/crates/tui/src/tools/spec.rs @@ -664,6 +664,14 @@ pub trait ToolSpec: Send + Sync { false } + /// Returns whether this tool should be advertised in the model-facing + /// catalog. Hidden compatibility tools remain registered and executable + /// by name so saved transcripts can replay without teaching new sessions + /// the deprecated spelling. + fn model_visible(&self) -> bool { + true + } + /// Execute the tool with the given input and context. async fn execute(&self, input: Value, context: &ToolContext) -> Result; } diff --git a/crates/tui/src/tools/todo.rs b/crates/tui/src/tools/todo.rs index c24185ca..a955073c 100644 --- a/crates/tui/src/tools/todo.rs +++ b/crates/tui/src/tools/todo.rs @@ -174,6 +174,22 @@ pub fn new_shared_todo_list() -> SharedTodoList { Arc::new(Mutex::new(TodoList::new())) } +const TODO_ALIAS_FIRST_DEPRECATED_VERSION: &str = "0.8.53"; +const TODO_ALIAS_REMOVAL_VERSION: &str = "0.9.0"; + +fn is_compat_alias(tool_name: &str) -> bool { + tool_name.starts_with("todo_") +} + +fn checklist_replacement_tool_name(tool_name: &str) -> &'static str { + match tool_name { + "todo_add" | "checklist_add" => "checklist_add", + "todo_update" | "checklist_update" => "checklist_update", + "todo_list" | "checklist_list" => "checklist_list", + _ => "checklist_write", + } +} + /// Tool for writing and updating the todo list pub struct TodoWriteTool { todo_list: SharedTodoList, @@ -258,6 +274,10 @@ impl ToolSpec for TodoAddTool { ApprovalRequirement::Auto } + fn model_visible(&self) -> bool { + !is_compat_alias(self.tool_name) + } + async fn execute( &self, input: serde_json::Value, @@ -350,6 +370,10 @@ impl ToolSpec for TodoUpdateTool { ApprovalRequirement::Auto } + fn model_visible(&self) -> bool { + !is_compat_alias(self.tool_name) + } + async fn execute( &self, input: serde_json::Value, @@ -435,6 +459,10 @@ impl ToolSpec for TodoListTool { ApprovalRequirement::Auto } + fn model_visible(&self) -> bool { + !is_compat_alias(self.tool_name) + } + async fn execute( &self, _input: serde_json::Value, @@ -448,7 +476,8 @@ impl ToolSpec for TodoListTool { snapshot.items.len(), snapshot.completion_pct, result - ))) + )) + .with_metadata(checklist_metadata(&snapshot, self.tool_name))) } } @@ -502,6 +531,10 @@ impl ToolSpec for TodoWriteTool { ApprovalRequirement::Auto } + fn model_visible(&self) -> bool { + !is_compat_alias(self.tool_name) + } + async fn execute( &self, input: serde_json::Value, @@ -547,6 +580,8 @@ impl ToolSpec for TodoWriteTool { } fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_json::Value { + let canonical_tool = checklist_replacement_tool_name(tool_name); + let compat_alias = is_compat_alias(tool_name); let items = snapshot .items .iter() @@ -558,9 +593,9 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso }) }) .collect::>(); - json!({ - "canonical_tool": "checklist_write", - "compat_alias": tool_name.starts_with("todo_"), + let mut metadata = json!({ + "canonical_tool": canonical_tool, + "compat_alias": compat_alias, "task_updates": { "checklist": { "items": items, @@ -569,7 +604,22 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso "updated_at": null } } - }) + }); + if compat_alias && let Some(obj) = metadata.as_object_mut() { + obj.insert( + "_deprecation".to_string(), + json!({ + "this_tool": tool_name, + "use_instead": canonical_tool, + "first_deprecated": TODO_ALIAS_FIRST_DEPRECATED_VERSION, + "removed_in": TODO_ALIAS_REMOVAL_VERSION, + "message": format!( + "Tool '{tool_name}' is a hidden compatibility alias; use '{canonical_tool}' before v{TODO_ALIAS_REMOVAL_VERSION}." + ), + }), + ); + } + metadata } #[cfg(test)] @@ -626,5 +676,54 @@ mod tests { assert_eq!(tool.name(), "todo_write"); assert_eq!(metadata["canonical_tool"], "checklist_write"); assert_eq!(metadata["compat_alias"], true); + assert_eq!(metadata["_deprecation"]["this_tool"], "todo_write"); + assert_eq!(metadata["_deprecation"]["use_instead"], "checklist_write"); + assert_eq!(metadata["_deprecation"]["removed_in"], "0.9.0"); + assert!(!tool.model_visible()); + } + + #[tokio::test] + async fn todo_item_aliases_return_replacement_metadata() { + let list = new_shared_todo_list(); + let context = ToolContext::new(std::env::temp_dir()); + + let add = TodoAddTool::new(list.clone()); + let add_result = add + .execute( + json!({"content": "legacy add", "status": "in_progress"}), + &context, + ) + .await + .expect("todo add succeeds"); + let add_metadata = add_result.metadata.expect("add metadata"); + assert_eq!(add_metadata["canonical_tool"], "checklist_add"); + assert_eq!(add_metadata["_deprecation"]["use_instead"], "checklist_add"); + assert!(!add.model_visible()); + + let update = TodoUpdateTool::new(list.clone()); + let update_result = update + .execute(json!({"id": 1, "status": "completed"}), &context) + .await + .expect("todo update succeeds"); + let update_metadata = update_result.metadata.expect("update metadata"); + assert_eq!(update_metadata["canonical_tool"], "checklist_update"); + assert_eq!( + update_metadata["_deprecation"]["use_instead"], + "checklist_update" + ); + assert!(!update.model_visible()); + + let list_tool = TodoListTool::new(list); + let list_result = list_tool + .execute(json!({}), &context) + .await + .expect("todo list succeeds"); + let list_metadata = list_result.metadata.expect("list metadata"); + assert_eq!(list_metadata["canonical_tool"], "checklist_list"); + assert_eq!( + list_metadata["_deprecation"]["use_instead"], + "checklist_list" + ); + assert!(!list_tool.model_visible()); } } diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index 250f4483..fe0c3858 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -110,9 +110,14 @@ to the model, such as `mcp__`. | `task_cancel` | Cancel a queued or running durable task. Approval-required. | | `checklist_write` | Granular progress under the active thread/task. Checklist state is subordinate to the durable task. | | `checklist_add` / `checklist_update` / `checklist_list` | Single-item checklist operations. | -| `todo_write` / `todo_add` / `todo_update` / `todo_list` | Compatibility aliases for the checklist tools. Existing sessions keep working, but new prompts should use `checklist_*`. | | `note` | One-off important fact for later. | +The legacy `todo_write`, `todo_add`, `todo_update`, and `todo_list` names are +hidden compatibility aliases for saved transcript replay. They remain callable +by exact name, but they are not part of the model-visible catalog; compatibility +results include `_deprecation.use_instead = checklist_*` and +`_deprecation.removed_in = 0.9.0`. + ### Verification gates and artifacts | Tool | Niche |