From f7a602cd204b6b6773b75de0b40ee4947366cd93 Mon Sep 17 00:00:00 2001
From: Hunter B <hmbown@gmail.com>
Date: Wed, 3 Jun 2026 19:20:23 -0700
Subject: [PATCH] feat(tools): hide todo_* aliases from model catalog, add
 deprecation metadata (#2682)

- Add model_visible() hook to ToolSpec trait (default true)
- Override model_visible() -> false on todo_write, todo_add, todo_update, todo_list
- Checklist variants remain model-visible as the canonical surface
- Legacy todo_* calls still work for saved transcript replay
- Return _deprecation metadata with use_instead and removed_in=0.9.0
- Update prompts to recommend checklist_* only
- Update TOOL_SURFACE.md with v0.9.0 deprecation notes
- Add tests for hidden catalog, compat alias behavior, and metadata

Verification: cargo test -p codewhale-tui -- todo; cargo clippy -- -D warnings
---
 crates/tui/src/prompts/base.md   |   2 +-
 crates/tui/src/prompts/base.txt  |   2 +-
 crates/tui/src/tools/registry.rs |  38 +++++++++++
 crates/tui/src/tools/spec.rs     |   8 +++
 crates/tui/src/tools/todo.rs     | 109 +++++++++++++++++++++++++++++--
 docs/TOOL_SURFACE.md             |   7 +-
 6 files changed, 158 insertions(+), 8 deletions(-)

diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md
index abbfc0c9..fd194628 100644
--- a/crates/tui/src/prompts/base.md
+++ b/crates/tui/src/prompts/base.md
@@ -242,7 +242,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise
 
 ## Toolbox (fast reference — tool descriptions are authoritative)
 
-- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory).
+- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory).
 - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs.
 - **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait.
 - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools.
diff --git a/crates/tui/src/prompts/base.txt b/crates/tui/src/prompts/base.txt
index 7346f127..291e98d0 100644
--- a/crates/tui/src/prompts/base.txt
+++ b/crates/tui/src/prompts/base.txt
@@ -37,7 +37,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`
 
 ## Toolbox (fast reference — tool descriptions are authoritative)
 
-- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory).
+- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory).
 - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs.
 - **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`.
 - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools.
diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs
index cb3ca0bf..b1dd5bd2 100644
--- a/crates/tui/src/tools/registry.rs
+++ b/crates/tui/src/tools/registry.rs
@@ -222,6 +222,7 @@ impl ToolRegistry {
         tools.sort_by(|a, b| a.name().cmp(b.name()));
         tools
             .into_iter()
+            .filter(|tool| tool.model_visible())
             .map(|tool| {
                 let mut schema = tool.input_schema();
                 schema_sanitize::sanitize(&mut schema);
@@ -1202,6 +1203,43 @@ mod tests {
         assert_eq!(registry.len(), 1);
     }
 
+    #[test]
+    fn todo_aliases_stay_callable_but_hidden_from_model_catalog() {
+        let tmp = tempdir().expect("tempdir");
+        let ctx = ToolContext::new(tmp.path().to_path_buf());
+        let registry = ToolRegistryBuilder::new()
+            .with_todo_tool(crate::tools::todo::new_shared_todo_list())
+            .build(ctx);
+        // Registry lookup: every legacy todo_* name must still be registered.
+        for alias in ["todo_write", "todo_add", "todo_update", "todo_list"] {
+            assert!(registry.contains(alias), "{alias} should remain callable");
+        }
+        // Model catalog: collect the tool names returned by `to_api_tools()`.
+        let api_names = registry
+            .to_api_tools()
+            .into_iter()
+            .map(|tool| tool.name)
+            .collect::<Vec<_>>();
+        // The canonical checklist_* names must be advertised; the todo_* aliases must not.
+        for canonical in [
+            "checklist_write",
+            "checklist_add",
+            "checklist_update",
+            "checklist_list",
+        ] {
+            assert!(
+                api_names.iter().any(|name| name == canonical),
+                "{canonical} should stay model-visible"
+            );
+        }
+        for alias in ["todo_write", "todo_add", "todo_update", "todo_list"] {
+            assert!(
+                api_names.iter().all(|name| name != alias),
+                "{alias} should be hidden from the model catalog"
+            );
+        }
+    }
+
     #[test]
     fn apply_overrides_removes_original_when_replacement_is_missing() {
         let tmp = tempdir().expect("tempdir");
diff --git a/crates/tui/src/tools/spec.rs b/crates/tui/src/tools/spec.rs
index 6a66c37f..52553cdf 100644
--- a/crates/tui/src/tools/spec.rs
+++ b/crates/tui/src/tools/spec.rs
@@ -664,6 +664,14 @@ pub trait ToolSpec: Send + Sync {
         false
     }
 
+    /// Reports whether this tool is advertised in the model-facing catalog;
+    /// the default is `true`. Hidden compatibility tools remain registered and
+    /// executable by name so saved transcripts can replay without teaching new
+    /// sessions the deprecated spelling.
+    fn model_visible(&self) -> bool {
+        true
+    }
+
     /// Execute the tool with the given input and context.
     async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError>;
 }
diff --git a/crates/tui/src/tools/todo.rs b/crates/tui/src/tools/todo.rs
index c24185ca..a955073c 100644
--- a/crates/tui/src/tools/todo.rs
+++ b/crates/tui/src/tools/todo.rs
@@ -174,6 +174,22 @@ pub fn new_shared_todo_list() -> SharedTodoList {
     Arc::new(Mutex::new(TodoList::new()))
 }
 
+const TODO_ALIAS_FIRST_DEPRECATED_VERSION: &str = "0.8.53"; // emitted as `first_deprecated`
+const TODO_ALIAS_REMOVAL_VERSION: &str = "0.9.0"; // emitted as `removed_in` and in the message
+
+fn is_compat_alias(tool_name: &str) -> bool {
+    tool_name.starts_with("todo_") // legacy aliases are recognized purely by the `todo_` prefix
+}
+
+fn checklist_replacement_tool_name(tool_name: &str) -> &'static str {
+    match tool_name { // maps either spelling to the canonical `checklist_*` name
+        "todo_add" | "checklist_add" => "checklist_add",
+        "todo_update" | "checklist_update" => "checklist_update",
+        "todo_list" | "checklist_list" => "checklist_list",
+        _ => "checklist_write", // todo_write, checklist_write, and any other name
+    }
+}
+
 /// Tool for writing and updating the todo list
 pub struct TodoWriteTool {
     todo_list: SharedTodoList,
@@ -258,6 +274,10 @@ impl ToolSpec for TodoAddTool {
         ApprovalRequirement::Auto
     }
 
+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name) // hidden when this instance's name is a `todo_*` alias
+    }
+
     async fn execute(
         &self,
         input: serde_json::Value,
@@ -350,6 +370,10 @@ impl ToolSpec for TodoUpdateTool {
         ApprovalRequirement::Auto
     }
 
+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name) // hidden when this instance's name is a `todo_*` alias
+    }
+
     async fn execute(
         &self,
         input: serde_json::Value,
@@ -435,6 +459,10 @@ impl ToolSpec for TodoListTool {
         ApprovalRequirement::Auto
     }
 
+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name) // hidden when this instance's name is a `todo_*` alias
+    }
+
     async fn execute(
         &self,
         _input: serde_json::Value,
@@ -448,7 +476,8 @@ impl ToolSpec for TodoListTool {
             snapshot.items.len(),
             snapshot.completion_pct,
             result
-        )))
+        ))
+        .with_metadata(checklist_metadata(&snapshot, self.tool_name)))
     }
 }
 
@@ -502,6 +531,10 @@ impl ToolSpec for TodoWriteTool {
         ApprovalRequirement::Auto
     }
 
+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name) // hidden when this instance's name is a `todo_*` alias
+    }
+
     async fn execute(
         &self,
         input: serde_json::Value,
@@ -547,6 +580,8 @@ impl ToolSpec for TodoWriteTool {
 }
 
 fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_json::Value {
+    let canonical_tool = checklist_replacement_tool_name(tool_name);
+    let compat_alias = is_compat_alias(tool_name);
     let items = snapshot
         .items
         .iter()
@@ -558,9 +593,9 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso
             })
         })
         .collect::<Vec<_>>();
-    json!({
-        "canonical_tool": "checklist_write",
-        "compat_alias": tool_name.starts_with("todo_"),
+    let mut metadata = json!({
+        "canonical_tool": canonical_tool,
+        "compat_alias": compat_alias,
         "task_updates": {
             "checklist": {
                 "items": items,
@@ -569,7 +604,22 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso
                 "updated_at": null
             }
         }
-    })
+    });
+    if compat_alias && let Some(obj) = metadata.as_object_mut() {
+        obj.insert(
+            "_deprecation".to_string(),
+            json!({
+                "this_tool": tool_name,
+                "use_instead": canonical_tool,
+                "first_deprecated": TODO_ALIAS_FIRST_DEPRECATED_VERSION,
+                "removed_in": TODO_ALIAS_REMOVAL_VERSION,
+                "message": format!(
+                    "Tool '{tool_name}' is a hidden compatibility alias; use '{canonical_tool}' before v{TODO_ALIAS_REMOVAL_VERSION}."
+                ),
+            }),
+        );
+    }
+    metadata
 }
 
 #[cfg(test)]
@@ -626,5 +676,54 @@ mod tests {
         assert_eq!(tool.name(), "todo_write");
         assert_eq!(metadata["canonical_tool"], "checklist_write");
         assert_eq!(metadata["compat_alias"], true);
+        assert_eq!(metadata["_deprecation"]["this_tool"], "todo_write");
+        assert_eq!(metadata["_deprecation"]["use_instead"], "checklist_write");
+        assert_eq!(metadata["_deprecation"]["removed_in"], "0.9.0");
+        assert!(!tool.model_visible());
+    }
+
+    #[tokio::test]
+    async fn todo_item_aliases_return_replacement_metadata() {
+        let list = new_shared_todo_list();
+        let context = ToolContext::new(std::env::temp_dir());
+        // todo_add: metadata must name checklist_add as canonical tool and replacement.
+        let add = TodoAddTool::new(list.clone());
+        let add_result = add
+            .execute(
+                json!({"content": "legacy add", "status": "in_progress"}),
+                &context,
+            )
+            .await
+            .expect("todo add succeeds");
+        let add_metadata = add_result.metadata.expect("add metadata");
+        assert_eq!(add_metadata["canonical_tool"], "checklist_add");
+        assert_eq!(add_metadata["_deprecation"]["use_instead"], "checklist_add");
+        assert!(!add.model_visible());
+        // todo_update: same shared list; presumably id 1 is the item added above.
+        let update = TodoUpdateTool::new(list.clone());
+        let update_result = update
+            .execute(json!({"id": 1, "status": "completed"}), &context)
+            .await
+            .expect("todo update succeeds");
+        let update_metadata = update_result.metadata.expect("update metadata");
+        assert_eq!(update_metadata["canonical_tool"], "checklist_update");
+        assert_eq!(
+            update_metadata["_deprecation"]["use_instead"],
+            "checklist_update"
+        );
+        assert!(!update.model_visible());
+        // todo_list: metadata must name checklist_list as canonical tool and replacement.
+        let list_tool = TodoListTool::new(list);
+        let list_result = list_tool
+            .execute(json!({}), &context)
+            .await
+            .expect("todo list succeeds");
+        let list_metadata = list_result.metadata.expect("list metadata");
+        assert_eq!(list_metadata["canonical_tool"], "checklist_list");
+        assert_eq!(
+            list_metadata["_deprecation"]["use_instead"],
+            "checklist_list"
+        );
+        assert!(!list_tool.model_visible());
     }
 }
diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md
index 250f4483..fe0c3858 100644
--- a/docs/TOOL_SURFACE.md
+++ b/docs/TOOL_SURFACE.md
@@ -110,9 +110,14 @@ to the model, such as `mcp_<server>_<tool>`.
 | `task_cancel` | Cancel a queued or running durable task. Approval-required. |
 | `checklist_write` | Granular progress under the active thread/task. Checklist state is subordinate to the durable task. |
 | `checklist_add` / `checklist_update` / `checklist_list` | Single-item checklist operations. |
-| `todo_write` / `todo_add` / `todo_update` / `todo_list` | Compatibility aliases for the checklist tools. Existing sessions keep working, but new prompts should use `checklist_*`. |
 | `note` | One-off important fact for later. |
 
+The legacy `todo_write`, `todo_add`, `todo_update`, and `todo_list` names are
+hidden compatibility aliases for saved transcript replay. They remain callable
+by exact name, but they are not part of the model-visible catalog; compatibility
+results include `_deprecation.use_instead = checklist_*` and
+`_deprecation.removed_in = 0.9.0`.
+
 ### Verification gates and artifacts
 
 | Tool | Niche |