feat(tools): hide todo_* aliases from model catalog, add deprecation metadata (#2682)

- Add model_visible() hook to ToolSpec trait (default true)
- Override model_visible() -> false on todo_write, todo_add, todo_update, todo_list
- Checklist variants remain model-visible as the canonical surface
- Legacy todo_* calls still work for saved transcript replay
- Return _deprecation metadata with use_instead and removed_in=0.9.0
- Update prompts to recommend checklist_* only
- Update TOOL_SURFACE.md with v0.9.0 deprecation notes
- Add tests for hidden catalog, compat alias behavior, and metadata

Verification: cargo test -p codewhale-tui -- todo, cargo clippy -D warnings
2026-06-03 19:20:23 -07:00
parent 8dff2f7525
commit f7a602cd20
6 changed files with 158 additions and 8 deletions
@@ -242,7 +242,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise

 ## Toolbox (fast reference — tool descriptions are authoritative)

- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory).
+- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory).
 - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs.
 - **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait.
 - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools.
@@ -37,7 +37,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`

 ## Toolbox (fast reference — tool descriptions are authoritative)

- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory).
+- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory).
 - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs.
 - **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`.
 - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools.
@@ -222,6 +222,7 @@ impl ToolRegistry {
        tools.sort_by(|a, b| a.name().cmp(b.name()));
        tools
            .into_iter()
+            .filter(|tool| tool.model_visible())
            .map(|tool| {
                let mut schema = tool.input_schema();
                schema_sanitize::sanitize(&mut schema);
@@ -1202,6 +1203,43 @@ mod tests {
        assert_eq!(registry.len(), 1);
    }

+    #[test]
+    fn todo_aliases_stay_callable_but_hidden_from_model_catalog() {
+        let tmp = tempdir().expect("tempdir");
+        let ctx = ToolContext::new(tmp.path().to_path_buf());
+        let registry = ToolRegistryBuilder::new()
+            .with_todo_tool(crate::tools::todo::new_shared_todo_list())
+            .build(ctx);
+
+        for alias in ["todo_write", "todo_add", "todo_update", "todo_list"] {
+            assert!(registry.contains(alias), "{alias} should remain callable");
+        }
+
+        let api_names = registry
+            .to_api_tools()
+            .into_iter()
+            .map(|tool| tool.name)
+            .collect::<Vec<_>>();
+
+        for canonical in [
+            "checklist_write",
+            "checklist_add",
+            "checklist_update",
+            "checklist_list",
+        ] {
+            assert!(
+                api_names.iter().any(|name| name == canonical),
+                "{canonical} should stay model-visible"
+            );
+        }
+        for alias in ["todo_write", "todo_add", "todo_update", "todo_list"] {
+            assert!(
+                api_names.iter().all(|name| name != alias),
+                "{alias} should be hidden from the model catalog"
+            );
+        }
+    }
+
    #[test]
    fn apply_overrides_removes_original_when_replacement_is_missing() {
        let tmp = tempdir().expect("tempdir");
@@ -664,6 +664,14 @@ pub trait ToolSpec: Send + Sync {
        false
    }

+    /// Returns whether this tool should be advertised in the model-facing
+    /// catalog. Hidden compatibility tools remain registered and executable
+    /// by name so saved transcripts can replay without teaching new sessions
+    /// the deprecated spelling.
+    fn model_visible(&self) -> bool {
+        true
+    }
+
    /// Execute the tool with the given input and context.
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError>;
 }
@@ -174,6 +174,22 @@ pub fn new_shared_todo_list() -> SharedTodoList {
    Arc::new(Mutex::new(TodoList::new()))
 }

+const TODO_ALIAS_FIRST_DEPRECATED_VERSION: &str = "0.8.53";
+const TODO_ALIAS_REMOVAL_VERSION: &str = "0.9.0";
+
+fn is_compat_alias(tool_name: &str) -> bool {
+    tool_name.starts_with("todo_")
+}
+
+fn checklist_replacement_tool_name(tool_name: &str) -> &'static str {
+    match tool_name {
+        "todo_add" | "checklist_add" => "checklist_add",
+        "todo_update" | "checklist_update" => "checklist_update",
+        "todo_list" | "checklist_list" => "checklist_list",
+        _ => "checklist_write",
+    }
+}
+
 /// Tool for writing and updating the todo list
 pub struct TodoWriteTool {
    todo_list: SharedTodoList,
@@ -258,6 +274,10 @@ impl ToolSpec for TodoAddTool {
        ApprovalRequirement::Auto
    }

+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name)
+    }
+
    async fn execute(
        &self,
        input: serde_json::Value,
@@ -350,6 +370,10 @@ impl ToolSpec for TodoUpdateTool {
        ApprovalRequirement::Auto
    }

+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name)
+    }
+
    async fn execute(
        &self,
        input: serde_json::Value,
@@ -435,6 +459,10 @@ impl ToolSpec for TodoListTool {
        ApprovalRequirement::Auto
    }

+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name)
+    }
+
    async fn execute(
        &self,
        _input: serde_json::Value,
@@ -448,7 +476,8 @@ impl ToolSpec for TodoListTool {
            snapshot.items.len(),
            snapshot.completion_pct,
            result
-        )))
+        ))
+        .with_metadata(checklist_metadata(&snapshot, self.tool_name)))
    }
 }

@@ -502,6 +531,10 @@ impl ToolSpec for TodoWriteTool {
        ApprovalRequirement::Auto
    }

+    fn model_visible(&self) -> bool {
+        !is_compat_alias(self.tool_name)
+    }
+
    async fn execute(
        &self,
        input: serde_json::Value,
@@ -547,6 +580,8 @@ impl ToolSpec for TodoWriteTool {
 }

 fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_json::Value {
+    let canonical_tool = checklist_replacement_tool_name(tool_name);
+    let compat_alias = is_compat_alias(tool_name);
    let items = snapshot
        .items
        .iter()
@@ -558,9 +593,9 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso
            })
        })
        .collect::<Vec<_>>();
-    json!({
-        "canonical_tool": "checklist_write",
-        "compat_alias": tool_name.starts_with("todo_"),
+    let mut metadata = json!({
+        "canonical_tool": canonical_tool,
+        "compat_alias": compat_alias,
        "task_updates": {
            "checklist": {
                "items": items,
@@ -569,7 +604,22 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso
                "updated_at": null
            }
        }
-    })
+    });
+    if compat_alias && let Some(obj) = metadata.as_object_mut() {
+        obj.insert(
+            "_deprecation".to_string(),
+            json!({
+                "this_tool": tool_name,
+                "use_instead": canonical_tool,
+                "first_deprecated": TODO_ALIAS_FIRST_DEPRECATED_VERSION,
+                "removed_in": TODO_ALIAS_REMOVAL_VERSION,
+                "message": format!(
+                    "Tool '{tool_name}' is a hidden compatibility alias; use '{canonical_tool}' before v{TODO_ALIAS_REMOVAL_VERSION}."
+                ),
+            }),
+        );
+    }
+    metadata
 }

 #[cfg(test)]
@@ -626,5 +676,54 @@ mod tests {
        assert_eq!(tool.name(), "todo_write");
        assert_eq!(metadata["canonical_tool"], "checklist_write");
        assert_eq!(metadata["compat_alias"], true);
+        assert_eq!(metadata["_deprecation"]["this_tool"], "todo_write");
+        assert_eq!(metadata["_deprecation"]["use_instead"], "checklist_write");
+        assert_eq!(metadata["_deprecation"]["removed_in"], "0.9.0");
+        assert!(!tool.model_visible());
+    }
+
+    #[tokio::test]
+    async fn todo_item_aliases_return_replacement_metadata() {
+        let list = new_shared_todo_list();
+        let context = ToolContext::new(std::env::temp_dir());
+
+        let add = TodoAddTool::new(list.clone());
+        let add_result = add
+            .execute(
+                json!({"content": "legacy add", "status": "in_progress"}),
+                &context,
+            )
+            .await
+            .expect("todo add succeeds");
+        let add_metadata = add_result.metadata.expect("add metadata");
+        assert_eq!(add_metadata["canonical_tool"], "checklist_add");
+        assert_eq!(add_metadata["_deprecation"]["use_instead"], "checklist_add");
+        assert!(!add.model_visible());
+
+        let update = TodoUpdateTool::new(list.clone());
+        let update_result = update
+            .execute(json!({"id": 1, "status": "completed"}), &context)
+            .await
+            .expect("todo update succeeds");
+        let update_metadata = update_result.metadata.expect("update metadata");
+        assert_eq!(update_metadata["canonical_tool"], "checklist_update");
+        assert_eq!(
+            update_metadata["_deprecation"]["use_instead"],
+            "checklist_update"
+        );
+        assert!(!update.model_visible());
+
+        let list_tool = TodoListTool::new(list);
+        let list_result = list_tool
+            .execute(json!({}), &context)
+            .await
+            .expect("todo list succeeds");
+        let list_metadata = list_result.metadata.expect("list metadata");
+        assert_eq!(list_metadata["canonical_tool"], "checklist_list");
+        assert_eq!(
+            list_metadata["_deprecation"]["use_instead"],
+            "checklist_list"
+        );
+        assert!(!list_tool.model_visible());
    }
 }
@@ -110,9 +110,14 @@ to the model, such as `mcp_<server>_<tool>`.
 | `task_cancel` | Cancel a queued or running durable task. Approval-required. |
 | `checklist_write` | Granular progress under the active thread/task. Checklist state is subordinate to the durable task. |
 | `checklist_add` / `checklist_update` / `checklist_list` | Single-item checklist operations. |
-| `todo_write` / `todo_add` / `todo_update` / `todo_list` | Compatibility aliases for the checklist tools. Existing sessions keep working, but new prompts should use `checklist_*`. |
 | `note` | One-off important fact for later. |

+The legacy `todo_write`, `todo_add`, `todo_update`, and `todo_list` names are
+hidden compatibility aliases kept for saved transcript replay. They remain
+callable by exact name but are not part of the model-visible catalog. Results
+from an alias include `_deprecation.use_instead` (the matching `checklist_*`
+tool) and `_deprecation.removed_in = 0.9.0`.
+
 ### Verification gates and artifacts

 | Tool | Niche |