feat(tools): hide todo_* aliases from model catalog, add deprecation metadata (#2682)

- Add model_visible() hook to ToolSpec trait (default true)
- Override model_visible() -> false on todo_write, todo_add, todo_update, todo_list
- Checklist variants remain model-visible as the canonical surface
- Legacy todo_* calls still work for saved transcript replay
- Return _deprecation metadata with use_instead and removed_in=0.9.0
- Update prompts to recommend checklist_* only
- Update TOOL_SURFACE.md with v0.9.0 deprecation notes
- Add tests for hidden catalog, compat alias behavior, and metadata

Verification: cargo test -p codewhale-tui -- todo; cargo clippy -- -D warnings
This commit is contained in:
Hunter B
2026-06-03 19:20:23 -07:00
parent 8dff2f7525
commit f7a602cd20
6 changed files with 158 additions and 8 deletions
+1 -1
View File
@@ -242,7 +242,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise
## Toolbox (fast reference — tool descriptions are authoritative)
- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory).
- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory).
- **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs.
- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait.
- **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools.
+1 -1
View File
@@ -37,7 +37,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`
## Toolbox (fast reference — tool descriptions are authoritative)
- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `todo_*` aliases (legacy compatibility), `note` (persistent memory).
- **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory).
- **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs.
- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`.
- **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools.
+38
View File
@@ -222,6 +222,7 @@ impl ToolRegistry {
tools.sort_by(|a, b| a.name().cmp(b.name()));
tools
.into_iter()
.filter(|tool| tool.model_visible())
.map(|tool| {
let mut schema = tool.input_schema();
schema_sanitize::sanitize(&mut schema);
@@ -1202,6 +1203,43 @@ mod tests {
assert_eq!(registry.len(), 1);
}
/// Legacy `todo_*` names must stay registered (callable by name) while only
/// the `checklist_*` names appear in the catalog produced by `to_api_tools`.
#[test]
fn todo_aliases_stay_callable_but_hidden_from_model_catalog() {
    let legacy_aliases = ["todo_write", "todo_add", "todo_update", "todo_list"];
    let canonical_names = [
        "checklist_write",
        "checklist_add",
        "checklist_update",
        "checklist_list",
    ];

    let workspace = tempdir().expect("tempdir");
    let registry = ToolRegistryBuilder::new()
        .with_todo_tool(crate::tools::todo::new_shared_todo_list())
        .build(ToolContext::new(workspace.path().to_path_buf()));

    // Registration check: every legacy alias is still known to the registry.
    for alias in legacy_aliases {
        assert!(registry.contains(alias), "{alias} should remain callable");
    }

    // Catalog check: collect the names that would be advertised to the model.
    let api_names: Vec<_> = registry
        .to_api_tools()
        .into_iter()
        .map(|tool| tool.name)
        .collect();

    for canonical in canonical_names {
        assert!(
            api_names.iter().any(|name| name == canonical),
            "{canonical} should stay model-visible"
        );
    }

    for alias in legacy_aliases {
        assert!(
            !api_names.iter().any(|name| name == alias),
            "{alias} should be hidden from the model catalog"
        );
    }
}
#[test]
fn apply_overrides_removes_original_when_replacement_is_missing() {
let tmp = tempdir().expect("tempdir");
+8
View File
@@ -664,6 +664,14 @@ pub trait ToolSpec: Send + Sync {
false
}
/// Returns whether this tool should be advertised in the model-facing
/// catalog. Hidden compatibility tools remain registered and executable
/// by name so saved transcripts can replay without teaching new sessions
/// the deprecated spelling.
///
/// The default implementation returns `true`, so a tool is advertised
/// unless its implementation overrides this method to return `false`.
fn model_visible(&self) -> bool {
true
}
/// Execute the tool with the given input and context.
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError>;
}
+104 -5
View File
@@ -174,6 +174,22 @@ pub fn new_shared_todo_list() -> SharedTodoList {
Arc::new(Mutex::new(TodoList::new()))
}
/// Version string reported as `first_deprecated` in the `_deprecation`
/// metadata attached to results of the legacy `todo_*` aliases.
const TODO_ALIAS_FIRST_DEPRECATED_VERSION: &str = "0.8.53";
/// Version string reported as `removed_in` in the `_deprecation` metadata and
/// quoted in its human-readable `message`.
const TODO_ALIAS_REMOVAL_VERSION: &str = "0.9.0";
/// Reports whether `tool_name` is one of the legacy compatibility spellings,
/// i.e. whether it begins with the `todo_` prefix.
fn is_compat_alias(tool_name: &str) -> bool {
    const LEGACY_PREFIX: &str = "todo_";
    tool_name.starts_with(LEGACY_PREFIX)
}
/// Maps a tool name to the canonical `checklist_*` tool that should be used
/// in its place.
///
/// The `add`, `update`, and `list` operations keep their operation under
/// either the `todo_` or `checklist_` prefix; every other name resolves to
/// `checklist_write`.
fn checklist_replacement_tool_name(tool_name: &str) -> &'static str {
    // Peel off whichever known prefix is present, leaving just the operation.
    let operation = tool_name
        .strip_prefix("todo_")
        .or_else(|| tool_name.strip_prefix("checklist_"));

    match operation {
        Some("add") => "checklist_add",
        Some("update") => "checklist_update",
        Some("list") => "checklist_list",
        _ => "checklist_write",
    }
}
/// Tool for writing and updating the todo list
pub struct TodoWriteTool {
todo_list: SharedTodoList,
@@ -258,6 +274,10 @@ impl ToolSpec for TodoAddTool {
ApprovalRequirement::Auto
}
/// Keeps this tool out of the model-facing catalog when it is registered
/// under a legacy `todo_*` name; any other name stays visible.
fn model_visible(&self) -> bool {
    let is_legacy_alias = is_compat_alias(self.tool_name);
    !is_legacy_alias
}
async fn execute(
&self,
input: serde_json::Value,
@@ -350,6 +370,10 @@ impl ToolSpec for TodoUpdateTool {
ApprovalRequirement::Auto
}
/// Keeps this tool out of the model-facing catalog when it is registered
/// under a legacy `todo_*` name; any other name stays visible.
fn model_visible(&self) -> bool {
    let is_legacy_alias = is_compat_alias(self.tool_name);
    !is_legacy_alias
}
async fn execute(
&self,
input: serde_json::Value,
@@ -435,6 +459,10 @@ impl ToolSpec for TodoListTool {
ApprovalRequirement::Auto
}
/// Keeps this tool out of the model-facing catalog when it is registered
/// under a legacy `todo_*` name; any other name stays visible.
fn model_visible(&self) -> bool {
    let is_legacy_alias = is_compat_alias(self.tool_name);
    !is_legacy_alias
}
async fn execute(
&self,
_input: serde_json::Value,
@@ -448,7 +476,8 @@ impl ToolSpec for TodoListTool {
snapshot.items.len(),
snapshot.completion_pct,
result
)))
))
.with_metadata(checklist_metadata(&snapshot, self.tool_name)))
}
}
@@ -502,6 +531,10 @@ impl ToolSpec for TodoWriteTool {
ApprovalRequirement::Auto
}
/// Keeps this tool out of the model-facing catalog when it is registered
/// under a legacy `todo_*` name; any other name stays visible.
fn model_visible(&self) -> bool {
    let is_legacy_alias = is_compat_alias(self.tool_name);
    !is_legacy_alias
}
async fn execute(
&self,
input: serde_json::Value,
@@ -547,6 +580,8 @@ impl ToolSpec for TodoWriteTool {
}
fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_json::Value {
let canonical_tool = checklist_replacement_tool_name(tool_name);
let compat_alias = is_compat_alias(tool_name);
let items = snapshot
.items
.iter()
@@ -558,9 +593,9 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso
})
})
.collect::<Vec<_>>();
json!({
"canonical_tool": "checklist_write",
"compat_alias": tool_name.starts_with("todo_"),
let mut metadata = json!({
"canonical_tool": canonical_tool,
"compat_alias": compat_alias,
"task_updates": {
"checklist": {
"items": items,
@@ -569,7 +604,22 @@ fn checklist_metadata(snapshot: &TodoListSnapshot, tool_name: &str) -> serde_jso
"updated_at": null
}
}
})
});
if compat_alias && let Some(obj) = metadata.as_object_mut() {
obj.insert(
"_deprecation".to_string(),
json!({
"this_tool": tool_name,
"use_instead": canonical_tool,
"first_deprecated": TODO_ALIAS_FIRST_DEPRECATED_VERSION,
"removed_in": TODO_ALIAS_REMOVAL_VERSION,
"message": format!(
"Tool '{tool_name}' is a hidden compatibility alias; use '{canonical_tool}' before v{TODO_ALIAS_REMOVAL_VERSION}."
),
}),
);
}
metadata
}
#[cfg(test)]
@@ -626,5 +676,54 @@ mod tests {
assert_eq!(tool.name(), "todo_write");
assert_eq!(metadata["canonical_tool"], "checklist_write");
assert_eq!(metadata["compat_alias"], true);
assert_eq!(metadata["_deprecation"]["this_tool"], "todo_write");
assert_eq!(metadata["_deprecation"]["use_instead"], "checklist_write");
assert_eq!(metadata["_deprecation"]["removed_in"], "0.9.0");
assert!(!tool.model_visible());
}
/// Runs the add, update, and list tools in sequence against one shared list
/// and checks, for each result, that the metadata names the matching
/// `checklist_*` tool both as `canonical_tool` and as
/// `_deprecation.use_instead`, and that the tool reports itself as not
/// model-visible.
///
/// NOTE(review): the assertions imply that `new` builds the legacy
/// `todo_*`-named variant of each tool — confirm against the constructors.
#[tokio::test]
async fn todo_item_aliases_return_replacement_metadata() {
// One list shared by all three tools, so the steps below build on each other.
let list = new_shared_todo_list();
let context = ToolContext::new(std::env::temp_dir());
// Step 1: add an item through the add tool.
let add = TodoAddTool::new(list.clone());
let add_result = add
.execute(
json!({"content": "legacy add", "status": "in_progress"}),
&context,
)
.await
.expect("todo add succeeds");
let add_metadata = add_result.metadata.expect("add metadata");
assert_eq!(add_metadata["canonical_tool"], "checklist_add");
assert_eq!(add_metadata["_deprecation"]["use_instead"], "checklist_add");
assert!(!add.model_visible());
// Step 2: update an item through the update tool.
// Assumes the item added in step 1 was assigned id 1 — TODO confirm against
// the list's id assignment.
let update = TodoUpdateTool::new(list.clone());
let update_result = update
.execute(json!({"id": 1, "status": "completed"}), &context)
.await
.expect("todo update succeeds");
let update_metadata = update_result.metadata.expect("update metadata");
assert_eq!(update_metadata["canonical_tool"], "checklist_update");
assert_eq!(
update_metadata["_deprecation"]["use_instead"],
"checklist_update"
);
assert!(!update.model_visible());
// Step 3: list through the list tool; this takes ownership of the last
// handle to the shared list.
let list_tool = TodoListTool::new(list);
let list_result = list_tool
.execute(json!({}), &context)
.await
.expect("todo list succeeds");
let list_metadata = list_result.metadata.expect("list metadata");
assert_eq!(list_metadata["canonical_tool"], "checklist_list");
assert_eq!(
list_metadata["_deprecation"]["use_instead"],
"checklist_list"
);
assert!(!list_tool.model_visible());
}
}
+6 -1
View File
@@ -110,9 +110,14 @@ to the model, such as `mcp_<server>_<tool>`.
| `task_cancel` | Cancel a queued or running durable task. Approval-required. |
| `checklist_write` | Granular progress under the active thread/task. Checklist state is subordinate to the durable task. |
| `checklist_add` / `checklist_update` / `checklist_list` | Single-item checklist operations. |
| `todo_write` / `todo_add` / `todo_update` / `todo_list` | Compatibility aliases for the checklist tools. Existing sessions keep working, but new prompts should use `checklist_*`. |
| `note` | One-off important fact for later. |
The legacy `todo_write`, `todo_add`, `todo_update`, and `todo_list` names are
hidden compatibility aliases for saved transcript replay. They remain callable
by exact name, but they are not part of the model-visible catalog; compatibility
results include `_deprecation.use_instead = checklist_*` and
`_deprecation.removed_in = 0.9.0`.
### Verification gates and artifacts
| Tool | Niche |