From 2a41102e0cdde6994086c21bfa316066fdd3f5c4 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Tue, 26 May 2026 10:08:09 -0500 Subject: [PATCH] feat: defer low-value native tools by default, reduce catalog tokens 73% (#2076) --- config.example.toml | 6 + crates/config/src/lib.rs | 19 +++ crates/tui/src/config.rs | 47 ++++++ crates/tui/src/core/engine.rs | 13 +- crates/tui/src/core/engine/tests.rs | 174 ++++++++++++++++++--- crates/tui/src/core/engine/tool_catalog.rs | 100 ++++++------ crates/tui/src/core/engine/turn_loop.rs | 2 +- crates/tui/src/main.rs | 1 + crates/tui/src/runtime_threads.rs | 1 + crates/tui/src/tui/ui.rs | 1 + docs/CONFIGURATION.md | 11 ++ scripts/measure-tool-catalog.py | 46 ++++++ 12 files changed, 346 insertions(+), 75 deletions(-) create mode 100755 scripts/measure-tool-catalog.py diff --git a/config.example.toml b/config.example.toml index 73c211f2..81332674 100644 --- a/config.example.toml +++ b/config.example.toml @@ -97,6 +97,12 @@ memory_path = "~/.deepseek/memory.md" # Parsed but currently unused (reserved for future versions): # tools_file = "./tools.json" +# Native tool catalog controls (#2076). By default only the core tool surface +# is loaded into the model context; less common native tools are discoverable +# through ToolSearch and loaded on first use. +# [tools] +# always_load = ["git_show", "notify"] + # ───────────────────────────────────────────────────────────────────────────────── # Security # ───────────────────────────────────────────────────────────────────────────────── diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 039c28c7..11f8afe0 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -216,6 +216,9 @@ pub struct ConfigToml { pub telemetry: Option, pub approval_policy: Option, pub sandbox_mode: Option, + /// Native tool catalog controls shared with `codewhale-tui`. 
+ #[serde(default)] + pub tools: Option<ToolsToml>, #[serde(default)] pub providers: ProvidersToml, /// Per-domain network policy (#135). When absent, network tools fall back @@ -253,6 +256,14 @@ pub struct SkillsToml { pub max_install_size_bytes: Option, } +/// On-disk schema for the `[tools]` table (#2076). +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct ToolsToml { + /// Native tool names to keep loaded outside the default core catalog. + #[serde(default)] + pub always_load: Vec<String>, +} + /// On-disk schema for the `[snapshots]` table (#137). See /// `config.example.toml` for documentation. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -373,6 +384,13 @@ impl ConfigToml { { self.sandbox_mode = Some(mode); } + if project.tools.is_some() { + self.tools = project.tools; + } + // Provider is only overridden if explicitly set (non-default). + if project.provider != ProviderKind::Deepseek || has_api_key { + self.provider = project.provider; + } merge_project_provider_config(&mut self.providers.deepseek, &project.providers.deepseek); merge_project_provider_config( @@ -414,6 +432,7 @@ impl ConfigToml { "telemetry" => self.telemetry.map(|v| v.to_string()), "approval_policy" => self.approval_policy.clone(), "sandbox_mode" => self.sandbox_mode.clone(), + "tools.always_load" => self.tools.as_ref().map(|tools| tools.always_load.join(",")), "providers.deepseek.api_key" => self.providers.deepseek.api_key.clone(), "providers.deepseek.base_url" => self.providers.deepseek.base_url.clone(), "providers.deepseek.model" => self.providers.deepseek.model.clone(), diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 060385b6..681c7c94 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -664,6 +664,15 @@ pub struct SearchConfig { pub api_key: Option, } +/// Model-visible tool catalog controls (`[tools]` table in config.toml). 
+#[derive(Debug, Clone, Deserialize, Default)] +pub struct ToolsConfig { + /// Native tool names to keep loaded even when they are outside the small + /// default core catalog. Unknown names are harmless and simply never match. + #[serde(default)] + pub always_load: Vec<String>, +} + /// One configurable footer item. /// /// Order in the user's `Vec` is preserved: items in the left @@ -953,6 +962,10 @@ pub struct Config { /// Defaults to `"max"` at runtime if unset. pub reasoning_effort: Option, pub tools_file: Option, + /// Native tool catalog controls. `tools_file` is the legacy external + /// schema path; this table controls built-in tool loading policy. + #[serde(default)] + pub tools: Option<ToolsConfig>, pub skills_dir: Option, pub mcp_config_path: Option, pub notes_path: Option, @@ -1305,6 +1318,22 @@ impl Config { .unwrap_or(false) } + #[must_use] + pub fn tools_always_load(&self) -> std::collections::HashSet<String> { + self.tools + .as_ref() + .map(|tools| { + tools + .always_load + .iter() + .map(|name| name.trim()) + .filter(|name| !name.is_empty()) + .map(ToOwned::to_owned) + .collect() + }) + .unwrap_or_default() + } + /// Load configuration from disk and merge with environment overrides. 
/// /// # Examples @@ -3071,6 +3100,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config { auth_mode: override_cfg.auth_mode.or(base.auth_mode), reasoning_effort: override_cfg.reasoning_effort.or(base.reasoning_effort), tools_file: override_cfg.tools_file.or(base.tools_file), + tools: override_cfg.tools.or(base.tools), skills_dir: override_cfg.skills_dir.or(base.skills_dir), mcp_config_path: override_cfg.mcp_config_path.or(base.mcp_config_path), notes_path: override_cfg.notes_path.or(base.notes_path), @@ -4089,6 +4119,23 @@ mod tests { assert_eq!(SearchProvider::default(), SearchProvider::Bing); } + #[test] + fn tools_always_load_parses_and_trims_names() { + let parsed: ConfigFile = toml::from_str( + r#" + [tools] + always_load = ["git_show", " notify ", ""] + "#, + ) + .expect("tools config"); + + let names = parsed.base.tools_always_load(); + + assert!(names.contains("git_show")); + assert!(names.contains("notify")); + assert!(!names.contains("")); + } + #[test] fn explicit_duckduckgo_search_provider_is_preserved() { let config: Config = toml::from_str( diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 1537d87d..e45dabe3 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -7,8 +7,8 @@ //! - Proper cancellation support //! - Tool execution orchestration -use std::collections::HashMap; use std::collections::hash_map::DefaultHasher; +use std::collections::{HashMap, HashSet}; use std::hash::{Hash, Hasher}; use std::path::PathBuf; use std::sync::{Arc, Mutex as StdMutex}; @@ -171,6 +171,9 @@ pub struct EngineConfig { /// once at engine construction, then threaded onto every /// `SubAgentRuntime` the engine builds (#1806, #1808). pub subagent_api_timeout: Duration, + /// Native tools that should stay in the model-visible catalog even when + /// they are outside the small default core surface (#2076). 
+ pub tools_always_load: HashSet<String>, /// When true and `/usr/bin/bwrap` is present on Linux, route exec_shell /// through bubblewrap instead of relying solely on Landlock (#2184). #[allow(dead_code)] // Wired through ShellManager in follow-up PR @@ -218,6 +221,7 @@ impl Default for EngineConfig { subagent_api_timeout: Duration::from_secs( crate::config::DEFAULT_SUBAGENT_API_TIMEOUT_SECS, ), + tools_always_load: HashSet::new(), prefer_bwrap: false, } } @@ -1130,7 +1134,12 @@ impl Engine { Vec::new() }; let tools = tool_registry.as_ref().map(|registry| { - build_model_tool_catalog(registry.to_api_tools_with_cache(true), mcp_tools, mode) + build_model_tool_catalog( + registry.to_api_tools_with_cache(true), + mcp_tools, + mode, + &self.config.tools_always_load, + ) }); // Main turn loop diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index e68f5fb2..6f612611 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -386,40 +386,74 @@ fn tool_exec_outcome_tracks_duration() { } #[test] -fn yolo_mode_keeps_tools_preloaded() { - assert!(!should_default_defer_tool("exec_shell", AppMode::Yolo)); +fn core_native_tools_stay_loaded_in_yolo_mode() { + let always_load = HashSet::new(); assert!(!should_default_defer_tool( - "mcp_read_resource", - AppMode::Yolo + "exec_shell", + AppMode::Yolo, + &always_load + )); + assert!(should_default_defer_tool( + "git_show", + AppMode::Yolo, + &always_load )); } #[test] fn non_yolo_mode_retains_default_defer_policy() { - // Shell tools are kept loaded in action modes so the model can verify - // work without an extra ToolSearch round-trip; non-action tools (e.g. - // MCP) still defer. 
- assert!(!should_default_defer_tool("exec_shell", AppMode::Agent)); - assert!(should_default_defer_tool("exec_shell", AppMode::Plan)); - assert!(!should_default_defer_tool("read_file", AppMode::Agent)); - assert!(!should_default_defer_tool("write_file", AppMode::Agent)); + let always_load = HashSet::new(); + assert!(!should_default_defer_tool( + "exec_shell", + AppMode::Agent, + &always_load + )); + assert!(!should_default_defer_tool( + "edit_file", + AppMode::Agent, + &always_load + )); + assert!(!should_default_defer_tool( + "run_tests", + AppMode::Agent, + &always_load + )); + assert!(!should_default_defer_tool( + "agent_open", + AppMode::Agent, + &always_load + )); + assert!(!should_default_defer_tool( + "read_file", + AppMode::Agent, + &always_load + )); + assert!(!should_default_defer_tool( + "write_file", + AppMode::Agent, + &always_load + )); assert!(should_default_defer_tool( - "mcp_read_resource", - AppMode::Agent + "git_show", + AppMode::Agent, + &always_load )); } #[test] fn model_tool_catalog_applies_native_and_mcp_deferral() { + let always_load = HashSet::new(); let catalog = build_model_tool_catalog( vec![ api_tool("read_file"), api_tool("write_file"), api_tool("exec_shell"), + api_tool("edit_file"), api_tool("project_map"), ], vec![api_tool("list_mcp_resources"), api_tool("mcp_server_write")], AppMode::Agent, + &always_load, ); let defer_loading = |name: &str| { @@ -432,11 +466,87 @@ fn model_tool_catalog_applies_native_and_mcp_deferral() { assert_eq!(defer_loading("read_file"), Some(false)); assert_eq!(defer_loading("write_file"), Some(false)); assert_eq!(defer_loading("exec_shell"), Some(false)); + assert_eq!(defer_loading("edit_file"), Some(false)); assert_eq!(defer_loading("project_map"), Some(true)); assert_eq!(defer_loading("list_mcp_resources"), Some(false)); assert_eq!(defer_loading("mcp_server_write"), Some(true)); } +#[test] +fn tools_always_load_overrides_default_native_deferral() { + let always_load = 
HashSet::from(["git_show".to_string()]); + assert!(!should_default_defer_tool( + "git_show", + AppMode::Agent, + &always_load + )); +} + +#[test] +#[ignore = "one-shot metric for scripts/measure-tool-catalog.py"] +fn print_agent_tool_catalog_metrics() { + let tmp = tempdir().expect("tempdir"); + let context = crate::tools::ToolContext::new(tmp.path().to_path_buf()); + let client = DeepSeekClient::new(&Config { + api_key: Some("test-key".to_string()), + ..Config::default() + }) + .expect("stub client"); + let manager = crate::tools::subagent::new_shared_subagent_manager(tmp.path().to_path_buf(), 8); + let runtime = crate::tools::subagent::SubAgentRuntime::new( + client, + DEFAULT_TEXT_MODEL.to_string(), + context.clone(), + true, + None, + manager.clone(), + ); + let registry = crate::tools::ToolRegistryBuilder::new() + .with_agent_tools(true) + .with_todo_tool(new_shared_todo_list()) + .with_plan_tool(new_shared_plan_state()) + .with_review_tool(None, DEFAULT_TEXT_MODEL.to_string()) + .with_rlm_tool(None, DEFAULT_TEXT_MODEL.to_string()) + .with_recall_archive_tool() + .with_notify_tool() + .with_subagent_tools(manager, runtime) + .build(context); + let baseline_catalog = registry.to_api_tools_with_cache(true); + let baseline_json = serde_json::to_vec(&baseline_catalog).expect("serialize baseline"); + + let always_load = HashSet::new(); + let mut catalog = build_model_tool_catalog( + baseline_catalog.clone(), + vec![], + AppMode::Agent, + &always_load, + ); + ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load); + let active = initial_active_tools(&catalog); + let active_catalog = active_tools_for_step(&catalog, &active, false); + let active_json = serde_json::to_vec(&active_catalog).expect("serialize active"); + let reduction_percent = if baseline_json.is_empty() { + 0.0 + } else { + 100.0 * (baseline_json.len().saturating_sub(active_json.len())) as f64 + / baseline_json.len() as f64 + }; + + println!( + "TOOL_CATALOG_METRICS {}", + 
serde_json::json!({ + "baseline_tools": baseline_catalog.len(), + "baseline_bytes": baseline_json.len(), + "baseline_tokens_est": baseline_json.len().div_ceil(4), + "active_tools": active_catalog.len(), + "active_bytes": active_json.len(), + "active_tokens_est": active_json.len().div_ceil(4), + "reduction_percent": reduction_percent, + "active_tool_names": active_catalog.iter().map(|tool| tool.name.as_str()).collect::<Vec<_>>(), + }) + ); +} + #[test] fn deferred_edit_file_first_use_hydrates_schema_without_execution() { let mut edit = api_tool("edit_file"); @@ -511,14 +621,25 @@ fn deferred_edit_file_first_use_hydrates_schema_without_execution() { } #[test] -fn model_tool_catalog_keeps_everything_loaded_in_yolo_mode() { +fn model_tool_catalog_defers_non_core_native_tools_in_yolo_mode() { + let always_load = HashSet::new(); let catalog = build_model_tool_catalog( - vec![api_tool("project_map")], + vec![api_tool("read_file"), api_tool("project_map")], vec![api_tool("mcp_server_write")], AppMode::Yolo, + &always_load, ); - assert!(catalog.iter().all(|tool| tool.defer_loading == Some(false))); + let defer_loading = |name: &str| { + catalog + .iter() + .find(|tool| tool.name == name) + .and_then(|tool| tool.defer_loading) + }; + + assert_eq!(defer_loading("read_file"), Some(false)); + assert_eq!(defer_loading("project_map"), Some(true)); + assert_eq!(defer_loading("mcp_server_write"), Some(false)); } #[test] @@ -526,6 +647,7 @@ fn model_tool_catalog_sorts_each_partition_for_prefix_cache_stability() { // Regression for #263: deterministic byte order of the tools array is a // hard requirement for DeepSeek's KV prefix cache. Built-ins stay as a // contiguous prefix; MCP tools follow. Within each partition: alphabetical. 
+ let always_load = HashSet::new(); let catalog = build_model_tool_catalog( vec![ api_tool("read_file"), @@ -534,6 +656,7 @@ fn model_tool_catalog_sorts_each_partition_for_prefix_cache_stability() { ], vec![api_tool("mcp_zoo_b"), api_tool("mcp_aardvark_a")], AppMode::Yolo, + &always_load, ); let names: Vec<&str> = catalog.iter().map(|t| t.name.as_str()).collect(); @@ -588,11 +711,18 @@ fn deferred_tool_preflight_loads_edit_schema_without_executing_bad_aliases() { engine.config.plan_state.clone(), ) .build(engine.build_tool_context(AppMode::Agent, false)); - let catalog = build_model_tool_catalog( + let always_load = HashSet::new(); + let mut catalog = build_model_tool_catalog( registry.to_api_tools_with_cache(true), vec![], AppMode::Agent, + &always_load, ); + catalog + .iter_mut() + .find(|tool| tool.name == "edit_file") + .expect("edit_file registered") + .defer_loading = Some(true); let mut active = initial_active_tools(&catalog); assert!(!active.contains("edit_file")); @@ -633,10 +763,12 @@ fn deferred_tool_preflight_guides_checklist_update_list_replacement() { engine.config.plan_state.clone(), ) .build(engine.build_tool_context(AppMode::Agent, false)); + let always_load = HashSet::new(); let catalog = build_model_tool_catalog( registry.to_api_tools_with_cache(true), vec![], AppMode::Agent, + &always_load, ); let mut active = initial_active_tools(&catalog); assert!(!active.contains("checklist_update")); @@ -1726,7 +1858,8 @@ fn tool_search_activates_discovered_deferred_tools() { cache_control: None, }, ]; - ensure_advanced_tooling(&mut catalog, AppMode::Agent); + let always_load = HashSet::new(); + ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load); let mut active = initial_active_tools(&catalog); let result = execute_tool_search( TOOL_SEARCH_BM25_NAME, @@ -1753,7 +1886,8 @@ async fn code_execution_runs_python_and_returns_result_payload() { #[test] fn plan_mode_catalog_skips_code_execution_tool_but_agent_keeps_it() { let mut plan_catalog = 
vec![api_tool("read_file")]; - ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan); + let always_load = HashSet::new(); + ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan, &always_load); assert!( !plan_catalog .iter() @@ -1762,7 +1896,7 @@ fn plan_mode_catalog_skips_code_execution_tool_but_agent_keeps_it() { ); let mut agent_catalog = vec![api_tool("read_file")]; - ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent); + ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent, &always_load); assert!( agent_catalog .iter() diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs index 3ce7cdac..c699a0a8 100644 --- a/crates/tui/src/core/engine/tool_catalog.rs +++ b/crates/tui/src/core/engine/tool_catalog.rs @@ -29,63 +29,48 @@ pub(super) fn is_tool_search_tool(name: &str) -> bool { matches!(name, TOOL_SEARCH_REGEX_NAME | TOOL_SEARCH_BM25_NAME) } -pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool { - if mode == AppMode::Yolo { +pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[ + "agent_open", + "apply_patch", + "edit_file", + "exec_shell", + "fetch_url", + "file_search", + "git_diff", + "git_status", + "grep_files", + "list_dir", + "read_file", + "run_tests", + "web_search", + "write_file", +]; + +pub(super) fn should_default_defer_tool( + name: &str, + _mode: AppMode, + always_load: &HashSet<String>, +) -> bool { + if always_load.contains(name) { return false; } - // Shell exec tools are kept active in Agent so the model can run - // verification commands (build/test/git/cargo) without first having to - // discover them through ToolSearch. Plan mode does not register shell - // execution tools. 
- let always_loaded_in_action_modes = matches!(mode, AppMode::Agent) - && matches!( - name, - "exec_shell" - | "exec_shell_wait" - | "exec_shell_interact" - | "exec_wait" - | "exec_interact" - ); - if always_loaded_in_action_modes { + if is_tool_search_tool(name) { return false; } - !matches!( - name, - "read_file" - | "write_file" - | "list_dir" - | "grep_files" - | "file_search" - | "diagnostics" - | "rlm_open" - | "rlm_eval" - | "rlm_configure" - | "rlm_close" - | "rlm_session_objects" - | "handle_read" - | "recall_archive" - | "notify" - | MULTI_TOOL_PARALLEL_NAME - | "update_plan" - | "checklist_write" - | "todo_write" - | "task_create" - | "task_list" - | "task_read" - | "task_gate_run" - | "task_shell_start" - | "task_shell_wait" - | "github_issue_context" - | "github_pr_context" - | REQUEST_USER_INPUT_NAME - ) + !DEFAULT_ACTIVE_NATIVE_TOOLS + .iter() + .any(|core_tool| core_tool == &name) } -pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], mode: AppMode) { +pub(super) fn apply_native_tool_deferral( + catalog: &mut [Tool], + mode: AppMode, + always_load: &HashSet<String>, +) { for tool in catalog { - tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode)); + tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load)); } } @@ -111,8 +96,9 @@ pub(super) fn build_model_tool_catalog( mut native_tools: Vec<Tool>, mut mcp_tools: Vec<Tool>, mode: AppMode, + always_load: &HashSet<String>, ) -> Vec<Tool> { - apply_native_tool_deferral(&mut native_tools, mode); + apply_native_tool_deferral(&mut native_tools, mode, always_load); apply_mcp_tool_deferral(&mut mcp_tools, mode); // Sort each partition by name for prefix-cache stability (#263). 
The // upstream `to_api_tools()` already sorts the registry's HashMap output; @@ -126,7 +112,11 @@ pub(super) fn build_model_tool_catalog( native_tools } -pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) { +pub(super) fn ensure_advanced_tooling( + catalog: &mut Vec<Tool>, + mode: AppMode, + always_load: &HashSet<String>, +) { // code_execution depends on a locally-installed Python interpreter // (python3 / python / py -3). Before v0.8.31, the tool was always // advertised and would fail at execution time on Windows where @@ -150,7 +140,11 @@ pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) { "required": ["code"] }), allowed_callers: Some(vec!["direct".to_string()]), - defer_loading: Some(false), + defer_loading: Some(should_default_defer_tool( + CODE_EXECUTION_TOOL_NAME, + mode, + always_load, + )), input_examples: None, strict: None, cache_control: None, @@ -166,7 +160,9 @@ pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) { && !catalog.iter().any(|t| t.name == JS_EXECUTION_TOOL_NAME) && crate::dependencies::resolve_node().is_some() { - catalog.push(crate::tools::js_execution::js_execution_tool_definition()); + let mut tool = crate::tools::js_execution::js_execution_tool_definition(); + tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load)); + catalog.push(tool); } if !catalog.iter().any(|t| t.name == TOOL_SEARCH_REGEX_NAME) { diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 70f94f25..ca5d6b89 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -30,7 +30,7 @@ impl Engine { let mut context_recovery_attempts = 0u8; let mut tool_catalog = tools.unwrap_or_default(); if !tool_catalog.is_empty() { - ensure_advanced_tooling(&mut tool_catalog, mode); + ensure_advanced_tooling(&mut tool_catalog, mode, &self.config.tools_always_load); } let mut active_tool_names = 
initial_active_tools(&tool_catalog); let mut loop_guard = LoopGuard::default(); diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 252ea9f5..70af305b 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -5161,6 +5161,7 @@ async fn run_exec_agent( .and_then(|s| s.provider) .unwrap_or_default(), search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()), + tools_always_load: config.tools_always_load(), }; let engine_handle = spawn_engine(engine_config, config); diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index c2fca300..49cd02d1 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -1995,6 +1995,7 @@ impl RuntimeThreadManager { .and_then(|s| s.provider) .unwrap_or_default(), search_api_key: self.config.search.as_ref().and_then(|s| s.api_key.clone()), + tools_always_load: self.config.tools_always_load(), }; let engine = spawn_engine(engine_cfg, &self.config); diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index dbe69d38..bf5e9e3d 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -728,6 +728,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { .and_then(|s| s.provider) .unwrap_or_default(), search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()), + tools_always_load: config.tools_always_load(), } } diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index ed15c9d6..ddd7153d 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -618,6 +618,17 @@ These keys are accepted by the config loader but not currently used by the inter - `tools_file` +## Tool Catalog + +CodeWhale loads a small core native tool catalog by default and leaves less +common native tools discoverable through ToolSearch. 
To keep specific native +tools loaded on every request, add them to `[tools].always_load`: + +```toml +[tools] +always_load = ["git_show", "notify"] +``` + ## Feature Flags Feature flags live under the `[features]` table and are merged across profiles. diff --git a/scripts/measure-tool-catalog.py b/scripts/measure-tool-catalog.py new file mode 100755 index 00000000..c5b40367 --- /dev/null +++ b/scripts/measure-tool-catalog.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +"""Measure serialized tool catalog size before and after default deferral. + +This delegates catalog construction to an ignored Rust test so the measurement +uses the same tool definitions, JSON serialization, and deferral policy as the +runtime. Token counts are deterministic estimates using ceil(serialized_bytes/4). +""" + +from __future__ import annotations + +import json +import subprocess +import sys + + +MARKER = "TOOL_CATALOG_METRICS " + + +def main() -> int: + cmd = [ + "cargo", + "test", + "-p", + "codewhale-tui", + "print_agent_tool_catalog_metrics", + "--", + "--ignored", + "--nocapture", + "--test-threads=1", + ] + proc = subprocess.run(cmd, text=True, capture_output=True, check=False) + sys.stderr.write(proc.stderr) + + for line in proc.stdout.splitlines(): + if MARKER in line: + metrics = json.loads(line.split(MARKER, 1)[1]) + print(json.dumps(metrics, indent=2, sort_keys=True)) + return proc.returncode + + sys.stdout.write(proc.stdout) + sys.stderr.write("missing TOOL_CATALOG_METRICS marker\n") + return proc.returncode or 1 + + +if __name__ == "__main__": + raise SystemExit(main())