feat: defer low-value native tools by default, reduce catalog tokens 73% (#2076)
This commit is contained in:
@@ -97,6 +97,12 @@ memory_path = "~/.deepseek/memory.md"
|
||||
# Parsed but currently unused (reserved for future versions):
|
||||
# tools_file = "./tools.json"
|
||||
|
||||
# Native tool catalog controls (#2076). By default only the core tool surface
|
||||
# is loaded into the model context; less common native tools are discoverable
|
||||
# through ToolSearch and loaded on first use.
|
||||
# [tools]
|
||||
# always_load = ["git_show", "notify"]
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Security
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -216,6 +216,9 @@ pub struct ConfigToml {
|
||||
pub telemetry: Option<bool>,
|
||||
pub approval_policy: Option<String>,
|
||||
pub sandbox_mode: Option<String>,
|
||||
/// Native tool catalog controls shared with `codewhale-tui`.
|
||||
#[serde(default)]
|
||||
pub tools: Option<ToolsToml>,
|
||||
#[serde(default)]
|
||||
pub providers: ProvidersToml,
|
||||
/// Per-domain network policy (#135). When absent, network tools fall back
|
||||
@@ -253,6 +256,14 @@ pub struct SkillsToml {
|
||||
pub max_install_size_bytes: Option<u64>,
|
||||
}
|
||||
|
||||
/// On-disk schema for the `[tools]` table (#2076).
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct ToolsToml {
|
||||
/// Native tool names to keep loaded outside the default core catalog.
|
||||
#[serde(default)]
|
||||
pub always_load: Vec<String>,
|
||||
}
|
||||
|
||||
/// On-disk schema for the `[snapshots]` table (#137). See
|
||||
/// `config.example.toml` for documentation.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -373,6 +384,13 @@ impl ConfigToml {
|
||||
{
|
||||
self.sandbox_mode = Some(mode);
|
||||
}
|
||||
if project.tools.is_some() {
|
||||
self.tools = project.tools;
|
||||
}
|
||||
// Provider is only overridden if explicitly set (non-default).
|
||||
if project.provider != ProviderKind::Deepseek || has_api_key {
|
||||
self.provider = project.provider;
|
||||
}
|
||||
|
||||
merge_project_provider_config(&mut self.providers.deepseek, &project.providers.deepseek);
|
||||
merge_project_provider_config(
|
||||
@@ -414,6 +432,7 @@ impl ConfigToml {
|
||||
"telemetry" => self.telemetry.map(|v| v.to_string()),
|
||||
"approval_policy" => self.approval_policy.clone(),
|
||||
"sandbox_mode" => self.sandbox_mode.clone(),
|
||||
"tools.always_load" => self.tools.as_ref().map(|tools| tools.always_load.join(",")),
|
||||
"providers.deepseek.api_key" => self.providers.deepseek.api_key.clone(),
|
||||
"providers.deepseek.base_url" => self.providers.deepseek.base_url.clone(),
|
||||
"providers.deepseek.model" => self.providers.deepseek.model.clone(),
|
||||
|
||||
@@ -664,6 +664,15 @@ pub struct SearchConfig {
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
|
||||
/// Model-visible tool catalog controls (`[tools]` table in config.toml).
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct ToolsConfig {
|
||||
/// Native tool names to keep loaded even when they are outside the small
|
||||
/// default core catalog. Unknown names are harmless and simply never match.
|
||||
#[serde(default)]
|
||||
pub always_load: Vec<String>,
|
||||
}
|
||||
|
||||
/// One configurable footer item.
|
||||
///
|
||||
/// Order in the user's `Vec<StatusItem>` is preserved: items in the left
|
||||
@@ -953,6 +962,10 @@ pub struct Config {
|
||||
/// Defaults to `"max"` at runtime if unset.
|
||||
pub reasoning_effort: Option<String>,
|
||||
pub tools_file: Option<String>,
|
||||
/// Native tool catalog controls. `tools_file` is the legacy external
|
||||
/// schema path; this table controls built-in tool loading policy.
|
||||
#[serde(default)]
|
||||
pub tools: Option<ToolsConfig>,
|
||||
pub skills_dir: Option<String>,
|
||||
pub mcp_config_path: Option<String>,
|
||||
pub notes_path: Option<String>,
|
||||
@@ -1305,6 +1318,22 @@ impl Config {
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn tools_always_load(&self) -> std::collections::HashSet<String> {
|
||||
self.tools
|
||||
.as_ref()
|
||||
.map(|tools| {
|
||||
tools
|
||||
.always_load
|
||||
.iter()
|
||||
.map(|name| name.trim())
|
||||
.filter(|name| !name.is_empty())
|
||||
.map(ToOwned::to_owned)
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
/// Load configuration from disk and merge with environment overrides.
|
||||
///
|
||||
/// # Examples
|
||||
@@ -3071,6 +3100,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
|
||||
auth_mode: override_cfg.auth_mode.or(base.auth_mode),
|
||||
reasoning_effort: override_cfg.reasoning_effort.or(base.reasoning_effort),
|
||||
tools_file: override_cfg.tools_file.or(base.tools_file),
|
||||
tools: override_cfg.tools.or(base.tools),
|
||||
skills_dir: override_cfg.skills_dir.or(base.skills_dir),
|
||||
mcp_config_path: override_cfg.mcp_config_path.or(base.mcp_config_path),
|
||||
notes_path: override_cfg.notes_path.or(base.notes_path),
|
||||
@@ -4089,6 +4119,23 @@ mod tests {
|
||||
assert_eq!(SearchProvider::default(), SearchProvider::Bing);
|
||||
}
|
||||
|
||||
#[test]
fn tools_always_load_parses_and_trims_names() {
    // One plain name, one whitespace-padded name, and one empty entry.
    let source = r#"
[tools]
always_load = ["git_show", " notify ", ""]
"#;
    let parsed: ConfigFile = toml::from_str(source).expect("tools config");
    let names = parsed.base.tools_always_load();

    // Plain and padded names are both present in trimmed form.
    assert!(names.contains("git_show"));
    assert!(names.contains("notify"));
    // The empty entry must not survive normalization.
    assert!(!names.contains(""));
}
|
||||
|
||||
#[test]
|
||||
fn explicit_duckduckgo_search_provider_is_preserved() {
|
||||
let config: Config = toml::from_str(
|
||||
|
||||
@@ -7,8 +7,8 @@
|
||||
//! - Proper cancellation support
|
||||
//! - Tool execution orchestration
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::{Arc, Mutex as StdMutex};
|
||||
@@ -171,6 +171,9 @@ pub struct EngineConfig {
|
||||
/// once at engine construction, then threaded onto every
|
||||
/// `SubAgentRuntime` the engine builds (#1806, #1808).
|
||||
pub subagent_api_timeout: Duration,
|
||||
/// Native tools that should stay in the model-visible catalog even when
|
||||
/// they are outside the small default core surface (#2076).
|
||||
pub tools_always_load: HashSet<String>,
|
||||
/// When true and `/usr/bin/bwrap` is present on Linux, route exec_shell
|
||||
/// through bubblewrap instead of relying solely on Landlock (#2184).
|
||||
#[allow(dead_code)] // Wired through ShellManager in follow-up PR
|
||||
@@ -218,6 +221,7 @@ impl Default for EngineConfig {
|
||||
subagent_api_timeout: Duration::from_secs(
|
||||
crate::config::DEFAULT_SUBAGENT_API_TIMEOUT_SECS,
|
||||
),
|
||||
tools_always_load: HashSet::new(),
|
||||
prefer_bwrap: false,
|
||||
}
|
||||
}
|
||||
@@ -1130,7 +1134,12 @@ impl Engine {
|
||||
Vec::new()
|
||||
};
|
||||
let tools = tool_registry.as_ref().map(|registry| {
|
||||
build_model_tool_catalog(registry.to_api_tools_with_cache(true), mcp_tools, mode)
|
||||
build_model_tool_catalog(
|
||||
registry.to_api_tools_with_cache(true),
|
||||
mcp_tools,
|
||||
mode,
|
||||
&self.config.tools_always_load,
|
||||
)
|
||||
});
|
||||
|
||||
// Main turn loop
|
||||
|
||||
@@ -386,40 +386,74 @@ fn tool_exec_outcome_tracks_duration() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn yolo_mode_keeps_tools_preloaded() {
|
||||
assert!(!should_default_defer_tool("exec_shell", AppMode::Yolo));
|
||||
fn core_native_tools_stay_loaded_in_yolo_mode() {
|
||||
let always_load = HashSet::new();
|
||||
assert!(!should_default_defer_tool(
|
||||
"mcp_read_resource",
|
||||
AppMode::Yolo
|
||||
"exec_shell",
|
||||
AppMode::Yolo,
|
||||
&always_load
|
||||
));
|
||||
assert!(should_default_defer_tool(
|
||||
"git_show",
|
||||
AppMode::Yolo,
|
||||
&always_load
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_yolo_mode_retains_default_defer_policy() {
|
||||
// Shell tools are kept loaded in action modes so the model can verify
|
||||
// work without an extra ToolSearch round-trip; non-action tools (e.g.
|
||||
// MCP) still defer.
|
||||
assert!(!should_default_defer_tool("exec_shell", AppMode::Agent));
|
||||
assert!(should_default_defer_tool("exec_shell", AppMode::Plan));
|
||||
assert!(!should_default_defer_tool("read_file", AppMode::Agent));
|
||||
assert!(!should_default_defer_tool("write_file", AppMode::Agent));
|
||||
let always_load = HashSet::new();
|
||||
assert!(!should_default_defer_tool(
|
||||
"exec_shell",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"edit_file",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"run_tests",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"agent_open",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"read_file",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"write_file",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(should_default_defer_tool(
|
||||
"mcp_read_resource",
|
||||
AppMode::Agent
|
||||
"git_show",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_tool_catalog_applies_native_and_mcp_deferral() {
|
||||
let always_load = HashSet::new();
|
||||
let catalog = build_model_tool_catalog(
|
||||
vec![
|
||||
api_tool("read_file"),
|
||||
api_tool("write_file"),
|
||||
api_tool("exec_shell"),
|
||||
api_tool("edit_file"),
|
||||
api_tool("project_map"),
|
||||
],
|
||||
vec![api_tool("list_mcp_resources"), api_tool("mcp_server_write")],
|
||||
AppMode::Agent,
|
||||
&always_load,
|
||||
);
|
||||
|
||||
let defer_loading = |name: &str| {
|
||||
@@ -432,11 +466,87 @@ fn model_tool_catalog_applies_native_and_mcp_deferral() {
|
||||
assert_eq!(defer_loading("read_file"), Some(false));
|
||||
assert_eq!(defer_loading("write_file"), Some(false));
|
||||
assert_eq!(defer_loading("exec_shell"), Some(false));
|
||||
assert_eq!(defer_loading("edit_file"), Some(false));
|
||||
assert_eq!(defer_loading("project_map"), Some(true));
|
||||
assert_eq!(defer_loading("list_mcp_resources"), Some(false));
|
||||
assert_eq!(defer_loading("mcp_server_write"), Some(true));
|
||||
}
|
||||
|
||||
#[test]
fn tools_always_load_overrides_default_native_deferral() {
    // A tool named in the always-load set must not be reported as deferred.
    let mut pinned = HashSet::new();
    pinned.insert("git_show".to_string());

    let deferred = should_default_defer_tool("git_show", AppMode::Agent, &pinned);

    assert!(!deferred);
}
|
||||
|
||||
/// Prints one `TOOL_CATALOG_METRICS {json}` line comparing the serialized size
/// of the registry's tool list with the subset selected by
/// `initial_active_tools` / `active_tools_for_step` for Agent mode.
///
/// Token figures are estimates computed as `ceil(bytes / 4)`.
#[test]
#[ignore = "one-shot metric for scripts/measure-tool-catalog.py"]
fn print_agent_tool_catalog_metrics() {
    let workspace = tempdir().expect("tempdir");
    let root = workspace.path().to_path_buf();
    let context = crate::tools::ToolContext::new(root.clone());
    let stub_config = Config {
        api_key: Some("test-key".to_string()),
        ..Config::default()
    };
    let client = DeepSeekClient::new(&stub_config).expect("stub client");
    let manager = crate::tools::subagent::new_shared_subagent_manager(root, 8);
    let runtime = crate::tools::subagent::SubAgentRuntime::new(
        client,
        DEFAULT_TEXT_MODEL.to_string(),
        context.clone(),
        true,
        None,
        manager.clone(),
    );
    let registry = crate::tools::ToolRegistryBuilder::new()
        .with_agent_tools(true)
        .with_todo_tool(new_shared_todo_list())
        .with_plan_tool(new_shared_plan_state())
        .with_review_tool(None, DEFAULT_TEXT_MODEL.to_string())
        .with_rlm_tool(None, DEFAULT_TEXT_MODEL.to_string())
        .with_recall_archive_tool()
        .with_notify_tool()
        .with_subagent_tools(manager, runtime)
        .build(context);

    // Baseline: the registry's tool list before any deferral is applied.
    let baseline_catalog = registry.to_api_tools_with_cache(true);
    let baseline_json = serde_json::to_vec(&baseline_catalog).expect("serialize baseline");
    let baseline_bytes = baseline_json.len();

    // Active: the same list after deferral, with no always-load overrides.
    let always_load = HashSet::new();
    let mut catalog = build_model_tool_catalog(
        baseline_catalog.clone(),
        Vec::new(),
        AppMode::Agent,
        &always_load,
    );
    ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
    let active = initial_active_tools(&catalog);
    let active_catalog = active_tools_for_step(&catalog, &active, false);
    let active_json = serde_json::to_vec(&active_catalog).expect("serialize active");
    let active_bytes = active_json.len();

    // Guard the division so an empty baseline reports 0% rather than NaN.
    let saved_bytes = baseline_bytes.saturating_sub(active_bytes);
    let reduction_percent = match baseline_bytes {
        0 => 0.0,
        total => 100.0 * saved_bytes as f64 / total as f64,
    };

    let active_tool_names: Vec<_> = active_catalog
        .iter()
        .map(|tool| tool.name.as_str())
        .collect();
    let metrics = serde_json::json!({
        "baseline_tools": baseline_catalog.len(),
        "baseline_bytes": baseline_bytes,
        "baseline_tokens_est": baseline_bytes.div_ceil(4),
        "active_tools": active_catalog.len(),
        "active_bytes": active_bytes,
        "active_tokens_est": active_bytes.div_ceil(4),
        "reduction_percent": reduction_percent,
        "active_tool_names": active_tool_names,
    });

    println!("TOOL_CATALOG_METRICS {}", metrics);
}
|
||||
|
||||
#[test]
|
||||
fn deferred_edit_file_first_use_hydrates_schema_without_execution() {
|
||||
let mut edit = api_tool("edit_file");
|
||||
@@ -511,14 +621,25 @@ fn deferred_edit_file_first_use_hydrates_schema_without_execution() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn model_tool_catalog_keeps_everything_loaded_in_yolo_mode() {
|
||||
fn model_tool_catalog_defers_non_core_native_tools_in_yolo_mode() {
|
||||
let always_load = HashSet::new();
|
||||
let catalog = build_model_tool_catalog(
|
||||
vec![api_tool("project_map")],
|
||||
vec![api_tool("read_file"), api_tool("project_map")],
|
||||
vec![api_tool("mcp_server_write")],
|
||||
AppMode::Yolo,
|
||||
&always_load,
|
||||
);
|
||||
|
||||
assert!(catalog.iter().all(|tool| tool.defer_loading == Some(false)));
|
||||
let defer_loading = |name: &str| {
|
||||
catalog
|
||||
.iter()
|
||||
.find(|tool| tool.name == name)
|
||||
.and_then(|tool| tool.defer_loading)
|
||||
};
|
||||
|
||||
assert_eq!(defer_loading("read_file"), Some(false));
|
||||
assert_eq!(defer_loading("project_map"), Some(true));
|
||||
assert_eq!(defer_loading("mcp_server_write"), Some(false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -526,6 +647,7 @@ fn model_tool_catalog_sorts_each_partition_for_prefix_cache_stability() {
|
||||
// Regression for #263: deterministic byte order of the tools array is a
|
||||
// hard requirement for DeepSeek's KV prefix cache. Built-ins stay as a
|
||||
// contiguous prefix; MCP tools follow. Within each partition: alphabetical.
|
||||
let always_load = HashSet::new();
|
||||
let catalog = build_model_tool_catalog(
|
||||
vec![
|
||||
api_tool("read_file"),
|
||||
@@ -534,6 +656,7 @@ fn model_tool_catalog_sorts_each_partition_for_prefix_cache_stability() {
|
||||
],
|
||||
vec![api_tool("mcp_zoo_b"), api_tool("mcp_aardvark_a")],
|
||||
AppMode::Yolo,
|
||||
&always_load,
|
||||
);
|
||||
|
||||
let names: Vec<&str> = catalog.iter().map(|t| t.name.as_str()).collect();
|
||||
@@ -588,11 +711,18 @@ fn deferred_tool_preflight_loads_edit_schema_without_executing_bad_aliases() {
|
||||
engine.config.plan_state.clone(),
|
||||
)
|
||||
.build(engine.build_tool_context(AppMode::Agent, false));
|
||||
let catalog = build_model_tool_catalog(
|
||||
let always_load = HashSet::new();
|
||||
let mut catalog = build_model_tool_catalog(
|
||||
registry.to_api_tools_with_cache(true),
|
||||
vec![],
|
||||
AppMode::Agent,
|
||||
&always_load,
|
||||
);
|
||||
catalog
|
||||
.iter_mut()
|
||||
.find(|tool| tool.name == "edit_file")
|
||||
.expect("edit_file registered")
|
||||
.defer_loading = Some(true);
|
||||
let mut active = initial_active_tools(&catalog);
|
||||
assert!(!active.contains("edit_file"));
|
||||
|
||||
@@ -633,10 +763,12 @@ fn deferred_tool_preflight_guides_checklist_update_list_replacement() {
|
||||
engine.config.plan_state.clone(),
|
||||
)
|
||||
.build(engine.build_tool_context(AppMode::Agent, false));
|
||||
let always_load = HashSet::new();
|
||||
let catalog = build_model_tool_catalog(
|
||||
registry.to_api_tools_with_cache(true),
|
||||
vec![],
|
||||
AppMode::Agent,
|
||||
&always_load,
|
||||
);
|
||||
let mut active = initial_active_tools(&catalog);
|
||||
assert!(!active.contains("checklist_update"));
|
||||
@@ -1726,7 +1858,8 @@ fn tool_search_activates_discovered_deferred_tools() {
|
||||
cache_control: None,
|
||||
},
|
||||
];
|
||||
ensure_advanced_tooling(&mut catalog, AppMode::Agent);
|
||||
let always_load = HashSet::new();
|
||||
ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
|
||||
let mut active = initial_active_tools(&catalog);
|
||||
let result = execute_tool_search(
|
||||
TOOL_SEARCH_BM25_NAME,
|
||||
@@ -1753,7 +1886,8 @@ async fn code_execution_runs_python_and_returns_result_payload() {
|
||||
#[test]
|
||||
fn plan_mode_catalog_skips_code_execution_tool_but_agent_keeps_it() {
|
||||
let mut plan_catalog = vec![api_tool("read_file")];
|
||||
ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan);
|
||||
let always_load = HashSet::new();
|
||||
ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan, &always_load);
|
||||
assert!(
|
||||
!plan_catalog
|
||||
.iter()
|
||||
@@ -1762,7 +1896,7 @@ fn plan_mode_catalog_skips_code_execution_tool_but_agent_keeps_it() {
|
||||
);
|
||||
|
||||
let mut agent_catalog = vec![api_tool("read_file")];
|
||||
ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent);
|
||||
ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent, &always_load);
|
||||
assert!(
|
||||
agent_catalog
|
||||
.iter()
|
||||
|
||||
@@ -29,63 +29,48 @@ pub(super) fn is_tool_search_tool(name: &str) -> bool {
|
||||
matches!(name, TOOL_SEARCH_REGEX_NAME | TOOL_SEARCH_BM25_NAME)
|
||||
}
|
||||
|
||||
pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
|
||||
if mode == AppMode::Yolo {
|
||||
pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[
|
||||
"agent_open",
|
||||
"apply_patch",
|
||||
"edit_file",
|
||||
"exec_shell",
|
||||
"fetch_url",
|
||||
"file_search",
|
||||
"git_diff",
|
||||
"git_status",
|
||||
"grep_files",
|
||||
"list_dir",
|
||||
"read_file",
|
||||
"run_tests",
|
||||
"web_search",
|
||||
"write_file",
|
||||
];
|
||||
|
||||
pub(super) fn should_default_defer_tool(
|
||||
name: &str,
|
||||
_mode: AppMode,
|
||||
always_load: &HashSet<String>,
|
||||
) -> bool {
|
||||
if always_load.contains(name) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Shell exec tools are kept active in Agent so the model can run
|
||||
// verification commands (build/test/git/cargo) without first having to
|
||||
// discover them through ToolSearch. Plan mode does not register shell
|
||||
// execution tools.
|
||||
let always_loaded_in_action_modes = matches!(mode, AppMode::Agent)
|
||||
&& matches!(
|
||||
name,
|
||||
"exec_shell"
|
||||
| "exec_shell_wait"
|
||||
| "exec_shell_interact"
|
||||
| "exec_wait"
|
||||
| "exec_interact"
|
||||
);
|
||||
if always_loaded_in_action_modes {
|
||||
if is_tool_search_tool(name) {
|
||||
return false;
|
||||
}
|
||||
|
||||
!matches!(
|
||||
name,
|
||||
"read_file"
|
||||
| "write_file"
|
||||
| "list_dir"
|
||||
| "grep_files"
|
||||
| "file_search"
|
||||
| "diagnostics"
|
||||
| "rlm_open"
|
||||
| "rlm_eval"
|
||||
| "rlm_configure"
|
||||
| "rlm_close"
|
||||
| "rlm_session_objects"
|
||||
| "handle_read"
|
||||
| "recall_archive"
|
||||
| "notify"
|
||||
| MULTI_TOOL_PARALLEL_NAME
|
||||
| "update_plan"
|
||||
| "checklist_write"
|
||||
| "todo_write"
|
||||
| "task_create"
|
||||
| "task_list"
|
||||
| "task_read"
|
||||
| "task_gate_run"
|
||||
| "task_shell_start"
|
||||
| "task_shell_wait"
|
||||
| "github_issue_context"
|
||||
| "github_pr_context"
|
||||
| REQUEST_USER_INPUT_NAME
|
||||
)
|
||||
!DEFAULT_ACTIVE_NATIVE_TOOLS
|
||||
.iter()
|
||||
.any(|core_tool| core_tool == &name)
|
||||
}
|
||||
|
||||
pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], mode: AppMode) {
|
||||
pub(super) fn apply_native_tool_deferral(
|
||||
catalog: &mut [Tool],
|
||||
mode: AppMode,
|
||||
always_load: &HashSet<String>,
|
||||
) {
|
||||
for tool in catalog {
|
||||
tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode));
|
||||
tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -111,8 +96,9 @@ pub(super) fn build_model_tool_catalog(
|
||||
mut native_tools: Vec<Tool>,
|
||||
mut mcp_tools: Vec<Tool>,
|
||||
mode: AppMode,
|
||||
always_load: &HashSet<String>,
|
||||
) -> Vec<Tool> {
|
||||
apply_native_tool_deferral(&mut native_tools, mode);
|
||||
apply_native_tool_deferral(&mut native_tools, mode, always_load);
|
||||
apply_mcp_tool_deferral(&mut mcp_tools, mode);
|
||||
// Sort each partition by name for prefix-cache stability (#263). The
|
||||
// upstream `to_api_tools()` already sorts the registry's HashMap output;
|
||||
@@ -126,7 +112,11 @@ pub(super) fn build_model_tool_catalog(
|
||||
native_tools
|
||||
}
|
||||
|
||||
pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) {
|
||||
pub(super) fn ensure_advanced_tooling(
|
||||
catalog: &mut Vec<Tool>,
|
||||
mode: AppMode,
|
||||
always_load: &HashSet<String>,
|
||||
) {
|
||||
// code_execution depends on a locally-installed Python interpreter
|
||||
// (python3 / python / py -3). Before v0.8.31, the tool was always
|
||||
// advertised and would fail at execution time on Windows where
|
||||
@@ -150,7 +140,11 @@ pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) {
|
||||
"required": ["code"]
|
||||
}),
|
||||
allowed_callers: Some(vec!["direct".to_string()]),
|
||||
defer_loading: Some(false),
|
||||
defer_loading: Some(should_default_defer_tool(
|
||||
CODE_EXECUTION_TOOL_NAME,
|
||||
mode,
|
||||
always_load,
|
||||
)),
|
||||
input_examples: None,
|
||||
strict: None,
|
||||
cache_control: None,
|
||||
@@ -166,7 +160,9 @@ pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) {
|
||||
&& !catalog.iter().any(|t| t.name == JS_EXECUTION_TOOL_NAME)
|
||||
&& crate::dependencies::resolve_node().is_some()
|
||||
{
|
||||
catalog.push(crate::tools::js_execution::js_execution_tool_definition());
|
||||
let mut tool = crate::tools::js_execution::js_execution_tool_definition();
|
||||
tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode, always_load));
|
||||
catalog.push(tool);
|
||||
}
|
||||
|
||||
if !catalog.iter().any(|t| t.name == TOOL_SEARCH_REGEX_NAME) {
|
||||
|
||||
@@ -30,7 +30,7 @@ impl Engine {
|
||||
let mut context_recovery_attempts = 0u8;
|
||||
let mut tool_catalog = tools.unwrap_or_default();
|
||||
if !tool_catalog.is_empty() {
|
||||
ensure_advanced_tooling(&mut tool_catalog, mode);
|
||||
ensure_advanced_tooling(&mut tool_catalog, mode, &self.config.tools_always_load);
|
||||
}
|
||||
let mut active_tool_names = initial_active_tools(&tool_catalog);
|
||||
let mut loop_guard = LoopGuard::default();
|
||||
|
||||
@@ -5161,6 +5161,7 @@ async fn run_exec_agent(
|
||||
.and_then(|s| s.provider)
|
||||
.unwrap_or_default(),
|
||||
search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()),
|
||||
tools_always_load: config.tools_always_load(),
|
||||
};
|
||||
|
||||
let engine_handle = spawn_engine(engine_config, config);
|
||||
|
||||
@@ -1995,6 +1995,7 @@ impl RuntimeThreadManager {
|
||||
.and_then(|s| s.provider)
|
||||
.unwrap_or_default(),
|
||||
search_api_key: self.config.search.as_ref().and_then(|s| s.api_key.clone()),
|
||||
tools_always_load: self.config.tools_always_load(),
|
||||
};
|
||||
|
||||
let engine = spawn_engine(engine_cfg, &self.config);
|
||||
|
||||
@@ -728,6 +728,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig {
|
||||
.and_then(|s| s.provider)
|
||||
.unwrap_or_default(),
|
||||
search_api_key: config.search.as_ref().and_then(|s| s.api_key.clone()),
|
||||
tools_always_load: config.tools_always_load(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -618,6 +618,17 @@ These keys are accepted by the config loader but not currently used by the inter
|
||||
|
||||
- `tools_file`
|
||||
|
||||
## Tool Catalog
|
||||
|
||||
CodeWhale loads a small core native tool catalog by default and leaves less
|
||||
common native tools discoverable through ToolSearch. To keep specific native
|
||||
tools loaded on every request, add them to `[tools].always_load`:
|
||||
|
||||
```toml
|
||||
[tools]
|
||||
always_load = ["git_show", "notify"]
|
||||
```
|
||||
|
||||
## Feature Flags
|
||||
|
||||
Feature flags live under the `[features]` table and are merged across profiles.
|
||||
|
||||
Executable
+46
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Measure serialized tool catalog size before and after default deferral.
|
||||
|
||||
This delegates catalog construction to an ignored Rust test so the measurement
|
||||
uses the same tool definitions, JSON serialization, and deferral policy as the
|
||||
runtime. Token counts are deterministic estimates using ceil(serialized_bytes/4).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
MARKER = "TOOL_CATALOG_METRICS "
|
||||
|
||||
|
||||
def main() -> int:
    """Run the ignored Rust metrics test and pretty-print the JSON it emits.

    Returns:
        cargo's exit status when the metrics line was found; 127 when cargo
        could not be launched at all; otherwise cargo's exit status, or 1 if
        that status was zero, because the marker line was missing.
    """
    cmd = [
        "cargo",
        "test",
        "-p",
        "codewhale-tui",
        "print_agent_tool_catalog_metrics",
        "--",
        "--ignored",
        "--nocapture",
        "--test-threads=1",
    ]
    try:
        proc = subprocess.run(cmd, text=True, capture_output=True, check=False)
    except OSError as exc:
        # cargo is missing or not executable: report it instead of a traceback.
        sys.stderr.write(f"failed to run {cmd[0]}: {exc}\n")
        return 127
    # Forward cargo's own diagnostics so build failures stay visible.
    sys.stderr.write(proc.stderr)

    for line in proc.stdout.splitlines():
        if MARKER in line:
            # Everything after the marker on this line is the JSON payload.
            metrics = json.loads(line.split(MARKER, 1)[1])
            print(json.dumps(metrics, indent=2, sort_keys=True))
            return proc.returncode

    # No metrics line: echo the raw output to help diagnose, then fail.
    sys.stdout.write(proc.stdout)
    sys.stderr.write("missing TOOL_CATALOG_METRICS marker\n")
    return proc.returncode or 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user