Files
codewhale/crates/tui/src/core/engine/tests.rs
T
Hunter Bown 8fadd764d2 Merge PR #3042 from Hmbown: exec --allowed-tools, --disallowed-tools, --max-turns, --append-system-prompt
feat(exec): add --allowed-tools, --disallowed-tools, --max-turns, --append-system-prompt
2026-06-10 22:11:07 -07:00

4118 lines
144 KiB
Rust

use super::*;
use super::context::TURN_MAX_OUTPUT_TOKENS;
use crate::models::SystemBlock;
use crate::test_support::lock_test_env;
use crate::tools::plan::{PlanItemArg, PlanSnapshot, StepStatus};
use crate::tools::spec::ToolCapability;
use serde_json::json;
use std::collections::{HashMap, HashSet};
use std::ffi::OsString;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use std::time::Instant;
use tempfile::tempdir;
const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set";
static CAPACITY_MEMORY_ENV_LOCK: LazyLock<tokio::sync::Mutex<()>> =
LazyLock::new(|| tokio::sync::Mutex::new(()));
struct ScopedCapacityMemoryDir {
previous: Option<OsString>,
}
impl ScopedCapacityMemoryDir {
fn set(path: &Path) -> Self {
let previous = std::env::var_os("DEEPSEEK_CAPACITY_MEMORY_DIR");
// Safety: capacity-memory tests serialize access with CAPACITY_MEMORY_ENV_LOCK
// and restore the original value in Drop.
unsafe {
std::env::set_var("DEEPSEEK_CAPACITY_MEMORY_DIR", path);
}
Self { previous }
}
}
impl Drop for ScopedCapacityMemoryDir {
fn drop(&mut self) {
// Safety: capacity-memory tests serialize access with CAPACITY_MEMORY_ENV_LOCK.
unsafe {
if let Some(previous) = self.previous.take() {
std::env::set_var("DEEPSEEK_CAPACITY_MEMORY_DIR", previous);
} else {
std::env::remove_var("DEEPSEEK_CAPACITY_MEMORY_DIR");
}
}
}
}
struct ScopedDeepSeekApiKey {
previous: Option<OsString>,
}
impl ScopedDeepSeekApiKey {
fn set(value: &str) -> Self {
let previous = std::env::var_os("DEEPSEEK_API_KEY");
// Safety: tests using this helper serialize with lock_test_env() and
// restore the original value in Drop.
unsafe {
std::env::set_var("DEEPSEEK_API_KEY", value);
}
Self { previous }
}
}
impl Drop for ScopedDeepSeekApiKey {
fn drop(&mut self) {
// Safety: tests using this helper serialize with lock_test_env().
unsafe {
if let Some(previous) = self.previous.take() {
std::env::set_var("DEEPSEEK_API_KEY", previous);
} else {
std::env::remove_var("DEEPSEEK_API_KEY");
}
}
}
}
fn build_engine_with_capacity(capacity: CapacityControllerConfig) -> Engine {
let engine_config = EngineConfig {
capacity,
..Default::default()
};
let (engine, _handle) = Engine::new(engine_config, &Config::default());
engine
}
fn catalog_tool(name: &str) -> Tool {
Tool {
tool_type: None,
name: name.to_string(),
description: String::new(),
input_schema: json!({"type": "object"}),
allowed_callers: None,
defer_loading: None,
input_examples: None,
strict: None,
cache_control: None,
}
}
#[test]
fn tool_catalog_filter_applies_allow_and_deny_gates() {
// #3027 AC1: the advertised catalog must not contain tools the execution
// gates would deny; deny wins over allow.
let mut catalog = vec![
catalog_tool("read_file"),
catalog_tool("exec_shell"),
catalog_tool("grep_files"),
];
filter_tool_catalog_for_gates(
&mut catalog,
Some(&["read_file".to_string(), "exec_shell".to_string()][..]),
Some(&["exec_shell".to_string()][..]),
);
let names: Vec<&str> = catalog.iter().map(|t| t.name.as_str()).collect();
assert_eq!(names, ["read_file"]);
}
#[test]
fn tool_catalog_filter_is_inert_without_gates() {
let mut catalog = vec![catalog_tool("read_file"), catalog_tool("exec_shell")];
filter_tool_catalog_for_gates(&mut catalog, None, None);
assert_eq!(catalog.len(), 2);
}
#[test]
fn structured_state_block_includes_rich_plan_artifact() {
let state = StructuredState {
mode_label: "Plan".to_string(),
workspace: PathBuf::from("/workspace/codewhale"),
cwd: None,
working_set_summary: None,
todo_snapshot: None,
plan_snapshot: Some(PlanSnapshot {
objective: Some("Make Plan mode reviewable".to_string()),
context_summary: Some("Grounded in issue #2691".to_string()),
sources_used: vec!["gh issue view 2691".to_string()],
critical_files: vec!["crates/tui/src/tools/plan.rs".to_string()],
constraints: vec!["Preserve legacy payloads".to_string()],
recommended_approach: Some("Enrich update_plan".to_string()),
verification_plan: Some("Run focused tests".to_string()),
risks_and_unknowns: Some("Replay may drift".to_string()),
handoff_packet: Some("Next agent should inspect replay".to_string()),
items: vec![PlanItemArg {
step: "Render rich artifact".to_string(),
status: StepStatus::InProgress,
}],
..PlanSnapshot::default()
}),
subagent_snapshots: Vec::new(),
};
let block = state.to_system_block().expect("fork state block");
assert!(block.contains("Objective: Make Plan mode reviewable"));
assert!(block.contains("Context: Grounded in issue #2691"));
assert!(block.contains("Source: gh issue view 2691"));
assert!(block.contains("Critical file: crates/tui/src/tools/plan.rs"));
assert!(block.contains("Constraint: Preserve legacy payloads"));
assert!(block.contains("Verification plan: Run focused tests"));
assert!(block.contains("Handoff packet: Next agent should inspect replay"));
assert!(block.contains("- [~] Render rich artifact"));
}
#[test]
fn env_only_auth_error_gets_recovery_hint() {
let _guard = lock_test_env();
let _env = ScopedDeepSeekApiKey::set("stale-env-key");
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
let message =
engine.decorate_auth_error_message("Authentication failed: invalid API key".to_string());
assert!(message.contains("DEEPSEEK_API_KEY"));
assert!(message.contains("no saved config key is present"));
assert!(message.contains("codewhale auth status"));
assert!(message.contains("codewhale auth set --provider deepseek"));
}
#[test]
fn config_auth_error_does_not_blame_env() {
let _guard = lock_test_env();
let _env = ScopedDeepSeekApiKey::set("stale-env-key");
let cfg = Config {
api_key: Some("fresh-config-key".to_string()),
..Config::default()
};
let (engine, _handle) = Engine::new(EngineConfig::default(), &cfg);
let message =
engine.decorate_auth_error_message("Authentication failed: invalid API key".to_string());
assert_eq!(message, "Authentication failed: invalid API key");
}
#[test]
fn plugin_tools_dir_honors_missing_custom_directory_without_fallback() {
let missing = PathBuf::from("definitely-missing-codewhale-plugin-dir");
let tools_config = crate::config::ToolsConfig {
plugin_dir: Some(missing.to_string_lossy().to_string()),
..Default::default()
};
assert_eq!(plugin_tools_dir(Some(&tools_config)), missing);
}
#[test]
fn configure_plugin_tools_applies_overrides_after_discovered_plugins() {
let tmp = tempdir().expect("tempdir");
let plugin_dir = tmp.path().join("tools");
fs::create_dir(&plugin_dir).expect("plugin dir");
fs::write(
plugin_dir.join("same-name.sh"),
"# name: same_tool\n# description: discovered plugin\n",
)
.expect("plugin script");
let mut overrides = HashMap::new();
overrides.insert(
"same_tool".to_string(),
crate::config::ToolOverride::Command {
command: "configured-command".to_string(),
args: None,
},
);
let tools_config = crate::config::ToolsConfig {
plugin_dir: Some(plugin_dir.to_string_lossy().to_string()),
overrides: Some(overrides),
..Default::default()
};
let ctx = crate::tools::ToolContext::new(tmp.path().to_path_buf());
let mut registry = crate::tools::ToolRegistry::new(ctx);
let plugin_names = configure_plugin_tools(&mut registry, Some(&tools_config));
let tool = registry.get("same_tool").expect("same_tool registered");
assert!(tool.description().contains("configured-command"));
assert!(plugin_names.contains("same_tool"));
}
fn make_plan(
read_only: bool,
supports_parallel: bool,
approval_required: bool,
interactive: bool,
) -> ToolExecutionPlan {
make_plan_at(
0,
read_only,
supports_parallel,
approval_required,
interactive,
)
}
fn make_plan_at(
index: usize,
read_only: bool,
supports_parallel: bool,
approval_required: bool,
interactive: bool,
) -> ToolExecutionPlan {
ToolExecutionPlan {
index,
id: format!("tool-{index}"),
name: "grep_files".to_string(),
input: json!({"pattern": "test"}),
caller: None,
interactive,
approval_required,
approval_description: "desc".to_string(),
supports_parallel,
read_only,
blocked_error: None,
guard_result: None,
}
}
fn api_tool(name: &str) -> Tool {
Tool {
tool_type: Some("function".to_string()),
name: name.to_string(),
description: format!("Test tool {name}"),
input_schema: json!({"type": "object"}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: None,
input_examples: None,
strict: None,
cache_control: None,
}
}
#[test]
fn engine_handle_cancel_tracks_latest_turn_token() {
let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
let stale_token = engine.cancel_token.clone();
engine.reset_cancel_token();
handle.cancel();
assert!(engine.cancel_token.is_cancelled());
assert!(handle.is_cancelled());
assert!(!stale_token.is_cancelled());
}
#[test]
fn engine_initial_prompt_includes_configured_goal() {
let config = EngineConfig {
goal_objective: Some("Fix goal handoff".to_string()),
..Default::default()
};
let (engine, _handle) = Engine::new(config, &Config::default());
let prompt = match engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text,
Some(SystemPrompt::Blocks(blocks)) => blocks
.into_iter()
.map(|block| block.text)
.collect::<Vec<_>>()
.join("\n"),
None => panic!("expected system prompt"),
};
assert!(prompt.contains("<session_goal>"));
assert!(prompt.contains("Fix goal handoff"));
assert!(
engine
.config
.goal_state
.lock()
.expect("goal lock")
.is_active()
);
}
#[test]
fn refresh_system_prompt_uses_runtime_goal_state() {
let (mut engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
{
let mut goal = engine.config.goal_state.lock().expect("goal lock");
goal.create("Close the runtime goal loop".to_string(), None);
}
engine.refresh_system_prompt();
let prompt = match engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text,
Some(SystemPrompt::Blocks(blocks)) => blocks
.into_iter()
.map(|block| block.text)
.collect::<Vec<_>>()
.join("\n"),
None => panic!("expected system prompt"),
};
assert!(prompt.contains("<session_goal>"));
assert!(prompt.contains("Close the runtime goal loop"));
}
#[test]
fn parallel_batch_requires_read_only_parallel_tools() {
let plans = vec![make_plan(true, true, false, false)];
assert!(should_parallelize_tool_batch(&plans));
let plans = vec![
make_plan(true, true, false, false),
make_plan(true, true, false, false),
];
assert!(should_parallelize_tool_batch(&plans));
let plans = vec![make_plan(false, true, false, false)];
assert!(!should_parallelize_tool_batch(&plans));
let plans = vec![make_plan(true, false, false, false)];
assert!(!should_parallelize_tool_batch(&plans));
let plans = vec![make_plan(true, true, true, false)];
assert!(!should_parallelize_tool_batch(&plans));
let plans = vec![make_plan(true, true, false, true)];
assert!(!should_parallelize_tool_batch(&plans));
}
#[test]
fn tool_execution_batches_use_serial_barriers() {
let batches = plan_tool_execution_batches(vec![
make_plan_at(0, true, true, false, false),
make_plan_at(1, true, true, false, false),
make_plan_at(2, false, false, true, false),
make_plan_at(3, true, true, false, false),
make_plan_at(4, true, false, false, false),
make_plan_at(5, true, true, false, false),
make_plan_at(6, true, true, false, false),
]);
assert_eq!(batches.len(), 5);
match &batches[0] {
ToolExecutionBatch::Parallel(plans) => {
assert_eq!(
plans.iter().map(|plan| plan.index).collect::<Vec<_>>(),
vec![0, 1]
);
}
ToolExecutionBatch::Serial(_) => panic!("first batch should be parallel"),
}
match &batches[1] {
ToolExecutionBatch::Serial(plan) => assert_eq!(plan.index, 2),
ToolExecutionBatch::Parallel(_) => panic!("second batch should be serial"),
}
match &batches[2] {
ToolExecutionBatch::Parallel(plans) => {
assert_eq!(
plans.iter().map(|plan| plan.index).collect::<Vec<_>>(),
vec![3]
);
}
ToolExecutionBatch::Serial(_) => panic!("third batch should be parallel"),
}
match &batches[3] {
ToolExecutionBatch::Serial(plan) => assert_eq!(plan.index, 4),
ToolExecutionBatch::Parallel(_) => panic!("fourth batch should be serial"),
}
match &batches[4] {
ToolExecutionBatch::Parallel(plans) => {
assert_eq!(
plans.iter().map(|plan| plan.index).collect::<Vec<_>>(),
vec![5, 6]
);
}
ToolExecutionBatch::Serial(_) => panic!("fifth batch should be parallel"),
}
}
#[test]
fn successful_update_plan_ends_plan_mode_turn_immediately() {
assert!(should_stop_after_plan_tool(
AppMode::Plan,
"update_plan",
&Ok(ToolResult::success("planned"))
));
assert!(!should_stop_after_plan_tool(
AppMode::Agent,
"update_plan",
&Ok(ToolResult::success("planned"))
));
assert!(!should_stop_after_plan_tool(
AppMode::Plan,
"request_user_input",
&Ok(ToolResult::success("input"))
));
assert!(!should_stop_after_plan_tool(
AppMode::Plan,
"update_plan",
&Err(ToolError::execution_failed("failed".to_string()))
));
}
#[test]
fn quick_plan_requests_force_update_plan_on_first_step() {
assert!(should_force_update_plan_first(
AppMode::Plan,
"Give me a quick 3-step plan to verify the UI changes."
));
assert!(should_force_update_plan_first(
AppMode::Plan,
"Make a high-level plan for the footer work."
));
assert!(!should_force_update_plan_first(
AppMode::Plan,
"Inspect the repo and then give me a quick plan."
));
assert!(!should_force_update_plan_first(
AppMode::Agent,
"Give me a quick 3-step plan."
));
}
#[test]
fn quick_plan_turn_can_narrow_first_step_tools_to_update_plan() {
let catalog = vec![
Tool {
tool_type: Some("function".to_string()),
name: "read_file".to_string(),
description: "Read a file".to_string(),
input_schema: json!({"type": "object"}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(false),
input_examples: None,
strict: None,
cache_control: None,
},
Tool {
tool_type: Some("function".to_string()),
name: "update_plan".to_string(),
description: "Publish a plan".to_string(),
input_schema: json!({"type": "object"}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(false),
input_examples: None,
strict: None,
cache_control: None,
},
];
let active = initial_active_tools(&catalog);
let forced = active_tools_for_step(&catalog, &active, true);
assert_eq!(forced.len(), 1);
assert_eq!(forced[0].name, "update_plan");
let default = active_tools_for_step(&catalog, &active, false);
assert_eq!(default.len(), 2);
}
#[test]
fn tool_error_messages_include_actionable_hints() {
let path_error = ToolError::path_escape(PathBuf::from("../escape.txt"));
let formatted = format_tool_error(&path_error, "read_file");
assert!(formatted.contains("escapes workspace"));
let missing_field = ToolError::missing_field("path");
let formatted = format_tool_error(&missing_field, "read_file");
assert!(formatted.contains("missing required field"));
let timeout = ToolError::Timeout { seconds: 5 };
let formatted = format_tool_error(&timeout, "exec_shell");
assert!(formatted.contains("timed out"));
// #3020: Plan-mode denials already explain the fix — pass through
// verbatim, with no conflicting "Adjust approval mode" suffix.
let plan_denied = ToolError::permission_denied(
"'exec_shell' is not available in Plan mode — switch to Agent, Goal, or YOLO mode to run commands and code.",
);
let formatted = format_tool_error(&plan_denied, "exec_shell");
assert_eq!(
formatted,
"'exec_shell' is not available in Plan mode — switch to Agent, Goal, or YOLO mode to run commands and code."
);
// Bare denials still get the actionable suffix.
let bare_denied = ToolError::permission_denied("nope");
let formatted = format_tool_error(&bare_denied, "exec_shell");
assert!(
formatted.contains("Adjust approval mode or request permission"),
"{formatted}"
);
// "model" must not satisfy the "mode" pass-through check.
let model_denied = ToolError::permission_denied("requested model is not allowed");
let formatted = format_tool_error(&model_denied, "agent_open");
assert!(
formatted.contains("Adjust approval mode or request permission"),
"{formatted}"
);
}
#[test]
fn tool_exec_outcome_tracks_duration() {
let outcome = ToolExecOutcome {
index: 0,
id: "tool-1".to_string(),
name: "grep_files".to_string(),
input: json!({"pattern": "test"}),
started_at: Instant::now(),
result: Ok(ToolResult::success("ok")),
};
assert!(outcome.started_at.elapsed().as_nanos() > 0);
}
#[test]
fn core_native_tools_stay_loaded_in_yolo_mode() {
let always_load = HashSet::new();
assert!(!should_default_defer_tool("exec_shell", &always_load));
// git_blame remains deferred (read-only git history beyond log/show/diff).
assert!(should_default_defer_tool("git_blame", &always_load));
}
#[test]
fn non_yolo_mode_retains_default_defer_policy() {
let always_load = HashSet::new();
assert!(!should_default_defer_tool("exec_shell", &always_load));
assert!(!should_default_defer_tool("edit_file", &always_load));
assert!(!should_default_defer_tool("apply_patch", &always_load));
assert!(!should_default_defer_tool("fetch_url", &always_load));
assert!(!should_default_defer_tool("git_diff", &always_load));
// #2654: read-only git history joins the active set.
assert!(!should_default_defer_tool("git_log", &always_load));
assert!(!should_default_defer_tool("git_show", &always_load));
assert!(!should_default_defer_tool("git_status", &always_load));
assert!(!should_default_defer_tool("run_tests", &always_load));
assert!(!should_default_defer_tool("agent_open", &always_load));
// #2605: the fetch/close side of the sub-agent surface must also stay
// active so a first `agent_eval`/`agent_close` executes instead of
// hydrating its schema and forcing a double-invoke.
assert!(!should_default_defer_tool("agent_eval", &always_load));
assert!(!should_default_defer_tool("agent_close", &always_load));
assert!(!should_default_defer_tool("read_file", &always_load));
assert!(!should_default_defer_tool("web_search", &always_load));
assert!(!should_default_defer_tool("write_file", &always_load));
assert!(!should_default_defer_tool("task_shell_start", &always_load));
assert!(!should_default_defer_tool("task_shell_wait", &always_load));
assert!(should_default_defer_tool("git_blame", &always_load));
}
#[test]
fn model_tool_catalog_applies_native_and_mcp_deferral() {
let always_load = HashSet::new();
let catalog = build_model_tool_catalog(
vec![
api_tool("read_file"),
api_tool("write_file"),
api_tool("exec_shell"),
api_tool("edit_file"),
api_tool("project_map"),
],
vec![api_tool("list_mcp_resources"), api_tool("mcp_server_write")],
AppMode::Agent,
&always_load,
);
let defer_loading = |name: &str| {
catalog
.iter()
.find(|tool| tool.name == name)
.and_then(|tool| tool.defer_loading)
};
assert_eq!(defer_loading("read_file"), Some(false));
assert_eq!(defer_loading("write_file"), Some(false));
assert_eq!(defer_loading("exec_shell"), Some(false));
assert_eq!(defer_loading("edit_file"), Some(false));
assert_eq!(defer_loading("project_map"), Some(true));
assert_eq!(defer_loading("list_mcp_resources"), Some(false));
assert_eq!(defer_loading("mcp_server_write"), Some(true));
}
#[test]
fn arcee_provider_policy_defers_risky_tools_keeps_read_only_and_tool_search() {
let always_load = HashSet::new();
let mut catalog = vec![
api_tool("read_file"),
api_tool("list_dir"),
api_tool("git_status"),
api_tool("git_diff"),
api_tool("grep_files"),
api_tool("file_search"),
api_tool("update_plan"),
api_tool("checklist_write"),
api_tool("exec_shell"),
api_tool("apply_patch"),
api_tool("write_file"),
api_tool("edit_file"),
api_tool("fetch_url"),
api_tool("web_search"),
api_tool("tool_search_tool_regex"),
api_tool("tool_search_tool_bm25"),
];
apply_provider_tool_policy(&mut catalog, ApiProvider::Arcee, &always_load);
let defer = |name: &str| {
catalog
.iter()
.find(|tool| tool.name == name)
.and_then(|tool| tool.defer_loading)
};
// Benign read-only first-turn set stays active so the opening Arcee
// request clears Cloudflare's WAF.
for active in [
"read_file",
"list_dir",
"git_status",
"git_diff",
"grep_files",
"file_search",
"update_plan",
"checklist_write",
] {
assert_eq!(defer(active), Some(false), "{active} should stay active");
}
// Tool-search stays active so the deferred tail remains discoverable.
assert_eq!(defer("tool_search_tool_regex"), Some(false));
assert_eq!(defer("tool_search_tool_bm25"), Some(false));
// WAF-risky / mutating tools are deferred on the first Arcee turn.
for deferred in [
"exec_shell",
"apply_patch",
"write_file",
"edit_file",
"fetch_url",
"web_search",
] {
assert_eq!(defer(deferred), Some(true), "{deferred} should be deferred");
}
let active = initial_active_tools(&catalog);
assert!(active.contains("read_file"));
assert!(active.contains("tool_search_tool_regex"));
assert!(!active.contains("exec_shell"));
assert!(!active.contains("apply_patch"));
}
#[test]
fn provider_tool_policy_is_noop_for_non_waf_providers() {
let always_load = HashSet::new();
let mut catalog = vec![api_tool("exec_shell"), api_tool("read_file")];
// DeepSeek has no reduced first-turn surface: the policy must leave the
// default deferral flags untouched (here: still unset).
apply_provider_tool_policy(&mut catalog, ApiProvider::Deepseek, &always_load);
assert!(catalog.iter().all(|tool| tool.defer_loading.is_none()));
}
#[test]
fn arcee_provider_policy_honors_always_load_override() {
let mut always_load = HashSet::new();
always_load.insert("exec_shell".to_string());
let mut catalog = vec![api_tool("exec_shell"), api_tool("apply_patch")];
apply_provider_tool_policy(&mut catalog, ApiProvider::Arcee, &always_load);
let defer = |name: &str| {
catalog
.iter()
.find(|tool| tool.name == name)
.and_then(|tool| tool.defer_loading)
};
// A user-pinned always_load tool stays active even on Arcee.
assert_eq!(defer("exec_shell"), Some(false));
// Other risky tools remain deferred.
assert_eq!(defer("apply_patch"), Some(true));
}
#[test]
fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() {
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
let registry = engine
.build_turn_tool_registry_builder(
AppMode::Agent,
engine.config.todos.clone(),
engine.config.plan_state.clone(),
)
.build(engine.build_tool_context(AppMode::Agent, false));
let always_load = HashSet::new();
let catalog = build_model_tool_catalog(
registry.to_api_tools_with_cache(true),
vec![],
AppMode::Agent,
&always_load,
);
let edit = catalog
.iter()
.find(|tool| tool.name == "edit_file")
.expect("edit_file registered");
assert_eq!(edit.defer_loading, Some(false));
let required = edit.input_schema["required"]
.as_array()
.expect("edit_file schema should include required fields");
assert!(required.iter().any(|field| field.as_str() == Some("path")));
assert!(
required
.iter()
.any(|field| field.as_str() == Some("search"))
);
assert!(
required
.iter()
.any(|field| field.as_str() == Some("replace"))
);
assert!(!required.iter().any(|field| field.as_str() == Some("fuzz")));
assert_eq!(
edit.input_schema["properties"]["fuzz"]["type"].as_str(),
Some("boolean")
);
let active_at_batch_start = initial_active_tools(&catalog);
assert!(active_at_batch_start.contains("edit_file"));
let mut hydrated_this_batch = HashSet::new();
assert!(
maybe_hydrate_requested_deferred_tool(
"edit_file",
&json!({
"path": "src/foo.rs",
"search": "before",
"replace": "after"
}),
&catalog,
&active_at_batch_start,
&mut hydrated_this_batch,
)
.is_none(),
"loaded edit_file calls without fuzz should execute instead of hydrating the schema"
);
assert!(hydrated_this_batch.is_empty());
}
#[test]
fn tools_always_load_overrides_default_native_deferral() {
let always_load = HashSet::from(["git_blame".to_string()]);
assert!(!should_default_defer_tool("git_blame", &always_load));
}
#[test]
#[ignore = "one-shot metric for scripts/measure-tool-catalog.py"]
#[allow(clippy::print_stderr)]
fn print_agent_tool_catalog_metrics() {
let tmp = tempdir().expect("tempdir");
let context = crate::tools::ToolContext::new(tmp.path().to_path_buf());
let client = DeepSeekClient::new(&Config {
api_key: Some("test-key".to_string()),
..Config::default()
})
.expect("stub client");
let manager = crate::tools::subagent::new_shared_subagent_manager(tmp.path().to_path_buf(), 8);
let runtime = crate::tools::subagent::SubAgentRuntime::new(
client,
DEFAULT_TEXT_MODEL.to_string(),
context.clone(),
true,
None,
manager.clone(),
);
let registry = crate::tools::ToolRegistryBuilder::new()
.with_agent_tools(true)
.with_todo_tool(new_shared_todo_list())
.with_plan_tool(new_shared_plan_state())
.with_review_tool(None, DEFAULT_TEXT_MODEL.to_string())
.with_rlm_tool(None, DEFAULT_TEXT_MODEL.to_string())
.with_notify_tool()
.with_subagent_tools(manager, runtime)
.build(context);
let baseline_catalog = registry.to_api_tools_with_cache(true);
let baseline_json = serde_json::to_vec(&baseline_catalog).expect("serialize baseline");
let always_load = HashSet::new();
let mut catalog = build_model_tool_catalog(
baseline_catalog.clone(),
vec![],
AppMode::Agent,
&always_load,
);
ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
let active = initial_active_tools(&catalog);
let active_catalog = active_tools_for_step(&catalog, &active, false);
let active_json = serde_json::to_vec(&active_catalog).expect("serialize active");
let reduction_percent = if baseline_json.is_empty() {
0.0
} else {
100.0 * (baseline_json.len().saturating_sub(active_json.len())) as f64
/ baseline_json.len() as f64
};
eprintln!(
"TOOL_CATALOG_METRICS {}",
serde_json::json!({
"baseline_tools": baseline_catalog.len(),
"baseline_bytes": baseline_json.len(),
"baseline_tokens_est": baseline_json.len().div_ceil(4),
"active_tools": active_catalog.len(),
"active_bytes": active_json.len(),
"active_tokens_est": active_json.len().div_ceil(4),
"reduction_percent": reduction_percent,
"active_tool_names": active_catalog.iter().map(|tool| tool.name.as_str()).collect::<Vec<_>>(),
})
);
}
#[test]
fn deferred_edit_file_first_use_hydrates_schema_without_execution() {
let mut edit = api_tool("edit_file");
edit.defer_loading = Some(true);
edit.input_schema = json!({
"type": "object",
"properties": {
"path": { "type": "string" },
"search": { "type": "string" },
"replace": { "type": "string" }
},
"required": ["path", "search", "replace"]
});
let catalog = vec![edit];
let active_at_batch_start = HashSet::new();
let mut hydrated_this_batch = HashSet::new();
let result = maybe_hydrate_requested_deferred_tool(
"edit_file",
&json!({
"path": "src/foo.rs",
"old_string": "before",
"new_string": "after"
}),
&catalog,
&active_at_batch_start,
&mut hydrated_this_batch,
)
.expect("first deferred use should hydrate");
assert!(!active_at_batch_start.contains("edit_file"));
assert!(hydrated_this_batch.contains("edit_file"));
assert!(result.success);
assert!(result.content.contains("Tool `edit_file` was deferred"));
assert!(result.content.contains("path: string"));
assert!(result.content.contains("search: string"));
assert!(result.content.contains("replace: string"));
assert!(result.content.contains("old_string -> search"));
assert!(result.content.contains("new_string -> replace"));
assert!(result.content.contains("The tool was not executed"));
let metadata = result.metadata.expect("metadata");
assert_eq!(metadata["event"], "tool.schema_hydrated");
assert_eq!(metadata["executed"], false);
assert_eq!(metadata["retry_required"], true);
let second_result = maybe_hydrate_requested_deferred_tool(
"edit_file",
&json!({"path": "src/bar.rs", "old_string": "before", "new_string": "after"}),
&catalog,
&active_at_batch_start,
&mut hydrated_this_batch,
)
.expect("later calls in the same batch should hydrate instead of executing");
assert_eq!(second_result.metadata.unwrap()["executed"], false);
assert_eq!(hydrated_this_batch.len(), 1);
let mut active_next_batch = active_at_batch_start.clone();
active_next_batch.extend(hydrated_this_batch);
let mut hydrated_next_batch = HashSet::new();
assert!(
maybe_hydrate_requested_deferred_tool(
"edit_file",
&json!({"path": "src/foo.rs", "search": "before", "replace": "after"}),
&catalog,
&active_next_batch,
&mut hydrated_next_batch,
)
.is_none(),
"tools hydrated in a previous batch should execute normally"
);
}
#[test]
fn model_tool_catalog_defers_non_core_native_tools_in_yolo_mode() {
let always_load = HashSet::new();
let catalog = build_model_tool_catalog(
vec![api_tool("read_file"), api_tool("project_map")],
vec![api_tool("mcp_server_write")],
AppMode::Yolo,
&always_load,
);
let defer_loading = |name: &str| {
catalog
.iter()
.find(|tool| tool.name == name)
.and_then(|tool| tool.defer_loading)
};
assert_eq!(defer_loading("read_file"), Some(false));
assert_eq!(defer_loading("project_map"), Some(true));
assert_eq!(defer_loading("mcp_server_write"), Some(false));
}
#[test]
fn model_tool_catalog_sorts_each_partition_for_prefix_cache_stability() {
// Regression for #263: deterministic byte order of the tools array is a
// hard requirement for DeepSeek's KV prefix cache. Built-ins stay as a
// contiguous prefix; MCP tools follow. Within each partition: alphabetical.
let always_load = HashSet::new();
let catalog = build_model_tool_catalog(
vec![
api_tool("read_file"),
api_tool("apply_patch"),
api_tool("exec_shell"),
],
vec![api_tool("mcp_zoo_b"), api_tool("mcp_aardvark_a")],
AppMode::Yolo,
&always_load,
);
let names: Vec<&str> = catalog.iter().map(|t| t.name.as_str()).collect();
assert_eq!(
names,
vec![
"apply_patch",
"exec_shell",
"read_file",
"mcp_aardvark_a",
"mcp_zoo_b",
],
"built-ins must be alphabetical and contiguous; MCP tools follow, alphabetical",
);
}
#[test]
fn active_tool_list_pushes_deferred_activations_to_the_tail() {
// Regression for #263: when ToolSearch activates a deferred tool mid-
// session, it must NOT be inserted at its catalog index — that would
// shift every later tool's byte offset and bust the cached prefix.
// Deferred-but-now-active tools belong at the tail.
let mut a = api_tool("a_load_now");
a.defer_loading = Some(false);
let mut search = api_tool("search_via_toolsearch");
search.defer_loading = Some(true);
let mut b = api_tool("b_load_now");
b.defer_loading = Some(false);
let catalog = vec![a, search, b];
let active: HashSet<String> = ["a_load_now", "search_via_toolsearch", "b_load_now"]
.into_iter()
.map(String::from)
.collect();
let listed = active_tools_for_step(&catalog, &active, false);
let names: Vec<&str> = listed.iter().map(|t| t.name.as_str()).collect();
assert_eq!(
names,
vec!["a_load_now", "b_load_now", "search_via_toolsearch"],
"deferred-but-active tools must come after always-loaded tools",
);
}
#[test]
fn deferred_tool_preflight_loads_edit_schema_without_executing_bad_aliases() {
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
let registry = engine
.build_turn_tool_registry_builder(
AppMode::Agent,
engine.config.todos.clone(),
engine.config.plan_state.clone(),
)
.build(engine.build_tool_context(AppMode::Agent, false));
let always_load = HashSet::new();
let mut catalog = build_model_tool_catalog(
registry.to_api_tools_with_cache(true),
vec![],
AppMode::Agent,
&always_load,
);
catalog
.iter_mut()
.find(|tool| tool.name == "edit_file")
.expect("edit_file registered")
.defer_loading = Some(true);
let mut active = initial_active_tools(&catalog);
assert!(!active.contains("edit_file"));
let result = preflight_requested_deferred_tool(
"edit_file",
&json!({
"path": "src/foo.rs",
"old_string": "before",
"new_string": "after"
}),
&catalog,
&mut active,
)
.expect("deferred edit_file should preflight");
assert!(active.contains("edit_file"));
assert!(result.success);
assert!(result.content.contains("Tool `edit_file` was deferred"));
assert!(result.content.contains("The tool was not executed"));
assert!(result.content.contains("path: string required"));
assert!(result.content.contains("search: string required"));
assert!(result.content.contains("replace: string required"));
assert!(result.content.contains("old_string -> search"));
assert!(result.content.contains("new_string -> replace"));
assert_eq!(
result.metadata.as_ref().unwrap()["deferred_tool_loaded"],
json!(true)
);
}
#[test]
fn deferred_tool_preflight_guides_rlm_open_misnamed_source_fields() {
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
let registry = engine
.build_turn_tool_registry_builder(
AppMode::Agent,
engine.config.todos.clone(),
engine.config.plan_state.clone(),
)
.build(engine.build_tool_context(AppMode::Agent, false));
let always_load = HashSet::new();
let mut catalog = build_model_tool_catalog(
registry.to_api_tools_with_cache(true),
vec![],
AppMode::Agent,
&always_load,
);
catalog
.iter_mut()
.find(|tool| tool.name == "rlm_open")
.expect("rlm_open registered")
.defer_loading = Some(true);
let mut active = initial_active_tools(&catalog);
assert!(!active.contains("rlm_open"));
let result = preflight_requested_deferred_tool(
"rlm_open",
&json!({
"name": "active_prompt",
"prompt": "inspect this",
"path": "src/lib.rs"
}),
&catalog,
&mut active,
)
.expect("deferred rlm_open should preflight");
assert!(active.contains("rlm_open"));
assert!(result.success);
assert!(result.content.contains("Tool `rlm_open` was deferred"));
assert!(result.content.contains("The tool was not executed"));
assert!(result.content.contains("session_object: string"));
assert!(
result.content.contains(
"prompt -> file_path (local file), content (inline text), url, or session_object"
),
"prompt correction includes session_object: {}",
result.content
);
assert!(
result.content.contains(
"path -> file_path (local file), content (inline text), url, or session_object"
),
"path correction includes session_object: {}",
result.content
);
assert_eq!(
result.metadata.as_ref().unwrap()["deferred_tool_loaded"],
json!(true)
);
}
#[test]
fn deferred_tool_preflight_guides_checklist_update_list_replacement() {
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
let registry = engine
.build_turn_tool_registry_builder(
AppMode::Agent,
engine.config.todos.clone(),
engine.config.plan_state.clone(),
)
.build(engine.build_tool_context(AppMode::Agent, false));
let always_load = HashSet::new();
let catalog = build_model_tool_catalog(
registry.to_api_tools_with_cache(true),
vec![],
AppMode::Agent,
&always_load,
);
let mut active = initial_active_tools(&catalog);
assert!(!active.contains("checklist_update"));
let result = preflight_requested_deferred_tool(
"checklist_update",
&json!({
"todos": [
{ "content": "wire preflight", "status": "completed" }
]
}),
&catalog,
&mut active,
)
.expect("deferred checklist_update should preflight");
assert!(active.contains("checklist_update"));
assert!(result.success);
assert!(
result
.content
.contains("Tool `checklist_update` was deferred")
);
assert!(result.content.contains("id: integer required"));
assert!(result.content.contains("status: string"));
assert!(result.content.contains("Missing required fields:"));
assert!(result.content.contains("id, status"));
assert!(result.content.contains("Unexpected fields:"));
assert!(result.content.contains("todos"));
assert!(result.content.contains("Use checklist_write"));
}
#[tokio::test]
async fn run_shell_command_op_requests_approval_and_executes_shell() {
let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
let handle_for_approval = handle.clone();
let task = tokio::spawn(async move {
engine
.handle_run_shell_command(
"echo bang-ok".to_string(),
AppMode::Agent,
false,
false,
crate::tui::approval::ApprovalMode::Suggest,
)
.await;
});
let mut saw_started = false;
let mut saw_approval = false;
let mut saw_complete = false;
let mut saw_turn_complete = false;
let mut rx = handle.rx_event.write().await;
while let Some(event) = rx.recv().await {
match event {
Event::TurnStarted { turn_id } => {
assert!(turn_id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
}
Event::ToolCallStarted { id, name, input } => {
saw_started = true;
assert!(id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
assert_eq!(name, "exec_shell");
assert_eq!(input["command"], json!("echo bang-ok"));
assert_eq!(input["source"], json!("user"));
}
Event::ApprovalRequired { id, tool_name, .. } => {
saw_approval = true;
assert!(id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
assert_eq!(tool_name, "exec_shell");
handle_for_approval
.approve_tool_call(id)
.await
.expect("approve shell");
}
Event::ToolCallComplete { id, name, result } => {
saw_complete = true;
assert!(id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
assert_eq!(name, "exec_shell");
let result = result.expect("shell result");
assert!(result.success, "{result:?}");
assert!(result.content.contains("bang-ok"), "{result:?}");
}
Event::TurnComplete { status, .. } => {
saw_turn_complete = true;
assert_eq!(status, TurnOutcomeStatus::Completed);
break;
}
_ => {}
}
}
drop(rx);
task.await.expect("shell op task");
assert!(saw_started);
assert!(saw_approval);
assert!(saw_complete);
assert!(saw_turn_complete);
}
#[tokio::test]
async fn run_shell_command_op_skips_approval_when_auto_approved() {
let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
engine
.handle_run_shell_command(
"echo bang-yolo".to_string(),
AppMode::Yolo,
true,
true,
crate::tui::approval::ApprovalMode::Auto,
)
.await;
let mut saw_complete = false;
let mut rx = handle.rx_event.write().await;
while let Some(event) = rx.recv().await {
match event {
Event::ApprovalRequired { .. } => {
panic!("auto-approved shell shortcut should not request approval");
}
Event::ToolCallComplete { result, .. } => {
saw_complete = true;
let result = result.expect("shell result");
assert!(result.success, "{result:?}");
assert!(result.content.contains("bang-yolo"), "{result:?}");
}
Event::TurnComplete { status, .. } => {
assert_eq!(status, TurnOutcomeStatus::Completed);
break;
}
_ => {}
}
}
assert!(saw_complete);
}
#[tokio::test]
async fn run_shell_command_op_preserves_plan_mode_shell_block() {
let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
engine
.handle_run_shell_command(
"echo blocked".to_string(),
AppMode::Plan,
false,
false,
crate::tui::approval::ApprovalMode::Suggest,
)
.await;
let mut saw_complete = false;
let mut saw_turn_complete = false;
let mut rx = handle.rx_event.write().await;
while let Some(event) = rx.recv().await {
match event {
Event::ApprovalRequired { .. } => {
panic!("Plan mode shell should be blocked before approval");
}
Event::ToolCallComplete { name, result, .. } => {
saw_complete = true;
assert_eq!(name, "exec_shell");
let err = result.expect_err("plan shell should fail");
assert!(
err.to_string().contains("unavailable in Plan mode"),
"{err}"
);
}
Event::TurnComplete { status, .. } => {
saw_turn_complete = true;
assert_eq!(status, TurnOutcomeStatus::Failed);
break;
}
_ => {}
}
}
assert!(saw_complete);
assert!(saw_turn_complete);
}
#[test]
fn deferred_tool_preflight_skips_already_active_tools() {
let mut tool = api_tool("deferred_tool");
tool.defer_loading = Some(true);
let catalog = vec![tool];
let mut active = HashSet::from(["deferred_tool".to_string()]);
assert!(
preflight_requested_deferred_tool("deferred_tool", &json!({}), &catalog, &mut active,)
.is_none(),
"already active tools should execute normally"
);
}
#[test]
fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() {
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
let registry = engine
.build_turn_tool_registry_builder(
AppMode::Plan,
engine.config.todos.clone(),
engine.config.plan_state.clone(),
)
.build(engine.build_tool_context(AppMode::Plan, false));
assert!(registry.contains("read_file"));
assert!(registry.contains("list_dir"));
assert!(!registry.contains("write_file"));
assert!(!registry.contains("edit_file"));
assert!(!registry.contains("exec_shell"));
assert!(!registry.contains("exec_shell_wait"));
assert!(!registry.contains("exec_shell_interact"));
assert!(!registry.contains("task_shell_start"));
assert!(!registry.contains("task_create"));
assert!(!registry.contains("task_gate_run"));
assert!(!registry.contains("rlm"));
assert!(!registry.contains("fim_edit"));
assert!(registry.contains("update_plan"));
assert!(registry.contains("create_goal"));
assert!(registry.contains("get_goal"));
assert!(registry.contains("update_goal"));
assert!(registry.contains("task_list"));
assert!(registry.contains("task_read"));
assert!(registry.contains("handle_read"));
let plan_state_tools = [
"checklist_add",
"checklist_update",
"checklist_write",
"todo_add",
"todo_update",
"todo_write",
"update_plan",
];
let mut write_or_exec_tools: Vec<String> = registry
.all()
.into_iter()
.filter(|tool| !plan_state_tools.contains(&tool.name()))
.filter(|tool| {
let capabilities = tool.capabilities();
capabilities.contains(&ToolCapability::WritesFiles)
|| capabilities.contains(&ToolCapability::ExecutesCode)
})
.map(|tool| tool.name().to_string())
.collect();
write_or_exec_tools.sort();
assert!(
write_or_exec_tools.is_empty(),
"Plan mode must not register file-writing or code-execution tools: {write_or_exec_tools:?}"
);
}
/// Plan mode toggle must not change the byte representation of the tool
/// catalog head. DeepSeek's KV prefix cache includes the tools array in
/// the immutable prefix; if toggling between Plan and Agent mode changes
/// the tool bytes, every mode switch forces a full re-prefill.
///
/// This test verifies two invariants:
/// 1. Building the catalog twice for the same mode produces identical bytes.
/// 2. The head of the catalog (non-deferred tools) preserves its order
/// when deferred tools are activated mid-session.
#[test]
fn plan_mode_toggle_preserves_catalog_byte_stability() {
let always_load = HashSet::new();
// Build catalog for Plan mode twice — must be byte-identical.
let plan_native = vec![
api_tool("read_file"),
api_tool("list_dir"),
api_tool("write_file"),
api_tool("edit_file"),
api_tool("exec_shell"),
];
let plan_mcp = vec![api_tool("mcp_search"), api_tool("mcp_write")];
let catalog_a = build_model_tool_catalog(
plan_native.clone(),
plan_mcp.clone(),
AppMode::Plan,
&always_load,
);
let catalog_b = build_model_tool_catalog(
plan_native.clone(),
plan_mcp.clone(),
AppMode::Plan,
&always_load,
);
let json_a = serde_json::to_string(&catalog_a).unwrap();
let json_b = serde_json::to_string(&catalog_b).unwrap();
assert_eq!(
json_a, json_b,
"building the catalog twice for Plan mode must produce identical bytes"
);
// Build catalog for Agent mode twice — must be byte-identical.
let agent_catalog_a = build_model_tool_catalog(
plan_native.clone(),
plan_mcp.clone(),
AppMode::Agent,
&always_load,
);
let agent_catalog_b = build_model_tool_catalog(
plan_native.clone(),
plan_mcp.clone(),
AppMode::Agent,
&always_load,
);
let agent_json_a = serde_json::to_string(&agent_catalog_a).unwrap();
let agent_json_b = serde_json::to_string(&agent_catalog_b).unwrap();
assert_eq!(
agent_json_a, agent_json_b,
"building the catalog twice for Agent mode must produce identical bytes"
);
// Verify that the non-deferred tools that are common to both modes
// appear in the same order. Plan mode excludes execution tools, but
// the tools that are present in both modes must have stable ordering.
let plan_names: Vec<&str> = catalog_a
.iter()
.filter(|t| !t.defer_loading.unwrap_or(false))
.map(|t| t.name.as_str())
.collect();
let agent_names: Vec<&str> = agent_catalog_a
.iter()
.filter(|t| !t.defer_loading.unwrap_or(false))
.map(|t| t.name.as_str())
.collect();
// The common prefix of non-deferred tools must be identical.
let common_len = plan_names.len().min(agent_names.len());
assert_eq!(
&plan_names[..common_len],
&agent_names[..common_len],
"non-deferred tools common to Plan and Agent must appear in the same order"
);
// Verify that activating a deferred tool mid-session appends to the
// tail without reordering the head.
let mut tools_with_deferred = plan_native.clone();
tools_with_deferred.push({
let mut t = api_tool("deferred_search");
t.defer_loading = Some(true);
t
});
let catalog_with_deferred = build_model_tool_catalog(
tools_with_deferred,
plan_mcp.clone(),
AppMode::Agent,
&always_load,
);
// Activate the deferred tool.
let mut active: HashSet<String> = catalog_with_deferred
.iter()
.filter(|t| !t.defer_loading.unwrap_or(false))
.map(|t| t.name.clone())
.collect();
active.insert("deferred_search".to_string());
let listed = active_tools_for_step(&catalog_with_deferred, &active, false);
let listed_names: Vec<&str> = listed.iter().map(|t| t.name.as_str()).collect();
// The head (non-deferred tools) must still be in their original order.
let head_names: Vec<&str> = catalog_with_deferred
.iter()
.filter(|t| !t.defer_loading.unwrap_or(false))
.map(|t| t.name.as_str())
.collect();
assert!(
listed_names.starts_with(&head_names),
"activating a deferred tool must not reorder the catalog head: \
expected {head_names:?} as prefix, got {listed_names:?}"
);
// The deferred tool must be at the tail.
assert_eq!(
listed_names.last(),
Some(&"deferred_search"),
"deferred tool must be appended at the tail"
);
}
#[test]
fn parent_turn_registry_includes_goal_tools_for_all_modes() {
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
for mode in [AppMode::Plan, AppMode::Agent, AppMode::Yolo] {
let registry = engine
.build_turn_tool_registry_builder(
mode,
engine.config.todos.clone(),
engine.config.plan_state.clone(),
)
.build(engine.build_tool_context(mode, false));
for name in ["create_goal", "get_goal", "update_goal"] {
assert!(
registry.contains(name),
"parent {mode:?} registry should expose {name}"
);
}
}
}
#[test]
fn agent_mode_can_build_auto_approved_tool_context() {
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
assert!(
!engine
.build_tool_context(AppMode::Agent, false)
.auto_approve
);
assert!(engine.build_tool_context(AppMode::Agent, true).auto_approve);
assert!(engine.build_tool_context(AppMode::Yolo, false).auto_approve);
}
#[test]
fn agent_and_yolo_modes_elevate_shell_sandbox_to_allow_network() {
// Regression for #273: the seatbelt-default policy denies all outbound
// network (including DNS), which broke `curl`, `yt-dlp`, package managers,
// and similar shell commands in Agent mode. Elevation must include
// network access so the application-level NetworkPolicy stays the only
// outbound boundary.
let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
let agent_ctx = engine.build_tool_context(AppMode::Agent, false);
let agent_policy = agent_ctx
.elevated_sandbox_policy
.as_ref()
.expect("Agent mode should elevate the sandbox policy");
assert!(
agent_policy.has_network_access(),
"Agent mode must allow shell network access; got {agent_policy:?}",
);
let yolo_ctx = engine.build_tool_context(AppMode::Yolo, false);
let yolo_policy = yolo_ctx
.elevated_sandbox_policy
.as_ref()
.expect("Yolo mode should elevate the sandbox policy");
assert!(yolo_policy.has_network_access());
// v0.8.11: YOLO drops to DangerFullAccess (no sandbox) so the user
// is not bounced through approval round-trips for legitimate
// outside-workspace writes (package installs, sub-agent
// workspaces, ~/.cache mutations, etc.). YOLO is opt-in and
// already enables trust mode + auto-approve; the sandbox was the
// last guardrail and contradicts the contract.
assert!(
matches!(yolo_policy, crate::sandbox::SandboxPolicy::DangerFullAccess),
"Yolo mode must use DangerFullAccess (no sandbox); got {yolo_policy:?}",
);
// Plan mode (#1077): the sandbox must actually deny workspace writes.
// The previous WorkspaceWrite-with-empty-network policy whitelisted the
// workspace as writable, so `python -c "open('f','w').write('x')"`
// mutated files inside the workspace despite Plan-mode's intent. Lock
// it to ReadOnly: no writes anywhere, no network. The shell tool stays
// exposed for read-only inspection (`ls`, `git log`, `grep`, …) and
// the per-platform sandbox enforces the rest.
let plan_ctx = engine.build_tool_context(AppMode::Plan, false);
let plan_policy = plan_ctx
.elevated_sandbox_policy
.as_ref()
.expect("Plan mode should make the shell sandbox policy explicit");
assert!(
matches!(plan_policy, crate::sandbox::SandboxPolicy::ReadOnly),
"Plan mode must use ReadOnly sandbox to deny workspace writes (#1077); got {plan_policy:?}",
);
assert!(!plan_policy.has_network_access());
assert!(!plan_policy.has_full_disk_write_access());
assert!(
plan_policy
.get_writable_roots(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
.is_empty(),
"ReadOnly policy must enumerate zero writable roots; got {plan_policy:?}",
);
assert!(
plan_ctx
.shell_network_denied_hint
.as_deref()
.is_some_and(|hint| hint.contains("Plan mode") && hint.contains("read-only")),
);
}
#[test]
fn sandbox_policy_for_mode_returns_correct_policy_per_mode() {
use super::tool_setup::sandbox_policy_for_mode;
use crate::sandbox::SandboxPolicy;
let workspace = PathBuf::from("/tmp/example-workspace");
// Plan: ReadOnly. The whole point of #1077.
assert!(matches!(
sandbox_policy_for_mode(AppMode::Plan, &workspace),
SandboxPolicy::ReadOnly
));
// Agent: WorkspaceWrite with workspace as writable root, network on.
match sandbox_policy_for_mode(AppMode::Agent, &workspace) {
SandboxPolicy::WorkspaceWrite {
writable_roots,
network_access,
..
} => {
assert_eq!(writable_roots, vec![workspace.clone()]);
assert!(network_access, "Agent mode must allow shell network access");
}
other => panic!("Agent mode should be WorkspaceWrite; got {other:?}"),
}
// YOLO: DangerFullAccess.
assert!(matches!(
sandbox_policy_for_mode(AppMode::Yolo, &workspace),
SandboxPolicy::DangerFullAccess
));
}
#[tokio::test]
async fn session_update_preserves_reasoning_tool_only_turn() {
let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
let assistant = Message {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
thinking: "Need a tool before answering.".to_string(),
},
ContentBlock::ToolUse {
id: "tool-1".to_string(),
name: "read_file".to_string(),
input: json!({"path": "Cargo.toml"}),
caller: None,
},
],
};
engine.add_session_message(assistant.clone()).await;
let event = {
let mut rx = handle.rx_event.write().await;
rx.recv().await.expect("session update event")
};
let Event::SessionUpdated { messages, .. } = event else {
panic!("expected session update event");
};
assert_eq!(messages, vec![assistant]);
}
#[tokio::test]
async fn set_model_reloads_instruction_sources_and_updates_session_prompt() {
let tmp = tempdir().expect("tempdir");
let instructions = tmp.path().join("instructions.md");
fs::write(&instructions, "FLASH_INSTRUCTIONS_MARKER").expect("write instructions");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
model: "deepseek-v4-flash".to_string(),
instructions: vec![instructions.clone().into()],
..Default::default()
};
let (engine, handle) = Engine::new(config, &Config::default());
fs::write(&instructions, "PRO_INSTRUCTIONS_MARKER").expect("rewrite instructions");
let run = tokio::spawn(engine.run());
handle
.send(Op::SetModel {
model: "deepseek-v4-pro".to_string(),
mode: AppMode::Agent,
})
.await
.expect("send set model");
let (model, prompt) = {
let mut rx = handle.rx_event.write().await;
loop {
let event = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
.await
.expect("session update after model switch")
.expect("event");
if let Event::SessionUpdated {
model,
system_prompt,
..
} = event
{
let prompt = match system_prompt.expect("system prompt") {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(blocks) => blocks
.into_iter()
.map(|block| block.text)
.collect::<Vec<_>>()
.join("\n"),
};
break (model, prompt);
}
}
};
run.abort();
assert_eq!(model, "deepseek-v4-pro");
assert!(prompt.contains("PRO_INSTRUCTIONS_MARKER"));
assert!(!prompt.contains("FLASH_INSTRUCTIONS_MARKER"));
}
#[tokio::test]
async fn change_mode_refreshes_session_prompt_and_updates_session() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
model: "deepseek-v4-pro".to_string(),
..Default::default()
};
let (engine, handle) = Engine::new(config, &Config::default());
let run = tokio::spawn(engine.run());
handle
.send(Op::ChangeMode {
mode: AppMode::Yolo,
})
.await
.expect("send change mode");
let (_prompt, messages) = {
let mut rx = handle.rx_event.write().await;
loop {
let event = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
.await
.expect("session update after mode switch")
.expect("event");
if let Event::SessionUpdated {
system_prompt,
messages,
..
} = event
{
let prompt = match system_prompt.expect("system prompt") {
SystemPrompt::Text(text) => text,
SystemPrompt::Blocks(blocks) => blocks
.into_iter()
.map(|block| block.text)
.collect::<Vec<_>>()
.join("\n"),
};
break (prompt, messages);
}
}
};
run.abort();
assert!(
messages.iter().all(|message| message.role != "system"),
"mode switch must not persist appended system messages: {messages:?}"
);
assert!(
messages.iter().all(|message| {
message.content.iter().all(|block| {
!matches!(
block,
ContentBlock::Text { text, .. }
if text.contains("<runtime_prompt")
)
})
}),
"runtime prompt tags should be request-time metadata, not session history"
);
}
#[test]
fn turn_approval_mode_prefers_auto_approve_flag() {
use crate::tui::approval::ApprovalMode;
assert_eq!(
agent_approval_mode_for_turn(true, ApprovalMode::Suggest),
ApprovalMode::Auto
);
assert_eq!(
approval_mode_for(
AppMode::Agent,
agent_approval_mode_for_turn(true, ApprovalMode::Never),
),
ApprovalMode::Auto
);
assert_eq!(
approval_mode_for(AppMode::Yolo, ApprovalMode::Suggest),
ApprovalMode::Auto
);
assert_eq!(
approval_mode_for(AppMode::Plan, ApprovalMode::Auto),
ApprovalMode::Never
);
}
#[test]
fn runtime_prompt_is_projected_without_persisting_to_session_messages() {
use crate::tui::approval::ApprovalMode;
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine.current_mode = AppMode::Plan;
engine.session.approval_mode = ApprovalMode::Suggest;
engine.session.messages = vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "summary after compaction".to_string(),
cache_control: None,
}],
}]
.into();
let stored = engine.session.messages.clone();
let request_messages = engine.messages_with_turn_metadata();
assert_eq!(&*engine.session.messages, &*stored);
assert_eq!(request_messages.len(), stored.len() + 1);
assert!(
request_messages
.iter()
.all(|message| message.role != "system"),
"runtime prompts must not create appended system messages"
);
let runtime = request_messages.last().expect("runtime prompt message");
assert_eq!(runtime.role, "user");
let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
else {
panic!("expected text runtime prompt");
};
assert!(text.contains("<runtime_prompt"));
assert!(text.contains("mode=\"plan\""));
assert!(
text.contains("approval=\"never\""),
"Plan mode should project its fixed never-approval policy: {text}"
);
}
#[tokio::test]
async fn change_mode_op_updates_current_mode_and_emits_status() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
model: "deepseek-v4-pro".to_string(),
..Default::default()
};
let (engine, handle) = Engine::new(config, &Config::default());
let run = tokio::spawn(engine.run());
handle
.send(Op::ChangeMode {
mode: AppMode::Yolo,
})
.await
.expect("send change mode");
// Expect a SessionUpdated event confirming the mode change (the
// per-turn <runtime_prompt> tag carries the mode in every request,
// so no separate persistence of a mode_change runtime event is needed).
let mut rx = handle.rx_event.write().await;
let session_updated = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
.await
.expect("session update after mode switch")
.expect("event");
let Event::SessionUpdated { messages, .. } = session_updated else {
panic!("should emit SessionUpdated after mode change, got: {session_updated:?}");
};
assert!(
messages.iter().all(|message| {
message.content.iter().all(|block| {
!matches!(
block,
ContentBlock::Text { text, .. }
if text.contains("<runtime_prompt")
)
})
}),
"runtime prompt tags must not be persisted into session messages after mode change"
);
// Also expect a status event
let status = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
.await
.expect("status after mode switch")
.expect("event");
assert!(
matches!(status, Event::Status { .. }),
"should emit Status after mode change, got: {status:?}"
);
run.abort();
}
#[test]
fn detects_context_length_errors_from_provider_payloads() {
let msg = r#"SSE stream request failed: HTTP 400 Bad Request: {"error":{"message":"This model's maximum context length is 131072 tokens. However, you requested 153056 tokens (148960 in the messages, 4096 in the completion).","type":"invalid_request_error"}}"#;
assert!(is_context_length_error_message(msg));
assert!(!is_context_length_error_message(
"SSE stream request failed: HTTP 400 Bad Request: model not found"
));
}
#[test]
fn context_budget_reserves_output_and_headroom() {
// Serialize with other tests that mutate DEEPSEEK_MAX_OUTPUT_TOKENS so
// the internal effective_max_output_tokens() call sees a stable env.
let _lock = lock_test_env();
// V4 has a 1M context window — the only family that comfortably hosts
// a 256K output reservation without saturating the input budget to 0.
let budget = context_input_budget("deepseek-v4-pro")
.expect("deepseek-v4-pro should have a known context window");
let v4_window: usize = 1_000_000;
let expected = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
assert_eq!(budget, expected);
}
#[test]
fn effective_max_output_tokens_caps_api_request_for_large_window_models() {
// Serialize with other tests that mutate DEEPSEEK_MAX_OUTPUT_TOKENS so
// v4_cap and flash_cap below see the same env state.
let _lock = lock_test_env();
// V4 models have a 1M context window but the API request cap must stay
// well below common provider limits (e.g., 131K total on self-hosted
// vLLM/SGLang). The cap should never exceed 65K.
let v4_cap = effective_max_output_tokens("deepseek-v4-pro");
assert!(
v4_cap <= 65_536,
"V4 API request cap should be ≤64K, got {v4_cap}"
);
assert!(
v4_cap > 0,
"V4 API request cap should be positive, got {v4_cap}"
);
let flash_cap = effective_max_output_tokens("deepseek-v4-flash");
assert_eq!(v4_cap, flash_cap);
}
struct ScopedDeepSeekMaxOutputTokens {
previous: Option<OsString>,
}
impl ScopedDeepSeekMaxOutputTokens {
fn set(value: &str) -> Self {
let previous = std::env::var_os("DEEPSEEK_MAX_OUTPUT_TOKENS");
// Safety: tests using this helper serialize with lock_test_env() and
// restore the original value in Drop.
unsafe {
std::env::set_var("DEEPSEEK_MAX_OUTPUT_TOKENS", value);
}
Self { previous }
}
fn unset() -> Self {
let previous = std::env::var_os("DEEPSEEK_MAX_OUTPUT_TOKENS");
// Safety: see set().
unsafe {
std::env::remove_var("DEEPSEEK_MAX_OUTPUT_TOKENS");
}
Self { previous }
}
}
impl Drop for ScopedDeepSeekMaxOutputTokens {
fn drop(&mut self) {
// Safety: tests using this helper serialize with lock_test_env().
unsafe {
if let Some(previous) = self.previous.take() {
std::env::set_var("DEEPSEEK_MAX_OUTPUT_TOKENS", previous);
} else {
std::env::remove_var("DEEPSEEK_MAX_OUTPUT_TOKENS");
}
}
}
}
#[test]
fn effective_max_output_tokens_env_override_returns_positive_value() {
let _lock = lock_test_env();
let _guard = ScopedDeepSeekMaxOutputTokens::set("16384");
// Override applies regardless of model — V4 hosted, V4 flash, sub-500K
// self-hosted all return the env value verbatim.
assert_eq!(effective_max_output_tokens("deepseek-v4-pro"), 16_384);
assert_eq!(effective_max_output_tokens("deepseek-v4-flash"), 16_384);
assert_eq!(effective_max_output_tokens("qwen3-32b-256k"), 16_384);
}
#[test]
fn effective_max_output_tokens_env_override_rejects_zero_and_invalid() {
let _lock = lock_test_env();
// Establish the heuristic baseline with the env unset.
let baseline = {
let _guard = ScopedDeepSeekMaxOutputTokens::unset();
effective_max_output_tokens("deepseek-v4-pro")
};
assert!(baseline > 0);
// 0, non-numeric, and empty values must all fall through to the heuristic
// rather than producing a zero/garbage cap that would silently break
// request budgeting.
for raw in ["0", "abc", "", " ", "-1"] {
let _guard = ScopedDeepSeekMaxOutputTokens::set(raw);
assert_eq!(
effective_max_output_tokens("deepseek-v4-pro"),
baseline,
"env={raw:?} should fall through to heuristic"
);
}
}
#[test]
fn internal_context_budget_tiers_reserved_output_by_window() {
// Serialize with other tests that mutate DEEPSEEK_MAX_OUTPUT_TOKENS so
// both branches below see a stable env.
let _lock = lock_test_env();
// Large-context (>=500K) models reserve the full TURN_MAX_OUTPUT_TOKENS
// headroom so long V4 sessions don't compact prematurely.
let internal_budget =
context_input_budget("deepseek-v4-pro").expect("V4 should have a known context window");
let v4_window: usize = 1_000_000;
let expected_internal = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
assert_eq!(internal_budget, expected_internal);
// Sub-500K windows cross into the effective-cap branch: a 256K self-hosted
// deployment must yield a usable positive budget rather than None. The
// previous formula reserved the full 262K and computed 256K - 262K - 1K,
// which underflowed to None and silently disabled preflight/recovery.
let small_window_budget = context_input_budget("qwen3-32b-256k")
.expect("a 256K-suffix model must yield Some budget via the effective-cap branch");
let effective_output = effective_max_output_tokens("qwen3-32b-256k") as usize;
let expected_small = 256_000 - effective_output - 1_024;
assert_eq!(small_window_budget, expected_small);
}
#[test]
fn v4_tool_outputs_keep_large_file_reads_in_context() {
let content = "0123456789abcdef\n".repeat(2_000);
let output = ToolResult::success(content.clone());
let v4_context = compact_tool_result_for_context("deepseek-v4-pro", "exec_shell", &output);
assert_eq!(v4_context, content.trim());
let legacy_context =
compact_tool_result_for_context("deepseek-v3.2-128k", "exec_shell", &output);
assert!(legacy_context.contains("output compacted to protect context"));
assert!(legacy_context.len() < v4_context.len());
}
#[test]
fn subagent_results_are_summarized_before_parent_context_insertion() {
let long_result = "verified detail\n".repeat(1_000);
let output = ToolResult::success(
json!({
"agent_id": "agent_1234abcd",
"agent_type": "explore",
"assignment": {
"objective": "Inspect the RLM rendering path and report the smallest fix."
},
"model": "deepseek-v4-flash",
"status": "Completed",
"result": long_result,
"steps_taken": 12,
"duration_ms": 3456
})
.to_string(),
);
let context = compact_tool_result_for_context("deepseek-v4-pro", "agent_eval", &output);
assert!(context.contains("[sub-agent result summarized for parent context]"));
assert!(context.contains("agent_1234abcd (explore) status=Completed"));
assert!(context.contains("Inspect the RLM rendering path"));
assert!(context.contains("steps=12"));
assert!(context.len() < output.content.len());
assert!(context.contains("self-report"));
assert!(context.contains("verify side effects"));
assert!(context.contains("read_file") && context.contains("list_dir"));
assert!(context.contains("handle_read"));
}
#[test]
fn run_verifiers_results_are_structured_before_context_insertion() {
let noisy_failure = "node lint failure detail\n".repeat(300);
let noisy_success = "successful check output\n".repeat(300);
let output = ToolResult::success(
json!({
"success": false,
"profile": "auto",
"level": "quick",
"workspace": "/repo",
"gate_count": 3,
"passed": 1,
"failed": 1,
"skipped": 1,
"summary": "1 passed, 1 failed, 1 skipped",
"gates": [
{
"name": "rust-check",
"ecosystem": "rust",
"status": "passed",
"command": "cargo check --workspace --locked",
"cwd": "/repo",
"exit_code": 0,
"duration_ms": 110,
"stdout": noisy_success.clone(),
"stderr": "",
"stdout_truncated": false,
"stderr_truncated": false,
"skipped_reason": null
},
{
"name": "node-lint",
"ecosystem": "node",
"status": "failed",
"command": "npm run lint",
"cwd": "/repo",
"exit_code": 1,
"duration_ms": 220,
"stdout": "",
"stderr": noisy_failure,
"stdout_truncated": false,
"stderr_truncated": false,
"skipped_reason": null
},
{
"name": "python-pytest",
"ecosystem": "python",
"status": "skipped",
"command": "",
"cwd": "/repo",
"exit_code": null,
"duration_ms": 0,
"stdout": "",
"stderr": "",
"stdout_truncated": false,
"stderr_truncated": false,
"skipped_reason": "pytest is not installed"
}
]
})
.to_string(),
);
let context = compact_tool_result_for_context("deepseek-v4-pro", "run_verifiers", &output);
assert!(context.contains("[run_verifiers result summarized for context]"));
assert!(context.contains("summary: 1 passed, 1 failed, 1 skipped"));
assert!(context.contains("selection: profile=auto, level=quick"));
assert!(context.contains("- node-lint (node): failed exit=1"));
assert!(context.contains("command: npm run lint"));
assert!(context.contains("- python-pytest (python): skipped"));
assert!(context.contains("pytest is not installed"));
assert!(context.contains("- rust-check (rust): passed exit=0"));
assert!(context.len() < output.content.len());
assert!(
!context.contains(&noisy_success),
"successful gate stdout should not be copied into parent context"
);
}
#[test]
fn run_tests_results_are_structured_before_context_insertion() {
let stdout = "running test suite\n".repeat(500);
let stderr = "error[E0425]: cannot find value `missing`\n".repeat(500);
let output = ToolResult::success(
json!({
"success": false,
"exit_code": 101,
"stdout": stdout,
"stderr": stderr,
"command": "(cd /repo && cargo test --workspace --all-features)"
})
.to_string(),
);
let context = compact_tool_result_for_context("deepseek-v4-pro", "run_tests", &output);
assert!(context.contains("[run_tests result summarized for context]"));
assert!(context.contains("status: failed, exit_code: 101"));
assert!(context.contains("cargo test --workspace --all-features"));
assert!(context.contains("error[E0425]"));
assert!(context.contains("running test suite"));
assert!(context.len() < output.content.len());
}
#[test]
fn task_gate_run_results_are_structured_before_context_insertion() {
let output = ToolResult::success(
json!({
"gate": {
"id": "gate_abcd1234",
"gate": "clippy",
"command": "cargo clippy -p codewhale-tui --all-targets --all-features --locked -- -D warnings",
"cwd": "/repo",
"exit_code": 1,
"status": "failed",
"classification": "compile_failure",
"duration_ms": 5000,
"summary": "warning promoted to error in verifier.rs",
"log_path": "/repo/.codewhale/runtime/gate.log",
"recorded_at": "2026-06-01T12:00:00Z"
},
"stdout_summary": "",
"stderr_summary": "warning promoted to error"
})
.to_string(),
);
let context = compact_tool_result_for_context("deepseek-v4-pro", "task_gate_run", &output);
assert!(context.contains("[task_gate_run result summarized for context]"));
assert!(context.contains("gate: clippy, status: failed, exit_code: 1"));
assert!(context.contains("cargo clippy -p codewhale-tui"));
assert!(context.contains("summary: warning promoted to error"));
assert!(context.contains("log_path: /repo/.codewhale/runtime/gate.log"));
}
#[test]
fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine
.session
.working_set
.observe_user_message("please inspect src/lib.rs", tmp.path());
engine.refresh_system_prompt();
let prompt = match &engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text.clone(),
Some(SystemPrompt::Blocks(blocks)) => blocks
.iter()
.map(|block| block.text.as_str())
.collect::<Vec<_>>()
.join("\n"),
None => panic!("expected system prompt"),
};
assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER));
}
#[test]
fn working_set_reaches_model_as_turn_metadata() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine
.session
.working_set
.observe_user_message("please inspect src/lib.rs", tmp.path());
let user_msg =
engine.user_text_message_with_turn_metadata("please inspect src/lib.rs".to_string());
engine.session.add_message(user_msg);
let messages = engine.messages_with_turn_metadata();
let last_block = messages
.first()
.and_then(|message| message.content.last())
.expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
panic!("expected text metadata block");
};
assert!(text.starts_with("<turn_meta>\n"));
assert!(text.contains(WORKING_SET_SUMMARY_MARKER));
assert!(text.contains("src/lib.rs"));
}
#[test]
fn turn_metadata_includes_current_local_date_without_working_set() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
model: "deepseek-v4-flash".to_string(),
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
let user_msg = engine.user_text_message_with_turn_metadata("what is today's date?".to_string());
engine.session.add_message(user_msg);
let messages = engine.messages_with_turn_metadata();
let last_block = messages
.first()
.and_then(|message| message.content.last())
.expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
panic!("expected text metadata block");
};
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
assert!(text.starts_with("<turn_meta>\n"));
assert!(text.contains(&format!("Current local date: {today}")));
assert!(text.contains("Current model: deepseek-v4-flash"));
}
#[test]
fn turn_metadata_includes_auto_model_route() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (engine, _handle) = Engine::new(config, &Config::default());
let user_msg = engine.user_text_message_with_turn_metadata_for_route(
"debug this regression".to_string(),
AppMode::Agent,
"deepseek-v4-pro",
true,
Some("max"),
true,
);
let last_block = user_msg.content.last().expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
panic!("expected text metadata block");
};
assert!(text.contains("Current model: deepseek-v4-pro"));
assert!(text.contains("Auto model route: deepseek-v4-pro"));
assert!(text.contains("Auto reasoning effort: max"));
assert!(!text.contains("debug this regression"));
}
#[test]
fn turn_metadata_includes_current_mode() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (engine, _handle) = Engine::new(config, &Config::default());
let user_msg = engine.user_text_message_with_turn_metadata_for_route(
"test mode metadata".to_string(),
AppMode::Yolo,
"deepseek-v4-flash",
false,
None,
false,
);
// turn_meta was relocated to the tail of the user message in #2517
// to keep the leading bytes (user input) stable across date / model
// route / working-set changes.
let last_block = user_msg.content.last().expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
panic!("expected text metadata block");
};
assert!(
text.contains("Current mode: YOLO mode - full tool access without approvals"),
"turn metadata should include the current mode label, got: {text}"
);
}
#[test]
fn turn_metadata_mode_updates_with_change_mode_op() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
// In agent mode by default. The turn_meta block now sits at the
// *tail* of the user message (see #2517) so we read `content.last()`.
let msg = engine.user_text_message_with_turn_metadata("hello".to_string());
let last_block = msg.content.last().expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
panic!("expected text metadata block");
};
assert!(
text.contains("Agent mode"),
"initial mode should be Agent, got: {text}"
);
// Switch to YOLO — user_text_message_with_turn_metadata should reflect the new mode
engine.current_mode = AppMode::Yolo;
let msg = engine.user_text_message_with_turn_metadata("hello again".to_string());
let last_block = msg.content.last().expect("turn metadata block");
let ContentBlock::Text { text, .. } = last_block else {
panic!("expected text metadata block");
};
assert!(
text.contains("YOLO mode"),
"mode after change should be YOLO, got: {text}"
);
}
#[test]
fn current_mode_field_assignment_takes_effect_synchronously() {
// Basic unit-level invariant: the current_mode field mutates as expected
// and the per-turn <runtime_prompt> tag reflects the current mode.
// Op::ChangeMode dispatch through the run loop is exercised by the
// integration test change_mode_op_updates_current_mode_and_emits_status.
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
model: "deepseek-v4-pro".to_string(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
assert_eq!(engine.current_mode, AppMode::Agent);
// Verify runtime tag in Agent mode
let agent_messages = engine.messages_with_turn_metadata();
let agent_tag = agent_messages.last().expect("runtime tag message");
let ContentBlock::Text {
text: agent_text, ..
} = agent_tag.content.first().expect("text block")
else {
panic!("expected text runtime tag in Agent mode");
};
assert!(
agent_text.contains("mode=\"agent\""),
"Agent mode should produce runtime tag with mode=\"agent\", got: {agent_text}"
);
// Switch to YOLO
engine.current_mode = AppMode::Yolo;
assert_eq!(engine.current_mode, AppMode::Yolo);
// Verify runtime tag reflects the YOLO mode with auto approval
let yolo_messages = engine.messages_with_turn_metadata();
let yolo_tag = yolo_messages.last().expect("runtime tag message");
let ContentBlock::Text {
text: yolo_text, ..
} = yolo_tag.content.first().expect("text block")
else {
panic!("expected text runtime tag in YOLO mode");
};
assert!(
yolo_text.contains("mode=\"yolo\""),
"YOLO mode should produce runtime tag with mode=\"yolo\", got: {yolo_text}"
);
assert!(
yolo_text.contains("approval=\"auto\""),
"YOLO mode should project auto approval in runtime tag, got: {yolo_text}"
);
}
#[test]
fn user_text_message_keeps_current_turn_input_after_turn_metadata() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (engine, _handle) = Engine::new(config, &Config::default());
let user_msg =
engine.user_text_message_with_turn_metadata("explain the cache metrics".to_string());
// User text is now at position 0, turn_meta at position 1.
let first_text = user_msg
.content
.iter()
.find_map(|block| {
if let ContentBlock::Text { text, .. } = block {
Some(text.as_str())
} else {
None
}
})
.expect("user text block");
assert_eq!(first_text, "explain the cache metrics");
}
#[test]
fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine
.session
.working_set
.observe_user_message("inspect src/lib.rs", tmp.path());
let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
engine.session.add_message(first_user.clone());
let first_request = engine.messages_with_turn_metadata();
assert_eq!(
&first_request[..engine.session.messages.len()],
&engine.session.messages[..]
);
assert_eq!(first_request.len(), engine.session.messages.len() + 1);
assert_eq!(first_request.first(), Some(&first_user));
assert_eq!(
first_request.last().map(|message| message.role.as_str()),
Some("user")
);
engine.session.add_message(Message {
role: "assistant".to_string(),
content: vec![ContentBlock::Text {
text: "I inspected it.".to_string(),
cache_control: None,
}],
});
engine
.session
.working_set
.observe_user_message("now summarize it", tmp.path());
let second_user = engine.user_text_message_with_turn_metadata("now summarize it".to_string());
engine.session.add_message(second_user);
let second_request = engine.messages_with_turn_metadata();
assert_eq!(
&second_request[..engine.session.messages.len()],
&engine.session.messages[..]
);
assert_eq!(second_request.len(), engine.session.messages.len() + 1);
assert_eq!(second_request.first(), Some(&first_user));
let runtime = second_request.last().expect("runtime prompt");
let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
else {
panic!("expected runtime prompt text");
};
assert!(text.contains("<runtime_prompt"));
}
/// v0.8.11 regression: tool-result messages serialize to role="tool" on
/// the wire but are stored as role="user" internally. `<turn_meta>` must
/// be stored only on actual user-text messages. Request-time runtime metadata
/// is appended separately and must not mutate tool-result messages.
#[test]
fn turn_metadata_skips_tool_result_messages() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine
.session
.working_set
.observe_user_message("inspect src/lib.rs", tmp.path());
// Real user message — should be eligible for injection.
let user_msg = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
engine.session.add_message(user_msg);
// Assistant tool-call.
engine.session.add_message(Message {
role: "assistant".to_string(),
content: vec![ContentBlock::ToolUse {
id: "call_42".to_string(),
name: "read_file".to_string(),
input: serde_json::json!({"path": "src/lib.rs"}),
caller: None,
}],
});
// Tool result, stored as role="user" internally.
engine.session.add_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::ToolResult {
tool_use_id: "call_42".to_string(),
content: "pub fn sample() {}".to_string(),
is_error: None,
content_blocks: None,
}],
});
let messages = engine.messages_with_turn_metadata();
// The stored trailing message is the tool result and MUST be untouched —
// no Text block sneaking in front of the ToolResult block.
let trailing = messages
.get(messages.len().saturating_sub(2))
.expect("stored trailing message");
assert_eq!(trailing.role, "user");
assert_eq!(trailing.content.len(), 1);
assert!(matches!(
trailing.content.first(),
Some(ContentBlock::ToolResult { .. })
));
// The earlier real user message carries user text first, turn_meta last.
let real_user = messages.first().expect("first user message");
assert_eq!(real_user.role, "user");
let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content")
else {
panic!("expected Text block on real user message");
};
assert_eq!(text, "inspect src/lib.rs");
// turn_meta is at the tail of the content array.
let last_block = real_user.content.last().expect("turn_meta block");
let ContentBlock::Text { text: meta, .. } = last_block else {
panic!("expected Text block for turn_meta at tail");
};
assert!(meta.starts_with("<turn_meta>\n"));
assert!(meta.contains("src/lib.rs"));
assert!(
matches!(
messages.last().and_then(|message| message.content.first()),
Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
),
"request projection should append transient runtime metadata"
);
}
/// User text must appear before turn_meta in the content array so that
/// the leading bytes of each user message stay stable across date changes.
/// DeepSeek's KV prefix cache matches byte sequences from the start of
/// each message; placing the volatile date-bearing turn_meta at position
/// 0 would invalidate the entire user message prefix at every date
/// boundary. Moving it to the tail preserves the user-input prefix.
#[test]
fn user_message_turn_meta_is_appended_not_prepended() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (engine, _handle) = Engine::new(config, &Config::default());
let msg = engine.user_text_message_with_turn_metadata("hello world".to_string());
assert_eq!(msg.role, "user");
assert_eq!(msg.content.len(), 2);
// First content block: user text.
let ContentBlock::Text { text, .. } = &msg.content[0] else {
panic!("expected Text block at position 0");
};
assert_eq!(text, "hello world");
// Second content block: turn_meta.
let ContentBlock::Text { text: meta, .. } = &msg.content[1] else {
panic!("expected Text block for turn_meta at position 1");
};
assert!(
meta.starts_with("<turn_meta>\n"),
"turn_meta must be at the tail"
);
assert!(
meta.contains("Current local date:"),
"turn_meta must contain the date"
);
}
/// When the turn is mid-execution and the trailing user message is a
/// tool result, no turn_meta is injected into that tool-result message. The
/// working_set surfaces again on the next stored user-text message.
#[test]
fn turn_metadata_skips_when_only_tool_results_trail() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine
.session
.working_set
.observe_user_message("inspect src/lib.rs", tmp.path());
// Only a tool-result message in history — simulates the corner case
// where the prior real user message has already been compacted away
// but a tool-result is still pending. We must not retroactively
// inject.
engine.session.add_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::ToolResult {
tool_use_id: "call_42".to_string(),
content: "pub fn sample() {}".to_string(),
is_error: None,
content_blocks: None,
}],
});
let messages = engine.messages_with_turn_metadata();
// Stored tool-result message is unchanged: no Text prefix, content length == 1.
let only = messages.first().expect("stored tool result message");
assert_eq!(only.content.len(), 1);
assert!(matches!(
only.content.first(),
Some(ContentBlock::ToolResult { .. })
));
assert_eq!(messages.len(), 2);
assert!(
matches!(
messages.last().and_then(|message| message.content.first()),
Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
),
"request projection should still append transient runtime metadata"
);
}
#[test]
fn refresh_system_prompt_is_noop_when_unchanged() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine.refresh_system_prompt();
let first_hash = engine.session.last_system_prompt_hash;
let first_prompt = engine.session.system_prompt.clone();
engine.refresh_system_prompt();
assert_eq!(engine.session.last_system_prompt_hash, first_hash);
assert_eq!(engine.session.system_prompt, first_prompt);
}
#[test]
fn engine_prompt_respects_hidden_thinking_config() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
locale_tag: "zh-Hans".to_string(),
show_thinking: false,
..Default::default()
};
let (engine, _handle) = Engine::new(config, &Config::default());
let prompt = match engine.session.system_prompt.as_ref() {
Some(SystemPrompt::Text(text)) => text,
Some(SystemPrompt::Blocks(_)) => panic!("expected text system prompt"),
None => panic!("expected system prompt"),
};
assert!(prompt.contains("## Hidden Thinking Language"));
assert!(prompt.contains("reasoning_content"));
assert!(prompt.contains("English"));
assert!(!prompt.contains("## 语言再次提醒"));
}
fn sync_runtime_system_prompt_override(engine: &mut Engine, system_prompt: SystemPrompt) {
engine.session.compaction_summary_prompt =
extract_compaction_summary_prompt(Some(system_prompt.clone()));
engine.session.system_prompt = Some(system_prompt);
engine.session.system_prompt_override = true;
}
#[test]
fn text_system_prompt_override_via_runtime_sync_survives_refresh() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
let prompt = SystemPrompt::Text("TANGERINE-7".to_string());
let expected = Some(prompt.clone());
sync_runtime_system_prompt_override(&mut engine, prompt);
engine.refresh_system_prompt();
assert_eq!(engine.session.system_prompt, expected);
}
#[test]
fn blocks_system_prompt_override_via_runtime_sync_survives_mode_change_refresh() {
let tmp = tempdir().expect("tempdir");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
let prompt = SystemPrompt::Blocks(vec![SystemBlock {
block_type: "text".to_string(),
text: "TANGERINE-7".to_string(),
cache_control: None,
}]);
let expected = Some(prompt.clone());
sync_runtime_system_prompt_override(&mut engine, prompt);
engine.refresh_system_prompt();
assert_eq!(engine.session.system_prompt, expected);
}
#[test]
fn compaction_summary_stays_in_stable_system_prompt() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/main.rs"), "fn main() {}").expect("write");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine
.session
.working_set
.observe_user_message("continue in src/main.rs", tmp.path());
engine.refresh_system_prompt();
engine.merge_compaction_summary(Some(SystemPrompt::Blocks(vec![SystemBlock {
block_type: "text".to_string(),
text: format!("{COMPACTION_SUMMARY_MARKER}\nsummary"),
cache_control: None,
}])));
let prompt = match &engine.session.system_prompt {
Some(SystemPrompt::Text(text)) => text.clone(),
Some(SystemPrompt::Blocks(blocks)) => blocks
.iter()
.map(|block| block.text.as_str())
.collect::<Vec<_>>()
.join("\n"),
None => panic!("expected system prompt"),
};
assert!(prompt.contains(COMPACTION_SUMMARY_MARKER));
assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER));
}
#[tokio::test]
async fn pre_request_refresh_skips_compaction_below_normal_threshold() {
let capacity = CapacityControllerConfig {
enabled: true,
low_risk_max: 0.0,
medium_risk_max: 1.0,
min_turns_before_guardrail: 0,
..Default::default()
};
let mut engine = build_engine_with_capacity(capacity.clone());
engine.config.capacity = capacity.clone();
engine.capacity_controller = CapacityController::new(capacity);
engine.turn_counter = 5;
engine
.capacity_controller
.mark_turn_start(engine.turn_counter);
engine.session.model = "deepseek-v4-pro".to_string();
engine.config.model = "deepseek-v4-pro".to_string();
for i in 0..20 {
engine.session.messages.push(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: format!("small message {i}"),
cache_control: None,
}],
});
}
let before = engine.estimated_input_tokens();
let before_len = engine.session.messages.len();
let turn = TurnContext::new(10);
let applied = engine
.run_capacity_pre_request_checkpoint(&turn, None, AppMode::Agent)
.await;
let after = engine.estimated_input_tokens();
assert!(!applied);
assert_eq!(after, before);
assert_eq!(engine.session.messages.len(), before_len);
}
#[tokio::test]
async fn pre_request_refresh_invoked_when_medium_risk() {
let capacity = CapacityControllerConfig {
enabled: true,
low_risk_max: 0.0,
medium_risk_max: 1.0,
min_turns_before_guardrail: 0,
..Default::default()
};
let mut engine = build_engine_with_capacity(capacity.clone());
engine.config.capacity = capacity.clone();
engine.capacity_controller = CapacityController::new(capacity);
engine.turn_counter = 5;
engine
.capacity_controller
.mark_turn_start(engine.turn_counter);
// Pin the model to an explicit 128k-context variant so the pressure ratio stays
// stable regardless of changes to the workspace-wide default model.
engine.session.model = "deepseek-v3.2-128k".to_string();
engine.config.model = "deepseek-v3.2-128k".to_string();
let long = "x".repeat(5_000);
for _ in 0..900 {
engine.session.messages.push(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: long.clone(),
cache_control: None,
}],
});
}
let before = engine.estimated_input_tokens();
let turn = TurnContext::new(10);
let applied = engine
.run_capacity_pre_request_checkpoint(&turn, None, AppMode::Agent)
.await;
let after = engine.estimated_input_tokens();
assert!(applied);
assert!(after < before);
}
#[tokio::test]
async fn post_tool_replay_invoked_when_high_non_severe_risk() {
let tmp = tempdir().expect("tempdir");
fs::write(tmp.path().join("sample.txt"), "hello replay").expect("write");
let capacity = CapacityControllerConfig {
enabled: true,
low_risk_max: 0.0,
medium_risk_max: 0.0,
severe_min_slack: -10.0,
severe_violation_ratio: 2.0,
min_turns_before_guardrail: 0,
..Default::default()
};
let mut engine = build_engine_with_capacity(capacity.clone());
engine.session.workspace = tmp.path().to_path_buf();
engine.config.workspace = tmp.path().to_path_buf();
engine.config.capacity = capacity.clone();
engine.capacity_controller = CapacityController::new(capacity);
engine.turn_counter = 4;
engine
.capacity_controller
.mark_turn_start(engine.turn_counter);
let mut turn = TurnContext::new(10);
let mut tool_call = TurnToolCall::new(
"tool_read_1".to_string(),
"read_file".to_string(),
json!({ "path": "sample.txt" }),
);
tool_call.set_result(
"hello replay".to_string(),
std::time::Duration::from_millis(1),
);
turn.record_tool_call(tool_call);
let registry = ToolRegistryBuilder::new()
.with_read_only_file_tools()
.build(engine.build_tool_context(AppMode::Agent, false));
let restarted = engine
.run_capacity_post_tool_checkpoint(
&turn,
Some(&registry),
Arc::new(RwLock::new(())),
None,
0,
0,
)
.await;
assert!(!restarted);
let has_verification_note = engine.session.messages.iter().any(|msg| {
msg.content.iter().any(|block| match block {
ContentBlock::ToolResult { content, .. } => content.contains("[verification replay]"),
_ => false,
})
});
assert!(has_verification_note);
}
#[tokio::test]
async fn error_escalation_triggers_replan_when_severe_or_repeated_failures() {
let _env_lock = CAPACITY_MEMORY_ENV_LOCK.lock().await;
let tmp = tempdir().expect("tempdir");
let _env = ScopedCapacityMemoryDir::set(tmp.path());
let capacity = CapacityControllerConfig {
enabled: true,
low_risk_max: 0.0,
medium_risk_max: 0.0,
min_turns_before_guardrail: 0,
..Default::default()
};
let mut engine = build_engine_with_capacity(capacity.clone());
engine.config.capacity = capacity.clone();
engine.capacity_controller = CapacityController::new(capacity);
engine.turn_counter = 6;
engine
.capacity_controller
.mark_turn_start(engine.turn_counter);
for i in 0..10 {
engine.session.messages.push(Message {
role: if i % 2 == 0 { "user" } else { "assistant" }.to_string(),
content: vec![ContentBlock::Text {
text: format!("noise message {i}"),
cache_control: None,
}],
});
}
engine.session.messages.push(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "Please finish task".to_string(),
cache_control: None,
}],
});
let before_len = engine.session.messages.len();
let turn = TurnContext::new(10);
let restarted = engine
.run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
.await;
assert!(restarted);
assert!(engine.session.messages.len() < before_len);
assert!(engine.session.messages.len() <= 2);
let records = load_last_k_capacity_records(&engine.session.id, 1).expect("load memory");
assert!(!records.is_empty());
assert!(!records[0].canonical_state.goal.is_empty());
}
/// v0.8.11: `CapacityControllerConfig::default()` ships with
/// `enabled = false`. The capacity controller's destructive
/// interventions (TargetedContextRefresh silently runs compaction;
/// VerifyAndReplan clears the session message log) silently rewrote
/// or nuked the user's transcript ("resetting plan" footer +
/// black-screen symptom). v0.8.11 commits to "trust the model with
/// the full 1M-token context, only compact on explicit user
/// /compact" — auto-managing the prefix contradicts that posture.
/// Power users can still opt in via `capacity.enabled = true`.
#[tokio::test]
async fn capacity_disabled_by_default_keeps_messages_intact() {
let _env_lock = CAPACITY_MEMORY_ENV_LOCK.lock().await;
let tmp = tempdir().expect("tempdir");
let _env = ScopedCapacityMemoryDir::set(tmp.path());
// Default config — what real users get.
let mut engine = build_engine_with_capacity(CapacityControllerConfig::default());
assert!(
!engine.config.capacity.enabled,
"capacity controller must be off by default in v0.8.11+"
);
engine.turn_counter = 6;
engine
.capacity_controller
.mark_turn_start(engine.turn_counter);
for i in 0..10 {
engine.session.messages.push(Message {
role: if i % 2 == 0 { "user" } else { "assistant" }.to_string(),
content: vec![ContentBlock::Text {
text: format!("noise message {i}"),
cache_control: None,
}],
});
}
engine.session.messages.push(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "Please finish task".to_string(),
cache_control: None,
}],
});
let before_len = engine.session.messages.len();
let turn = TurnContext::new(10);
let restarted = engine
.run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
.await;
// Capacity is disabled → no replan, no message clear.
assert!(!restarted);
assert_eq!(engine.session.messages.len(), before_len);
}
#[tokio::test]
async fn controller_disabled_keeps_behavior_unchanged() {
let capacity = CapacityControllerConfig {
enabled: false,
..Default::default()
};
let mut engine = build_engine_with_capacity(capacity.clone());
engine.config.capacity = capacity.clone();
engine.capacity_controller = CapacityController::new(capacity);
engine.turn_counter = 3;
engine
.capacity_controller
.mark_turn_start(engine.turn_counter);
let long = "y".repeat(5_000);
for _ in 0..120 {
engine.session.messages.push(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: long.clone(),
cache_control: None,
}],
});
}
let before = engine.estimated_input_tokens();
let before_len = engine.session.messages.len();
let turn = TurnContext::new(10);
let applied = engine
.run_capacity_pre_request_checkpoint(&turn, None, AppMode::Agent)
.await;
let after = engine.estimated_input_tokens();
let after_len = engine.session.messages.len();
assert!(!applied);
assert_eq!(before, after);
assert_eq!(before_len, after_len);
}
#[test]
fn caller_policy_defaults_to_direct() {
let tool = Tool {
tool_type: None,
name: "read_file".to_string(),
description: "Read".to_string(),
input_schema: json!({"type":"object"}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(false),
input_examples: None,
strict: None,
cache_control: None,
};
let direct = ToolCaller {
caller_type: "direct".to_string(),
tool_id: None,
};
let code = ToolCaller {
caller_type: "code_execution_20250825".to_string(),
tool_id: Some("srvtoolu_1".to_string()),
};
assert!(caller_allowed_for_tool(Some(&direct), Some(&tool)));
assert!(!caller_allowed_for_tool(Some(&code), Some(&tool)));
assert!(caller_allowed_for_tool(None, Some(&tool)));
}
#[test]
fn tool_search_activates_discovered_deferred_tools() {
let mut catalog = vec![
Tool {
tool_type: None,
name: "read_file".to_string(),
description: "Read files".to_string(),
input_schema: json!({"type":"object","properties":{"path":{"type":"string"}}}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(true),
input_examples: None,
strict: None,
cache_control: None,
},
Tool {
tool_type: None,
name: "grep_files".to_string(),
description: "Search files".to_string(),
input_schema: json!({"type":"object","properties":{"pattern":{"type":"string"}}}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(true),
input_examples: None,
strict: None,
cache_control: None,
},
];
let always_load = HashSet::new();
ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
let mut active = initial_active_tools(&catalog);
let result = execute_tool_search(
TOOL_SEARCH_BM25_NAME,
&json!({"query":"read file"}),
&catalog,
&mut active,
)
.expect("search succeeds");
assert!(result.success);
assert!(active.contains("read_file"));
}
fn tool_search_catalog_with_matches(count: usize) -> Vec<Tool> {
let mut catalog = (0..count)
.map(|idx| Tool {
tool_type: None,
name: format!("matching_tool_{idx:03}"),
description: "Matching deferred test tool".to_string(),
input_schema: json!({"type":"object","properties":{"query":{"type":"string"}}}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(true),
input_examples: None,
strict: None,
cache_control: None,
})
.collect::<Vec<_>>();
let always_load = HashSet::new();
ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
catalog
}
fn tool_search_reference_count(result: &ToolResult) -> usize {
result
.metadata
.as_ref()
.and_then(|metadata| metadata.get("tool_references"))
.and_then(|references| references.as_array())
.map_or(0, Vec::len)
}
#[test]
fn tool_search_defaults_to_twenty_results_for_regex_and_bm25() {
let catalog = tool_search_catalog_with_matches(25);
for tool_name in [TOOL_SEARCH_REGEX_NAME, TOOL_SEARCH_BM25_NAME] {
let mut active = initial_active_tools(&catalog);
let result = execute_tool_search(
tool_name,
&json!({"query":"matching"}),
&catalog,
&mut active,
)
.expect("search succeeds");
assert_eq!(tool_search_reference_count(&result), 20);
}
}
#[test]
fn tool_search_respects_and_caps_max_results() {
let catalog = tool_search_catalog_with_matches(120);
let mut active = initial_active_tools(&catalog);
let limited = execute_tool_search(
TOOL_SEARCH_BM25_NAME,
&json!({"query":"matching","max_results":7}),
&catalog,
&mut active,
)
.expect("search succeeds");
assert_eq!(tool_search_reference_count(&limited), 7);
let mut active = initial_active_tools(&catalog);
let capped = execute_tool_search(
TOOL_SEARCH_REGEX_NAME,
&json!({"query":"matching","max_results":999}),
&catalog,
&mut active,
)
.expect("search succeeds");
assert_eq!(tool_search_reference_count(&capped), 100);
}
#[test]
fn tool_search_schema_exposes_max_results_default_and_cap() {
let mut catalog = Vec::new();
let always_load = HashSet::new();
ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
for tool_name in [TOOL_SEARCH_REGEX_NAME, TOOL_SEARCH_BM25_NAME] {
let tool = catalog
.iter()
.find(|tool| tool.name == tool_name)
.expect("tool search definition exists");
let schema = &tool.input_schema["properties"]["max_results"];
assert_eq!(schema["default"], 20);
assert_eq!(schema["maximum"], 100);
assert_eq!(schema["minimum"], 1);
}
}
#[tokio::test]
async fn code_execution_runs_python_and_returns_result_payload() {
let tmp = tempdir().expect("tempdir");
let result =
execute_code_execution_tool(&json!({"code":"print('hello from code exec')"}), tmp.path())
.await
.expect("code execution should run");
assert!(result.content.contains("hello from code exec"));
assert!(result.content.contains("return_code"));
}
#[test]
fn plan_mode_catalog_skips_code_execution_tool_but_agent_keeps_it() {
let mut plan_catalog = vec![api_tool("read_file")];
let always_load = HashSet::new();
ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan, &always_load);
assert!(
!plan_catalog
.iter()
.any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
"Plan mode must not expose code_execution"
);
let mut agent_catalog = vec![api_tool("read_file")];
ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent, &always_load);
assert!(
agent_catalog
.iter()
.any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
"Agent mode should still expose code_execution"
);
}
#[test]
fn deferred_tool_requests_are_auto_activated() {
use std::collections::HashSet;
let catalog = vec![Tool {
tool_type: None,
name: "exec_shell".to_string(),
description: "Run shell commands".to_string(),
input_schema: json!({"type":"object","properties":{"cmd":{"type":"string"}}}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(true),
input_examples: None,
strict: None,
cache_control: None,
}];
let mut active = HashSet::new();
assert!(!active.contains("exec_shell"));
assert!(maybe_activate_requested_deferred_tool(
"exec_shell",
&catalog,
&mut active
));
assert!(active.contains("exec_shell"));
}
#[test]
fn missing_tool_error_message_offers_suggestions() {
let catalog = vec![
Tool {
tool_type: None,
name: "read_file".to_string(),
description: "Read file contents".to_string(),
input_schema: json!({"type":"object","properties":{"path":{"type":"string"}}}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(false),
input_examples: None,
strict: None,
cache_control: None,
},
Tool {
tool_type: None,
name: "grep_files".to_string(),
description: "Search file contents".to_string(),
input_schema: json!({"type":"object","properties":{"pattern":{"type":"string"}}}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(false),
input_examples: None,
strict: None,
cache_control: None,
},
];
let message = missing_tool_error_message("reed_file", &catalog);
assert!(message.contains("Did you mean:"));
assert!(message.contains("read_file"));
assert!(message.contains(TOOL_SEARCH_BM25_NAME));
}
#[test]
fn missing_tool_error_message_includes_discovery_guidance_when_no_match() {
let catalog = vec![Tool {
tool_type: None,
name: "read_file".to_string(),
description: "Read file contents".to_string(),
input_schema: json!({"type":"object","properties":{"path":{"type":"string"}}}),
allowed_callers: Some(vec!["direct".to_string()]),
defer_loading: Some(false),
input_examples: None,
strict: None,
cache_control: None,
}];
let message = missing_tool_error_message("totally_unknown_tool", &catalog);
assert!(message.contains("not available in the current tool catalog"));
assert!(message.contains(TOOL_SEARCH_BM25_NAME));
}
#[test]
fn missing_shell_tool_error_message_names_allow_shell_gate() {
let catalog = vec![api_tool("read_file")];
for tool_name in [
"exec_shell",
"exec_shell_wait",
"exec_shell_interact",
"task_shell_start",
"task_shell_wait",
] {
let message = missing_tool_error_message(tool_name, &catalog);
assert!(message.contains("not available in the current tool catalog"));
assert!(
message.contains("allow_shell = false"),
"{tool_name}: {message}"
);
assert!(message.contains("allow_shell"), "{tool_name}: {message}");
assert!(
message.contains("/config allow_shell true"),
"{tool_name}: {message}"
);
assert!(message.contains("--save"), "{tool_name}: {message}");
assert!(message.contains("Agent mode"), "{tool_name}: {message}");
assert!(
message.contains("approval gating"),
"{tool_name}: {message}"
);
assert!(!message.contains("YOLO"), "{tool_name}: {message}");
assert!(!message.contains("auto-approve"), "{tool_name}: {message}");
assert!(
message.contains(TOOL_SEARCH_BM25_NAME),
"{tool_name}: {message}"
);
}
}
#[test]
fn missing_shell_tool_error_message_keeps_allow_shell_hint_with_suggestions() {
let catalog = vec![api_tool("exec")];
let message = missing_tool_error_message("exec_shell", &catalog);
assert!(message.contains("Did you mean:"));
assert!(message.contains("exec"));
assert!(message.contains("allow_shell = false"));
assert!(message.contains("allow_shell"));
assert!(message.contains("/config allow_shell true"));
assert!(message.contains("--save"));
assert!(message.contains("Agent mode"));
assert!(!message.contains("YOLO"));
assert!(!message.contains("auto-approve"));
assert!(message.contains(TOOL_SEARCH_BM25_NAME));
}
#[test]
fn filter_tool_call_delta_strips_bracket_marker() {
let mut in_block = false;
let visible = filter_tool_call_delta(
"intro [TOOL_CALL]\n{\"tool\":\"x\"}\n[/TOOL_CALL] outro",
&mut in_block,
);
assert!(!in_block);
assert!(!visible.contains("[TOOL_CALL]"));
assert!(!visible.contains("[/TOOL_CALL]"));
assert!(!visible.contains("\"tool\":\"x\""));
assert!(visible.contains("intro"));
assert!(visible.contains("outro"));
}
#[test]
fn filter_tool_call_delta_strips_deepseek_xml_marker() {
let mut in_block = false;
let visible = filter_tool_call_delta(
"before <codewhale:tool_call name=\"x\">payload</codewhale:tool_call> after",
&mut in_block,
);
assert!(!in_block);
for marker in TOOL_CALL_START_MARKERS {
assert!(
!visible.contains(marker),
"visible text leaked start marker `{marker}`: {visible:?}"
);
}
assert!(visible.contains("before"));
assert!(visible.contains("after"));
}
#[test]
fn filter_tool_call_delta_strips_generic_tool_call_marker() {
let mut in_block = false;
let visible = filter_tool_call_delta(
"lead <tool_call>\n{\"name\":\"do\"}\n</tool_call> tail",
&mut in_block,
);
assert!(!in_block);
assert!(!visible.contains("<tool_call"));
assert!(!visible.contains("</tool_call>"));
assert!(visible.contains("lead"));
assert!(visible.contains("tail"));
}
#[test]
fn filter_tool_call_delta_strips_invoke_marker() {
let mut in_block = false;
let visible = filter_tool_call_delta(
"alpha <invoke name=\"x\"><parameter name=\"k\">v</parameter></invoke> beta",
&mut in_block,
);
assert!(!in_block);
assert!(!visible.contains("<invoke "));
assert!(!visible.contains("</invoke>"));
assert!(visible.contains("alpha"));
assert!(visible.contains("beta"));
}
#[test]
fn filter_tool_call_delta_strips_function_calls_marker() {
let mut in_block = false;
let visible = filter_tool_call_delta(
"head <function_calls>\n{\"name\":\"x\"}\n</function_calls> tail",
&mut in_block,
);
assert!(!in_block);
assert!(!visible.contains("<function_calls>"));
assert!(!visible.contains("</function_calls>"));
assert!(visible.contains("head"));
assert!(visible.contains("tail"));
}
#[test]
fn filter_tool_call_delta_handles_chunk_split_marker() {
let mut in_block = false;
// First chunk opens the wrapper but does not close it.
let visible_a = filter_tool_call_delta("hello <tool_call>partial", &mut in_block);
assert!(in_block, "filter must remember it is mid-wrapper");
assert_eq!(visible_a, "hello ");
// Second chunk continues inside the wrapper, then closes it and adds tail.
let visible_b = filter_tool_call_delta("payload</tool_call> tail", &mut in_block);
assert!(!in_block);
assert_eq!(visible_b, " tail");
}
#[test]
fn filter_tool_call_delta_unmatched_open_suppresses_remainder() {
let mut in_block = false;
let visible = filter_tool_call_delta("ok [TOOL_CALL]rest of stream", &mut in_block);
assert_eq!(visible, "ok ");
assert!(
in_block,
"unmatched open must leave filter in tool-call mode"
);
}
#[test]
fn filter_tool_call_delta_passes_through_clean_text() {
let mut in_block = false;
let input = "no markers here, just prose with code `<not a tag>`.";
let visible = filter_tool_call_delta(input, &mut in_block);
assert!(!in_block);
assert_eq!(visible, input);
}
#[test]
fn contains_fake_tool_wrapper_detects_each_marker() {
for marker in TOOL_CALL_START_MARKERS {
let needle = format!("noise {marker} more noise");
assert!(
contains_fake_tool_wrapper(&needle),
"marker `{marker}` should be detected"
);
}
}
#[test]
fn contains_fake_tool_wrapper_returns_false_on_clean_text() {
assert!(!contains_fake_tool_wrapper(
"plain assistant text without wrappers"
));
assert!(!contains_fake_tool_wrapper(
"`<tool` lookalike but not a real start marker"
));
}
#[test]
fn fake_wrapper_notice_is_compact_and_actionable() {
// Keep this short so it fits cleanly in a single status line.
assert!(FAKE_WRAPPER_NOTICE.len() < 120);
assert!(FAKE_WRAPPER_NOTICE.contains("API tool channel"));
}
// ---- final_tool_input: bug-class regression for "<command>" placeholder ----
//
// Background: a streamed tool block carries its `input` in two pieces — an
// initial value at `ContentBlockStart` (often `{}`), then `InputJsonDelta`
// chunks that build up `input_buffer`. The TUI used to fire `ToolCallStarted`
// from `ContentBlockStart` with the empty initial input and never re-emit
// once args were known, so cells rendered the literal text `<command>` /
// `<file>` placeholders. The fix relocates the emission to `ContentBlockStop`
// and routes the input through `final_tool_input`, which prefers the parsed
// buffer over a stale empty placeholder.
fn tool_state(initial: serde_json::Value, buffer: &str) -> ToolUseState {
ToolUseState {
id: "t1".into(),
name: "exec_shell".into(),
input: initial,
caller: None,
input_buffer: buffer.into(),
}
}
#[test]
fn final_tool_input_prefers_parsed_buffer_over_empty_initial() {
// The exact regression: ContentBlockStart delivered `{}`, then args
// streamed in via InputJsonDelta. The emitted ToolCallStarted must
// carry the parsed buffer, not the placeholder.
let state = tool_state(json!({}), r#"{"command": "ls -la"}"#);
assert_eq!(final_tool_input(&state), json!({"command": "ls -la"}));
}
#[test]
fn final_tool_input_falls_back_to_initial_when_buffer_empty() {
// Models occasionally embed args directly in the start frame and never
// send any InputJsonDelta. We must still report those args.
let state = tool_state(json!({"command": "echo hi"}), "");
assert_eq!(final_tool_input(&state), json!({"command": "echo hi"}));
}
#[test]
fn final_tool_input_repairs_unparseable_buffer() {
// The arg_repair module converts unparseable input to an empty object
// {} so dispatch always proceeds. The buffer wins over the initial input.
let state = tool_state(json!({"command": "echo hi"}), "{not json");
assert_eq!(final_tool_input(&state), json!({}));
}
// === #103 transparent stream-retry policy =====================================
#[test]
fn stream_retry_zero_content_then_error_is_transparently_retried() {
// Case 2 from issue #103: stream yielded ZERO content then errored.
// The decoder hit Err on the very first poll → engine should retry
// because DeepSeek hasn't billed and the user has seen nothing.
assert!(
super::should_transparently_retry_stream(false, 0, false),
"first attempt with no content must be eligible for transparent retry"
);
assert!(
super::should_transparently_retry_stream(false, 1, false),
"second attempt (one prior retry) with no content must still be eligible"
);
}
#[test]
fn stream_retry_after_content_received_surfaces_error() {
// Case 3 from issue #103: stream yielded content then errored. We must
// NOT transparently retry — the model has emitted billed output tokens
// and the UI has streamed deltas; resending would double-bill and the
// user would see the same prefix twice.
assert!(
!super::should_transparently_retry_stream(true, 0, false),
"any content received → no transparent retry, even with full budget"
);
assert!(
!super::should_transparently_retry_stream(true, 1, false),
"any content received → no transparent retry on subsequent attempts"
);
}
#[test]
fn stream_retry_budget_caps_transparent_retries_at_two() {
// Case 4 from issue #103: after MAX_TRANSPARENT_STREAM_RETRIES attempts
// we stop trying transparently and let the outer error path surface.
// (The outer per-turn `stream_retry_attempts` retry is a separate layer
// and is still in effect at the whole-turn level.)
assert!(
super::should_transparently_retry_stream(
false,
super::MAX_TRANSPARENT_STREAM_RETRIES - 1,
false,
),
"one short of the cap should still retry"
);
assert!(
!super::should_transparently_retry_stream(
false,
super::MAX_TRANSPARENT_STREAM_RETRIES,
false,
),
"at the cap, no further transparent retries"
);
assert!(
!super::should_transparently_retry_stream(
false,
super::MAX_TRANSPARENT_STREAM_RETRIES + 5,
false,
),
"well past the cap, definitely no transparent retries"
);
}
#[test]
fn stream_retry_respects_cancellation() {
// Cancellation overrides every other condition. If the user pressed
// Esc / Ctrl-C, do not silently re-issue the request behind their back.
assert!(
!super::should_transparently_retry_stream(false, 0, true),
"cancelled turn must not be transparently retried"
);
assert!(
!super::should_transparently_retry_stream(false, 1, true),
"cancelled turn must not be transparently retried even with budget"
);
}
// === #2990 sleep-resume policy ================================================
#[test]
fn sleep_gap_requires_wallclock_to_outrun_monotonic_clock() {
use std::time::Duration;
// No divergence: ordinary network failure, clocks agree.
assert!(
!super::sleep_gap_detected(Duration::from_secs(30), Duration::from_secs(30)),
"equal elapsed times must not register as a sleep gap"
);
// Divergence below the threshold: NTP slew / scheduling jitter.
assert!(
!super::sleep_gap_detected(Duration::from_secs(5), Duration::from_secs(14)),
"9s of divergence is below the 10s threshold"
);
// Divergence above the threshold: the host was suspended.
assert!(
super::sleep_gap_detected(Duration::from_secs(5), Duration::from_secs(16)),
"11s of divergence must register as a sleep gap"
);
// Wall clock went backwards (NTP step): saturating_sub → zero gap.
assert!(
!super::sleep_gap_detected(Duration::from_secs(60), Duration::from_secs(5)),
"wall clock behind monotonic must never register as a sleep gap"
);
}
#[test]
fn sleep_resume_retries_even_after_content_streamed() {
// The whole point of #2990: unlike the #103 transparent retry, a
// detected sleep gap retries regardless of streamed content — the
// partial output predates the sleep and the user was not watching.
assert!(
super::should_resume_after_sleep(true, 0, false),
"detected sleep with full budget must resume"
);
assert!(
super::should_resume_after_sleep(true, super::MAX_STREAM_RETRIES - 1, false),
"detected sleep one short of the budget must still resume"
);
}
#[test]
fn sleep_resume_requires_a_detected_gap() {
// Without a sleep gap this layer stays out of the way entirely, so the
// deliberate no-retry-after-content policy for ordinary flakes (#103)
// is preserved.
assert!(
!super::should_resume_after_sleep(false, 0, false),
"no sleep gap → never resume via this layer"
);
}
#[test]
fn sleep_resume_respects_budget_and_cancellation() {
assert!(
!super::should_resume_after_sleep(true, super::MAX_STREAM_RETRIES, false),
"budget exhausted → surface the failure instead of looping"
);
assert!(
!super::should_resume_after_sleep(true, 0, true),
"cancelled turn must not be resumed behind the user's back"
);
}
#[test]
fn stream_retry_threshold_relaxed_to_five() {
// Case 1+4 from issue #103: the consecutive-error threshold for marking
// the turn failed was relaxed from 3 → 5 in v0.6.7 because the new
// HTTP/2 keepalive defaults make spurious decode errors rarer.
// This test pins the constant so a future regression to 3 fails loudly.
assert_eq!(
super::MAX_STREAM_ERRORS_BEFORE_FAIL,
5,
"the consecutive-stream-error threshold should be 5; \
lowering it back to 3 will fail mid-turn under transient flakiness"
);
// And a regression guard on the transparent-retry cap.
assert_eq!(
super::MAX_TRANSPARENT_STREAM_RETRIES,
2,
"transparent-retry cap should be 2; raising it risks hammering the \
provider on real outages"
);
}
// === Issue #66: error taxonomy wired through engine + audit + capacity ===
/// A failed-tool audit entry must carry the typed `category` and `severity`
/// fields derived from the underlying `ToolError`. This is what makes
/// downstream tooling able to bucket failures without scraping the message
/// string.
#[test]
fn tool_failure_audit_payload_carries_category_and_severity() {
use crate::error_taxonomy::ErrorEnvelope;
use crate::tools::spec::ToolError;
let error = ToolError::Timeout { seconds: 30 };
let envelope: ErrorEnvelope = error.clone().into();
let payload = json!({
"event": "tool.result",
"tool_id": "tool-1",
"tool_name": "exec_shell",
"success": false,
"error": error.to_string(),
"category": envelope.category.to_string(),
"severity": envelope.severity.to_string(),
});
assert_eq!(payload["category"], "timeout");
assert_eq!(payload["severity"], "warning");
assert_eq!(payload["success"], false);
}
/// Capacity escalation sees `ErrorCategory::InvalidInput` as a context-overflow
/// signal that must escalate even on the first failure (no consecutive
/// requirement). The previous string-matching path scanned the message for
/// "context length" — categories give us a typed contract instead.
#[test]
fn capacity_escalation_treats_invalid_input_as_overflow_signal() {
use crate::error_taxonomy::ErrorCategory;
// Replays the categorization branches inside
// `run_capacity_error_escalation_checkpoint`. Keeping the assertions on
// the typed surface (slice of `ErrorCategory`) means this test fails
// loudly if a future refactor reverts to substring matching.
let categories: &[ErrorCategory] = &[ErrorCategory::InvalidInput];
let has_context_overflow = categories.contains(&ErrorCategory::InvalidInput);
assert!(has_context_overflow);
let only_transient = !categories.is_empty()
&& categories.iter().all(|c| {
matches!(
c,
ErrorCategory::Network | ErrorCategory::RateLimit | ErrorCategory::Timeout
)
});
assert!(!only_transient);
}
/// Transient categories (network / rate limit / timeout) must NOT escalate by
/// themselves — those resolve via the existing retry loop and shouldn't
/// trigger a capacity-driven replan.
#[test]
fn capacity_escalation_skips_pure_transient_categories() {
use crate::error_taxonomy::ErrorCategory;
let categories: &[ErrorCategory] = &[
ErrorCategory::Network,
ErrorCategory::RateLimit,
ErrorCategory::Timeout,
];
let has_context_overflow = categories.contains(&ErrorCategory::InvalidInput);
assert!(!has_context_overflow);
let only_transient = !categories.is_empty()
&& categories.iter().all(|c| {
matches!(
c,
ErrorCategory::Network | ErrorCategory::RateLimit | ErrorCategory::Timeout
)
});
assert!(only_transient);
}
// ── #136: post-edit LSP diagnostics hook ─────────────────────────────────
#[test]
fn edited_paths_for_edit_file_returns_path() {
let input = json!({ "path": "src/foo.rs", "search": "x", "replace": "y" });
let paths = edited_paths_for_tool("edit_file", &input);
assert_eq!(paths, vec![PathBuf::from("src/foo.rs")]);
}
#[test]
fn edited_paths_for_write_file_returns_path() {
let input = json!({ "path": "src/bar.rs", "content": "fn main() {}" });
let paths = edited_paths_for_tool("write_file", &input);
assert_eq!(paths, vec![PathBuf::from("src/bar.rs")]);
}
#[test]
fn edited_paths_for_apply_patch_with_changes_returns_each_path() {
let input = json!({
"changes": [
{ "path": "a.rs", "content": "" },
{ "path": "b.rs", "content": "" }
]
});
let paths = edited_paths_for_tool("apply_patch", &input);
assert_eq!(paths, vec![PathBuf::from("a.rs"), PathBuf::from("b.rs")]);
}
#[test]
fn edited_paths_for_apply_patch_with_diff_text_extracts_paths() {
let input = json!({
"patch": "--- a/foo.rs\n+++ b/foo.rs\n@@ -1 +1 @@\n-let x: i32 = 0;\n+let x: i32 = \"oops\";\n"
});
let paths = edited_paths_for_tool("apply_patch", &input);
assert_eq!(paths, vec![PathBuf::from("foo.rs")]);
}
#[test]
fn edited_paths_for_apply_patch_with_invalid_diff_returns_empty() {
let input = json!({
"patch": "@@ -1 +1 @@\n-old\n+new\n"
});
let paths = edited_paths_for_tool("apply_patch", &input);
assert!(paths.is_empty());
}
#[test]
fn edited_paths_for_unknown_tool_returns_empty() {
let input = json!({ "path": "irrelevant.rs" });
let paths = edited_paths_for_tool("read_file", &input);
assert!(paths.is_empty());
let paths = edited_paths_for_tool("grep_files", &input);
assert!(paths.is_empty());
}
#[test]
fn parse_patch_paths_skips_dev_null() {
let patch = "--- a/keep.rs\n+++ b/keep.rs\n@@ -1 +1 @@\n-old\n+new\n--- a/deleted.rs\n+++ /dev/null\n@@ -1 +0,0 @@\n-delete me\n";
let paths = edited_paths_for_tool("apply_patch", &json!({ "patch": patch }));
assert_eq!(paths, vec![PathBuf::from("keep.rs")]);
}
#[tokio::test]
async fn post_edit_hook_injects_diagnostics_message_before_next_request() {
use crate::lsp::{Diagnostic, Language, Severity};
use std::sync::Arc;
let tmp = tempdir().expect("tempdir");
let workspace = tmp.path().to_path_buf();
let target = workspace.join("src").join("main.rs");
fs::create_dir_all(workspace.join("src")).unwrap();
fs::write(&target, "let x: i32 = \"not a number\";").unwrap();
let lsp_config = crate::lsp::LspConfig::default();
let engine_config = EngineConfig {
workspace: workspace.clone(),
lsp_config: Some(lsp_config),
..Default::default()
};
let (mut engine, _handle) = Engine::new(engine_config, &Config::default());
// Install a fake transport that always reports a type error.
let fake = Arc::new(crate::lsp::tests::FakeTransport::new(vec![Diagnostic {
line: 1,
column: 14,
severity: Severity::Error,
message: "expected i32, found &str".to_string(),
}]));
engine
.lsp_manager
.install_test_transport(Language::Rust, fake)
.await;
// Simulate the success path of an edit_file tool call.
let input = json!({ "path": "src/main.rs", "search": "0", "replace": "\"not a number\"" });
engine.run_post_edit_lsp_hook("edit_file", &input).await;
assert_eq!(engine.pending_lsp_blocks.len(), 1);
// Flush prepares the synthetic message.
let messages_before = engine.session.messages.len();
engine.flush_pending_lsp_diagnostics().await;
assert_eq!(engine.session.messages.len(), messages_before + 1);
let last = engine.session.messages.last().expect("message appended");
assert_eq!(last.role, "user");
// turn_meta is now at the tail of the content array (PR #2517).
let meta = match last.content.last() {
Some(crate::models::ContentBlock::Text { text, .. }) => text.clone(),
other => panic!("expected text block at tail, got {other:?}"),
};
assert!(meta.starts_with("<turn_meta>\n"));
let diagnostic_text = last
.content
.iter()
.find_map(|block| match block {
crate::models::ContentBlock::Text { text, .. }
if text.contains("<diagnostics file=\"") =>
{
Some(text)
}
_ => None,
})
.expect("diagnostics text block");
assert!(diagnostic_text.contains("ERROR [1:14] expected i32, found &str"));
}
#[tokio::test]
async fn post_edit_hook_is_silent_when_lsp_disabled() {
let tmp = tempdir().expect("tempdir");
let workspace = tmp.path().to_path_buf();
let target = workspace.join("src").join("main.rs");
fs::create_dir_all(workspace.join("src")).unwrap();
fs::write(&target, "fn main() {}").unwrap();
let lsp_config = crate::lsp::LspConfig {
enabled: false,
..Default::default()
};
let engine_config = EngineConfig {
workspace: workspace.clone(),
lsp_config: Some(lsp_config),
..Default::default()
};
let (mut engine, _handle) = Engine::new(engine_config, &Config::default());
let input = json!({ "path": "src/main.rs", "search": "x", "replace": "y" });
engine.run_post_edit_lsp_hook("edit_file", &input).await;
assert!(engine.pending_lsp_blocks.is_empty());
let messages_before = engine.session.messages.len();
engine.flush_pending_lsp_diagnostics().await;
assert_eq!(engine.session.messages.len(), messages_before);
}
#[tokio::test]
async fn post_edit_hook_skips_unknown_tool_names() {
use crate::lsp::{Diagnostic, Language, Severity};
use std::sync::Arc;
let tmp = tempdir().expect("tempdir");
let engine_config = EngineConfig {
workspace: tmp.path().to_path_buf(),
lsp_config: Some(crate::lsp::LspConfig::default()),
..Default::default()
};
let (mut engine, _handle) = Engine::new(engine_config, &Config::default());
let fake = Arc::new(crate::lsp::tests::FakeTransport::new(vec![Diagnostic {
line: 1,
column: 1,
severity: Severity::Error,
message: "should not be reported".to_string(),
}]));
engine
.lsp_manager
.install_test_transport(Language::Rust, fake.clone())
.await;
let input = json!({ "path": "src/main.rs" });
engine.run_post_edit_lsp_hook("read_file", &input).await;
assert!(engine.pending_lsp_blocks.is_empty());
assert_eq!(fake.call_count(), 0);
}