codewhale/crates/tui/src/core/engine/tests.rs

use super::*;

use super::context::TURN_MAX_OUTPUT_TOKENS;
use crate::models::SystemBlock;
use crate::test_support::lock_test_env;
use crate::tools::plan::{PlanItemArg, PlanSnapshot, StepStatus};
use crate::tools::spec::ToolCapability;
use serde_json::json;
use std::collections::{HashMap, HashSet};
use std::ffi::OsString;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use std::time::Instant;
use tempfile::tempdir;

// Markdown heading text for the repo working-set summary section. It is not
// referenced in this part of the file; presumably tests further down look for
// it in generated prompts (TODO confirm against the rest of the module).
const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set";
// Lazily-initialised, process-wide async mutex. The safety notes on
// `ScopedCapacityMemoryDir` name it as the lock that serializes tests mutating
// the `DEEPSEEK_CAPACITY_MEMORY_DIR` environment variable.
static CAPACITY_MEMORY_ENV_LOCK: LazyLock<tokio::sync::Mutex<()>> =
    LazyLock::new(|| tokio::sync::Mutex::new(()));

/// Test guard that overrides `DEEPSEEK_CAPACITY_MEMORY_DIR` for its lifetime.
///
/// The value present before [`ScopedCapacityMemoryDir::set`] ran is remembered
/// and written back on drop; if the variable was unset, it is removed again.
struct ScopedCapacityMemoryDir {
    previous: Option<OsString>,
}

impl ScopedCapacityMemoryDir {
    /// Name of the environment variable this guard manages.
    const ENV_VAR: &'static str = "DEEPSEEK_CAPACITY_MEMORY_DIR";

    /// Points the variable at `path` and returns a guard holding the old value.
    fn set(path: &Path) -> Self {
        let saved = std::env::var_os(Self::ENV_VAR);
        // Safety: capacity-memory tests serialize access with CAPACITY_MEMORY_ENV_LOCK
        // and restore the original value in Drop.
        unsafe {
            std::env::set_var(Self::ENV_VAR, path);
        }
        Self { previous: saved }
    }
}

impl Drop for ScopedCapacityMemoryDir {
    /// Restores the variable to whatever it was before the guard was created.
    fn drop(&mut self) {
        // Safety: capacity-memory tests serialize access with CAPACITY_MEMORY_ENV_LOCK.
        match self.previous.take() {
            Some(original) => unsafe {
                std::env::set_var(Self::ENV_VAR, original);
            },
            None => unsafe {
                std::env::remove_var(Self::ENV_VAR);
            },
        }
    }
}

/// Test guard that overrides `DEEPSEEK_API_KEY` for its lifetime.
///
/// The previous value (if any) is captured by [`ScopedDeepSeekApiKey::set`] and
/// reinstated on drop; a previously unset variable is removed again.
struct ScopedDeepSeekApiKey {
    previous: Option<OsString>,
}

impl ScopedDeepSeekApiKey {
    /// Name of the environment variable this guard manages.
    const ENV_VAR: &'static str = "DEEPSEEK_API_KEY";

    /// Sets the variable to `value` and returns a guard holding the old value.
    fn set(value: &str) -> Self {
        let saved = std::env::var_os(Self::ENV_VAR);
        // Safety: tests using this helper serialize with lock_test_env() and
        // restore the original value in Drop.
        unsafe {
            std::env::set_var(Self::ENV_VAR, value);
        }
        Self { previous: saved }
    }
}

impl Drop for ScopedDeepSeekApiKey {
    /// Puts the variable back into the state observed when the guard was built.
    fn drop(&mut self) {
        // Safety: tests using this helper serialize with lock_test_env().
        let Some(original) = self.previous.take() else {
            unsafe {
                std::env::remove_var(Self::ENV_VAR);
            }
            return;
        };
        unsafe {
            std::env::set_var(Self::ENV_VAR, original);
        }
    }
}

fn build_engine_with_capacity(capacity: CapacityControllerConfig) -> Engine {
    let engine_config = EngineConfig {
        capacity,
        ..Default::default()
    };
    let (engine, _handle) = Engine::new(engine_config, &Config::default());
    engine
}

/// Minimal [`Tool`] fixture for the catalog-filter tests: only `name` and a bare
/// object schema are populated, the description is empty, and every optional
/// field is `None`.
fn catalog_tool(name: &str) -> Tool {
    let input_schema = json!({"type": "object"});
    Tool {
        name: name.to_owned(),
        description: String::new(),
        input_schema,
        tool_type: None,
        allowed_callers: None,
        defer_loading: None,
        input_examples: None,
        strict: None,
        cache_control: None,
    }
}

#[test]
fn tool_catalog_filter_applies_allow_and_deny_gates() {
    // #3027 AC1: the advertised catalog must not contain tools the execution
    // gates would deny; deny wins over allow.
    let allow_list = ["read_file".to_string(), "exec_shell".to_string()];
    let deny_list = ["exec_shell".to_string()];
    let mut catalog: Vec<Tool> = ["read_file", "exec_shell", "grep_files"]
        .iter()
        .map(|name| catalog_tool(name))
        .collect();

    filter_tool_catalog_for_gates(&mut catalog, Some(&allow_list[..]), Some(&deny_list[..]));

    // `exec_shell` is allowed but also denied; `grep_files` is not allowed at all.
    let surviving: Vec<&str> = catalog.iter().map(|tool| tool.name.as_str()).collect();
    assert_eq!(surviving, ["read_file"]);
}

#[test]
fn tool_catalog_filter_is_inert_without_gates() {
    // With neither an allow list nor a deny list, both entries must remain.
    let mut catalog = vec![catalog_tool("read_file"), catalog_tool("exec_shell")];
    let expected_len = 2;

    filter_tool_catalog_for_gates(&mut catalog, None, None);

    assert_eq!(catalog.len(), expected_len);
}

#[test]
fn structured_state_block_includes_rich_plan_artifact() {
    // A richly populated plan snapshot; unlisted fields fall back to defaults.
    let plan = PlanSnapshot {
        objective: Some("Make Plan mode reviewable".to_string()),
        context_summary: Some("Grounded in issue #2691".to_string()),
        sources_used: vec!["gh issue view 2691".to_string()],
        critical_files: vec!["crates/tui/src/tools/plan.rs".to_string()],
        constraints: vec!["Preserve legacy payloads".to_string()],
        recommended_approach: Some("Enrich update_plan".to_string()),
        verification_plan: Some("Run focused tests".to_string()),
        risks_and_unknowns: Some("Replay may drift".to_string()),
        handoff_packet: Some("Next agent should inspect replay".to_string()),
        items: vec![PlanItemArg {
            step: "Render rich artifact".to_string(),
            status: StepStatus::InProgress,
        }],
        ..PlanSnapshot::default()
    };
    let state = StructuredState {
        mode_label: "Plan".to_string(),
        workspace: PathBuf::from("/workspace/codewhale"),
        cwd: None,
        working_set_summary: None,
        todo_snapshot: None,
        plan_snapshot: Some(plan),
        subagent_snapshots: Vec::new(),
    };

    let rendered = state.to_system_block().expect("fork state block");

    // Every fragment below must appear somewhere in the rendered block.
    let expected_fragments = [
        "Objective: Make Plan mode reviewable",
        "Context: Grounded in issue #2691",
        "Source: gh issue view 2691",
        "Critical file: crates/tui/src/tools/plan.rs",
        "Constraint: Preserve legacy payloads",
        "Verification plan: Run focused tests",
        "Handoff packet: Next agent should inspect replay",
        "- [~] Render rich artifact",
    ];
    for fragment in expected_fragments {
        assert!(
            rendered.contains(fragment),
            "state block is missing {fragment:?}"
        );
    }
}

#[test]
fn env_only_auth_error_gets_recovery_hint() {
    // Hold the shared env lock before touching DEEPSEEK_API_KEY.
    let _guard = lock_test_env();
    let _env = ScopedDeepSeekApiKey::set("stale-env-key");
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());

    let raw_error = "Authentication failed: invalid API key".to_string();
    let decorated = engine.decorate_auth_error_message(raw_error);

    // The key only exists in the environment, so the message should point there
    // and at the commands that fix it.
    for hint in [
        "DEEPSEEK_API_KEY",
        "no saved config key is present",
        "codewhale auth status",
        "codewhale auth set --provider deepseek",
    ] {
        assert!(decorated.contains(hint), "missing hint {hint:?}");
    }
}

#[test]
fn config_auth_error_does_not_blame_env() {
    let _guard = lock_test_env();
    let _env = ScopedDeepSeekApiKey::set("stale-env-key");
    // A key is also present in the config, alongside the environment one.
    let config_with_key = Config {
        api_key: Some("fresh-config-key".to_string()),
        ..Config::default()
    };
    let (engine, _handle) = Engine::new(EngineConfig::default(), &config_with_key);

    let raw_error = "Authentication failed: invalid API key";
    let decorated = engine.decorate_auth_error_message(raw_error.to_string());

    // The message must come back unchanged.
    assert_eq!(decorated, raw_error);
}

#[test]
fn plugin_tools_dir_honors_missing_custom_directory_without_fallback() {
    // The configured directory is a relative path that is not created here.
    let missing = PathBuf::from("definitely-missing-codewhale-plugin-dir");
    let configured = missing.to_string_lossy().into_owned();
    let tools_config = crate::config::ToolsConfig {
        plugin_dir: Some(configured),
        ..Default::default()
    };

    let resolved = plugin_tools_dir(Some(&tools_config));

    assert_eq!(resolved, missing);
}

#[test]
fn configure_plugin_tools_applies_overrides_after_discovered_plugins() {
    // Lay down a plugin script that declares a tool named `same_tool`.
    let workspace = tempdir().expect("tempdir");
    let plugin_dir = workspace.path().join("tools");
    fs::create_dir(&plugin_dir).expect("plugin dir");
    let script_path = plugin_dir.join("same-name.sh");
    fs::write(
        &script_path,
        "# name: same_tool\n# description: discovered plugin\n",
    )
    .expect("plugin script");

    // Configure an override under the same tool name.
    let overrides = HashMap::from([(
        "same_tool".to_string(),
        crate::config::ToolOverride::Command {
            command: "configured-command".to_string(),
            args: None,
        },
    )]);
    let tools_config = crate::config::ToolsConfig {
        plugin_dir: Some(plugin_dir.to_string_lossy().to_string()),
        overrides: Some(overrides),
        ..Default::default()
    };

    let context = crate::tools::ToolContext::new(workspace.path().to_path_buf());
    let mut registry = crate::tools::ToolRegistry::new(context);

    let registered_names = configure_plugin_tools(&mut registry, Some(&tools_config));

    // The registered tool must describe the configured command, not the script.
    let registered = registry.get("same_tool").expect("same_tool registered");
    assert!(registered.description().contains("configured-command"));
    assert!(registered_names.contains("same_tool"));
}

/// Convenience wrapper around [`make_plan_at`] that pins the plan index to 0.
fn make_plan(
    read_only: bool,
    supports_parallel: bool,
    approval_required: bool,
    interactive: bool,
) -> ToolExecutionPlan {
    const FIRST_INDEX: usize = 0;
    make_plan_at(
        FIRST_INDEX,
        read_only,
        supports_parallel,
        approval_required,
        interactive,
    )
}

/// Builds a `grep_files` [`ToolExecutionPlan`] fixture at position `index`.
///
/// The id is derived from the index (`tool-{index}`); the four flags are copied
/// straight into the plan, and the plan carries no caller, blocked error, or
/// guard result.
fn make_plan_at(
    index: usize,
    read_only: bool,
    supports_parallel: bool,
    approval_required: bool,
    interactive: bool,
) -> ToolExecutionPlan {
    let id = format!("tool-{index}");
    let input = json!({"pattern": "test"});
    ToolExecutionPlan {
        index,
        id,
        name: String::from("grep_files"),
        input,
        caller: None,
        interactive,
        approval_required,
        approval_description: String::from("desc"),
        supports_parallel,
        read_only,
        blocked_error: None,
        guard_result: None,
    }
}

/// Builds a `function`-typed [`Tool`] fixture named `name`, callable by
/// `direct` callers, with a bare object schema and no deferral flag set.
fn api_tool(name: &str) -> Tool {
    let description = format!("Test tool {name}");
    let direct_only = vec![String::from("direct")];
    Tool {
        tool_type: Some(String::from("function")),
        name: name.to_owned(),
        description,
        input_schema: json!({"type": "object"}),
        allowed_callers: Some(direct_only),
        defer_loading: None,
        input_examples: None,
        strict: None,
        cache_control: None,
    }
}

#[test]
fn engine_handle_cancel_tracks_latest_turn_token() {
    let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
    // Keep a clone of the token as it was before the reset.
    let superseded_token = engine.cancel_token.clone();

    engine.reset_cancel_token();
    handle.cancel();

    // Cancelling through the handle must hit the current token only.
    assert!(engine.cancel_token.is_cancelled());
    assert!(handle.is_cancelled());
    assert!(!superseded_token.is_cancelled());
}

#[test]
fn engine_initial_prompt_includes_configured_goal() {
    let engine_config = EngineConfig {
        goal_objective: Some("Fix goal handoff".to_string()),
        ..Default::default()
    };
    let (engine, _handle) = Engine::new(engine_config, &Config::default());

    // Flatten the system prompt to a single string regardless of its shape.
    let prompt = match engine.session.system_prompt {
        None => panic!("expected system prompt"),
        Some(SystemPrompt::Text(text)) => text,
        Some(SystemPrompt::Blocks(blocks)) => {
            let texts: Vec<_> = blocks.into_iter().map(|block| block.text).collect();
            texts.join("\n")
        }
    };

    assert!(prompt.contains("<session_goal>"));
    assert!(prompt.contains("Fix goal handoff"));

    // The shared goal state must report an active goal as well.
    let goal_is_active = engine
        .config
        .goal_state
        .lock()
        .expect("goal lock")
        .is_active();
    assert!(goal_is_active);
}

#[test]
fn refresh_system_prompt_uses_runtime_goal_state() {
    let (mut engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());

    // Create the goal after construction, then release the lock before refreshing.
    let mut goal = engine.config.goal_state.lock().expect("goal lock");
    goal.create("Close the runtime goal loop".to_string(), None);
    drop(goal);

    engine.refresh_system_prompt();

    // Flatten the system prompt to a single string regardless of its shape.
    let prompt = match engine.session.system_prompt {
        None => panic!("expected system prompt"),
        Some(SystemPrompt::Text(text)) => text,
        Some(SystemPrompt::Blocks(blocks)) => {
            let texts: Vec<_> = blocks.into_iter().map(|block| block.text).collect();
            texts.join("\n")
        }
    };

    assert!(prompt.contains("<session_goal>"));
    assert!(prompt.contains("Close the runtime goal loop"));
}

#[test]
fn parallel_batch_requires_read_only_parallel_tools() {
    // Batches made only of read-only, parallel-capable, non-gated plans qualify.
    let accepted = [
        vec![make_plan(true, true, false, false)],
        vec![
            make_plan(true, true, false, false),
            make_plan(true, true, false, false),
        ],
    ];
    for plans in &accepted {
        assert!(should_parallelize_tool_batch(plans));
    }

    // Flipping any single flag must disqualify the batch.
    // Columns: read_only, supports_parallel, approval_required, interactive.
    let rejected = [
        (false, true, false, false),
        (true, false, false, false),
        (true, true, true, false),
        (true, true, false, true),
    ];
    for (read_only, supports_parallel, approval_required, interactive) in rejected {
        let plans = vec![make_plan(
            read_only,
            supports_parallel,
            approval_required,
            interactive,
        )];
        assert!(!should_parallelize_tool_batch(&plans));
    }
}

#[test]
fn tool_execution_batches_use_serial_barriers() {
    // Plans 2 and 4 cannot run in parallel, so they should split the sequence.
    let batches = plan_tool_execution_batches(vec![
        make_plan_at(0, true, true, false, false),
        make_plan_at(1, true, true, false, false),
        make_plan_at(2, false, false, true, false),
        make_plan_at(3, true, true, false, false),
        make_plan_at(4, true, false, false, false),
        make_plan_at(5, true, true, false, false),
        make_plan_at(6, true, true, false, false),
    ]);

    // Returns the plan indices of the parallel batch at `position`, or panics
    // when that batch turned out to be serial.
    let parallel_indices = |position: usize, ordinal: &str| -> Vec<usize> {
        match &batches[position] {
            ToolExecutionBatch::Parallel(plans) => plans.iter().map(|plan| plan.index).collect(),
            ToolExecutionBatch::Serial(_) => panic!("{ordinal} batch should be parallel"),
        }
    };
    // Returns the plan index of the serial batch at `position`, or panics when
    // that batch turned out to be parallel.
    let serial_index = |position: usize, ordinal: &str| -> usize {
        match &batches[position] {
            ToolExecutionBatch::Serial(plan) => plan.index,
            ToolExecutionBatch::Parallel(_) => panic!("{ordinal} batch should be serial"),
        }
    };

    assert_eq!(batches.len(), 5);
    assert_eq!(parallel_indices(0, "first"), vec![0, 1]);
    assert_eq!(serial_index(1, "second"), 2);
    assert_eq!(parallel_indices(2, "third"), vec![3]);
    assert_eq!(serial_index(3, "fourth"), 4);
    assert_eq!(parallel_indices(4, "fifth"), vec![5, 6]);
}

#[test]
fn successful_update_plan_ends_plan_mode_turn_immediately() {
    let planned = Ok(ToolResult::success("planned"));
    let user_input = Ok(ToolResult::success("input"));
    let failed = Err(ToolError::execution_failed("failed".to_string()));

    // Only a successful `update_plan` in Plan mode stops the turn.
    assert!(should_stop_after_plan_tool(
        AppMode::Plan,
        "update_plan",
        &planned
    ));
    // Same tool and result, different mode.
    assert!(!should_stop_after_plan_tool(
        AppMode::Agent,
        "update_plan",
        &planned
    ));
    // Plan mode, but a different tool.
    assert!(!should_stop_after_plan_tool(
        AppMode::Plan,
        "request_user_input",
        &user_input
    ));
    // Plan mode and the right tool, but the call failed.
    assert!(!should_stop_after_plan_tool(
        AppMode::Plan,
        "update_plan",
        &failed
    ));
}

#[test]
fn quick_plan_requests_force_update_plan_on_first_step() {
    // (mode, user request, whether `update_plan` should be forced first)
    let cases = [
        (
            AppMode::Plan,
            "Give me a quick 3-step plan to verify the UI changes.",
            true,
        ),
        (
            AppMode::Plan,
            "Make a high-level plan for the footer work.",
            true,
        ),
        (
            AppMode::Plan,
            "Inspect the repo and then give me a quick plan.",
            false,
        ),
        (AppMode::Agent, "Give me a quick 3-step plan.", false),
    ];

    for (mode, request, expected) in cases {
        assert_eq!(
            should_force_update_plan_first(mode, request),
            expected,
            "{request}"
        );
    }
}

#[test]
fn quick_plan_turn_can_narrow_first_step_tools_to_update_plan() {
    // Both fixtures are eagerly loaded (`defer_loading: Some(false)`) function tools.
    let loaded_tool = |name: &str, description: &str| Tool {
        tool_type: Some("function".to_string()),
        name: name.to_string(),
        description: description.to_string(),
        input_schema: json!({"type": "object"}),
        allowed_callers: Some(vec!["direct".to_string()]),
        defer_loading: Some(false),
        input_examples: None,
        strict: None,
        cache_control: None,
    };
    let catalog = vec![
        loaded_tool("read_file", "Read a file"),
        loaded_tool("update_plan", "Publish a plan"),
    ];
    let active = initial_active_tools(&catalog);

    // Forcing the plan tool leaves only `update_plan` on offer.
    let narrowed = active_tools_for_step(&catalog, &active, true);
    assert_eq!(narrowed.len(), 1);
    assert_eq!(narrowed[0].name, "update_plan");

    // Without forcing, both tools are offered.
    let unrestricted = active_tools_for_step(&catalog, &active, false);
    assert_eq!(unrestricted.len(), 2);
}

#[test]
fn tool_error_messages_include_actionable_hints() {
    let escape = ToolError::path_escape(PathBuf::from("../escape.txt"));
    assert!(format_tool_error(&escape, "read_file").contains("escapes workspace"));

    let missing = ToolError::missing_field("path");
    assert!(format_tool_error(&missing, "read_file").contains("missing required field"));

    let timed_out = ToolError::Timeout { seconds: 5 };
    assert!(format_tool_error(&timed_out, "exec_shell").contains("timed out"));

    // #3020: Plan-mode denials already explain the fix — pass through
    // verbatim, with no conflicting "Adjust approval mode" suffix.
    let plan_mode_message = "'exec_shell' is not available in Plan mode — switch to Agent, Goal, or YOLO mode to run commands and code.";
    let plan_denied = ToolError::permission_denied(plan_mode_message);
    assert_eq!(
        format_tool_error(&plan_denied, "exec_shell"),
        plan_mode_message
    );

    let suffixed_cases = [
        // Bare denials still get the actionable suffix.
        ("nope", "exec_shell"),
        // "model" must not satisfy the "mode" pass-through check.
        ("requested model is not allowed", "agent_open"),
    ];
    for (reason, tool_name) in suffixed_cases {
        let denied = ToolError::permission_denied(reason);
        let formatted = format_tool_error(&denied, tool_name);
        assert!(
            formatted.contains("Adjust approval mode or request permission"),
            "{formatted}"
        );
    }
}

#[test]
fn tool_exec_outcome_tracks_duration() {
    // `Instant` is monotonic but not strictly increasing: on a coarse clock two
    // back-to-back readings can be equal, so asserting `elapsed() > 0` right
    // after construction is flaky. Bracket the stored start time between two
    // readings instead; these comparisons hold even if the clock has not ticked.
    let before = Instant::now();
    let outcome = ToolExecOutcome {
        index: 0,
        id: "tool-1".to_string(),
        name: "grep_files".to_string(),
        input: json!({"pattern": "test"}),
        started_at: Instant::now(),
        result: Ok(ToolResult::success("ok")),
    };
    let after = Instant::now();

    assert!(
        outcome.started_at >= before,
        "start time predates construction"
    );
    assert!(
        outcome.started_at <= after,
        "start time postdates construction"
    );
    // A duration can be derived from the stored start and is bounded by the
    // width of the bracket.
    assert!(after.duration_since(outcome.started_at) <= after.duration_since(before));
}

#[test]
fn core_native_tools_stay_loaded_in_yolo_mode() {
    // No user-pinned tools: only the built-in default policy applies.
    let no_pins = HashSet::new();

    let exec_shell_deferred = should_default_defer_tool("exec_shell", &no_pins);
    assert!(!exec_shell_deferred);

    // git_blame remains deferred (read-only git history beyond log/show/diff).
    let git_blame_deferred = should_default_defer_tool("git_blame", &no_pins);
    assert!(git_blame_deferred);
}

#[test]
fn non_yolo_mode_retains_default_defer_policy() {
    let always_load = HashSet::new();

    let loaded_by_default = [
        "exec_shell",
        "edit_file",
        "apply_patch",
        "fetch_url",
        "git_diff",
        // #2654: read-only git history joins the active set.
        "git_log",
        "git_show",
        "git_status",
        "run_tests",
        "agent_open",
        // #2605: the fetch/close side of the sub-agent surface must also stay
        // active so a first `agent_eval`/`agent_close` executes instead of
        // hydrating its schema and forcing a double-invoke.
        "agent_eval",
        "agent_close",
        "read_file",
        "web_search",
        "write_file",
        "task_shell_start",
        "task_shell_wait",
    ];
    for tool_name in loaded_by_default {
        assert!(
            !should_default_defer_tool(tool_name, &always_load),
            "{tool_name} should not be deferred by default"
        );
    }

    assert!(should_default_defer_tool("git_blame", &always_load));
}

#[test]
fn model_tool_catalog_applies_native_and_mcp_deferral() {
    let always_load = HashSet::new();
    let native_tools: Vec<Tool> = [
        "read_file",
        "write_file",
        "exec_shell",
        "edit_file",
        "project_map",
    ]
    .iter()
    .map(|name| api_tool(name))
    .collect();
    let mcp_tools = vec![api_tool("list_mcp_resources"), api_tool("mcp_server_write")];

    let catalog = build_model_tool_catalog(native_tools, mcp_tools, AppMode::Agent, &always_load);

    // Looks up the deferral flag of the named tool; `None` if the tool is
    // absent or its flag is unset.
    let deferral_of = |name: &str| -> Option<bool> {
        catalog
            .iter()
            .find(|tool| tool.name == name)?
            .defer_loading
    };

    let expectations = [
        ("read_file", Some(false)),
        ("write_file", Some(false)),
        ("exec_shell", Some(false)),
        ("edit_file", Some(false)),
        ("project_map", Some(true)),
        ("list_mcp_resources", Some(false)),
        ("mcp_server_write", Some(true)),
    ];
    for (name, expected) in expectations {
        assert_eq!(deferral_of(name), expected, "{name}");
    }
}

#[test]
fn arcee_provider_policy_defers_risky_tools_keeps_read_only_and_tool_search() {
    let always_load = HashSet::new();
    let mut catalog: Vec<Tool> = [
        "read_file",
        "list_dir",
        "git_status",
        "git_diff",
        "grep_files",
        "file_search",
        "update_plan",
        "checklist_write",
        "exec_shell",
        "apply_patch",
        "write_file",
        "edit_file",
        "fetch_url",
        "web_search",
        "tool_search_tool_regex",
        "tool_search_tool_bm25",
    ]
    .iter()
    .map(|name| api_tool(name))
    .collect();

    apply_provider_tool_policy(&mut catalog, ApiProvider::Arcee, &always_load);

    // Looks up the deferral flag of the named tool; `None` if the tool is
    // absent or its flag is unset.
    let deferral_of = |name: &str| -> Option<bool> {
        catalog
            .iter()
            .find(|tool| tool.name == name)?
            .defer_loading
    };

    // Benign read-only first-turn set stays active so the opening Arcee
    // request clears Cloudflare's WAF.
    let stays_active = [
        "read_file",
        "list_dir",
        "git_status",
        "git_diff",
        "grep_files",
        "file_search",
        "update_plan",
        "checklist_write",
    ];
    for name in stays_active {
        assert_eq!(deferral_of(name), Some(false), "{name} should stay active");
    }

    // Tool-search stays active so the deferred tail remains discoverable.
    assert_eq!(deferral_of("tool_search_tool_regex"), Some(false));
    assert_eq!(deferral_of("tool_search_tool_bm25"), Some(false));

    // WAF-risky / mutating tools are deferred on the first Arcee turn.
    let gets_deferred = [
        "exec_shell",
        "apply_patch",
        "write_file",
        "edit_file",
        "fetch_url",
        "web_search",
    ];
    for name in gets_deferred {
        assert_eq!(deferral_of(name), Some(true), "{name} should be deferred");
    }

    // The initial active set derived from the catalog must agree with the flags.
    let active = initial_active_tools(&catalog);
    for name in ["read_file", "tool_search_tool_regex"] {
        assert!(active.contains(name));
    }
    for name in ["exec_shell", "apply_patch"] {
        assert!(!active.contains(name));
    }
}

#[test]
fn provider_tool_policy_is_noop_for_non_waf_providers() {
    let always_load = HashSet::new();
    let mut catalog = vec![api_tool("exec_shell"), api_tool("read_file")];

    // DeepSeek has no reduced first-turn surface: the policy must leave the
    // default deferral flags untouched (here: still unset).
    apply_provider_tool_policy(&mut catalog, ApiProvider::Deepseek, &always_load);

    let any_flag_set = catalog.iter().any(|tool| tool.defer_loading.is_some());
    assert!(!any_flag_set);
}

#[test]
fn arcee_provider_policy_honors_always_load_override() {
    let always_load = HashSet::from(["exec_shell".to_string()]);
    let mut catalog = vec![api_tool("exec_shell"), api_tool("apply_patch")];

    apply_provider_tool_policy(&mut catalog, ApiProvider::Arcee, &always_load);

    // Looks up the deferral flag of the named tool; `None` if the tool is
    // absent or its flag is unset.
    let deferral_of = |name: &str| -> Option<bool> {
        catalog
            .iter()
            .find(|tool| tool.name == name)?
            .defer_loading
    };

    // A user-pinned always_load tool stays active even on Arcee.
    assert_eq!(deferral_of("exec_shell"), Some(false));
    // Other risky tools remain deferred.
    assert_eq!(deferral_of("apply_patch"), Some(true));
}

#[test]
fn agent_catalog_keeps_edit_file_loaded_when_fuzz_is_omitted() {
    // Build the Agent-mode registry the same way the engine does for a turn.
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
    let registry_builder = engine.build_turn_tool_registry_builder(
        AppMode::Agent,
        engine.config.todos.clone(),
        engine.config.plan_state.clone(),
    );
    let registry = registry_builder.build(engine.build_tool_context(AppMode::Agent, false));

    let always_load = HashSet::new();
    let catalog = build_model_tool_catalog(
        registry.to_api_tools_with_cache(true),
        vec![],
        AppMode::Agent,
        &always_load,
    );
    let edit_file = catalog
        .iter()
        .find(|tool| tool.name == "edit_file")
        .expect("edit_file registered");

    assert_eq!(edit_file.defer_loading, Some(false));

    // `path`, `search` and `replace` are required; `fuzz` is an optional boolean.
    let required = edit_file.input_schema["required"]
        .as_array()
        .expect("edit_file schema should include required fields");
    let is_required = |field: &str| required.iter().any(|entry| entry.as_str() == Some(field));
    for field in ["path", "search", "replace"] {
        assert!(is_required(field), "{field} should be required");
    }
    assert!(!is_required("fuzz"));
    assert_eq!(
        edit_file.input_schema["properties"]["fuzz"]["type"].as_str(),
        Some("boolean")
    );

    // A call that omits `fuzz` must not be answered with a hydration result.
    let active_at_batch_start = initial_active_tools(&catalog);
    assert!(active_at_batch_start.contains("edit_file"));
    let mut hydrated_this_batch = HashSet::new();
    let call_input = json!({
        "path": "src/foo.rs",
        "search": "before",
        "replace": "after"
    });
    let hydration = maybe_hydrate_requested_deferred_tool(
        "edit_file",
        &call_input,
        &catalog,
        &active_at_batch_start,
        &mut hydrated_this_batch,
    );
    assert!(
        hydration.is_none(),
        "loaded edit_file calls without fuzz should execute instead of hydrating the schema"
    );
    assert!(hydrated_this_batch.is_empty());
}

#[test]
fn tools_always_load_overrides_default_native_deferral() {
    // Pinning `git_blame` must override its deferred-by-default status.
    let mut always_load = HashSet::new();
    always_load.insert("git_blame".to_string());

    let deferred = should_default_defer_tool("git_blame", &always_load);

    assert!(!deferred);
}

// Ignored by default: per its `#[ignore]` reason this is a one-shot metric for
// scripts/measure-tool-catalog.py. It compares the serialized size of the full
// registry catalog against the subset active at the start of an Agent-mode
// turn and prints the numbers to stderr.
#[test]
#[ignore = "one-shot metric for scripts/measure-tool-catalog.py"]
// Printing to stderr is this test's output, so the clippy lint is silenced.
#[allow(clippy::print_stderr)]
fn print_agent_tool_catalog_metrics() {
    let tmp = tempdir().expect("tempdir");
    let context = crate::tools::ToolContext::new(tmp.path().to_path_buf());
    // The client is built with a placeholder API key.
    let client = DeepSeekClient::new(&Config {
        api_key: Some("test-key".to_string()),
        ..Config::default()
    })
    .expect("stub client");
    let manager = crate::tools::subagent::new_shared_subagent_manager(tmp.path().to_path_buf(), 8);
    let runtime = crate::tools::subagent::SubAgentRuntime::new(
        client,
        DEFAULT_TEXT_MODEL.to_string(),
        context.clone(),
        true,
        None,
        manager.clone(),
    );
    // Baseline: a registry with the agent, todo, plan, review, rlm, notify and
    // sub-agent tool families registered.
    let registry = crate::tools::ToolRegistryBuilder::new()
        .with_agent_tools(true)
        .with_todo_tool(new_shared_todo_list())
        .with_plan_tool(new_shared_plan_state())
        .with_review_tool(None, DEFAULT_TEXT_MODEL.to_string())
        .with_rlm_tool(None, DEFAULT_TEXT_MODEL.to_string())
        .with_notify_tool()
        .with_subagent_tools(manager, runtime)
        .build(context);
    let baseline_catalog = registry.to_api_tools_with_cache(true);
    let baseline_json = serde_json::to_vec(&baseline_catalog).expect("serialize baseline");

    // Active: the same catalog run through the model-catalog pipeline, reduced
    // to the tools active for a step with no forced plan tool.
    let always_load = HashSet::new();
    let mut catalog = build_model_tool_catalog(
        baseline_catalog.clone(),
        vec![],
        AppMode::Agent,
        &always_load,
    );
    ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
    let active = initial_active_tools(&catalog);
    let active_catalog = active_tools_for_step(&catalog, &active, false);
    let active_json = serde_json::to_vec(&active_catalog).expect("serialize active");
    // Percentage of serialized bytes saved; guarded against division by zero,
    // and saturating so a larger active catalog reports 0 rather than underflowing.
    let reduction_percent = if baseline_json.is_empty() {
        0.0
    } else {
        100.0 * (baseline_json.len().saturating_sub(active_json.len())) as f64
            / baseline_json.len() as f64
    };

    // Token estimates are byte counts divided by four, rounded up.
    eprintln!(
        "TOOL_CATALOG_METRICS {}",
        serde_json::json!({
            "baseline_tools": baseline_catalog.len(),
            "baseline_bytes": baseline_json.len(),
            "baseline_tokens_est": baseline_json.len().div_ceil(4),
            "active_tools": active_catalog.len(),
            "active_bytes": active_json.len(),
            "active_tokens_est": active_json.len().div_ceil(4),
            "reduction_percent": reduction_percent,
            "active_tool_names": active_catalog.iter().map(|tool| tool.name.as_str()).collect::<Vec<_>>(),
        })
    );
}

// Walks a deferred `edit_file` tool through two batches: calls made while the
// tool is not in the active set must return a schema-hydration result instead
// of executing; once the hydrated name is merged into the active set, the same
// helper returns `None`.
#[test]
fn deferred_edit_file_first_use_hydrates_schema_without_execution() {
    // Catalog with a single deferred tool whose schema requires
    // `path`, `search` and `replace`.
    let mut edit = api_tool("edit_file");
    edit.defer_loading = Some(true);
    edit.input_schema = json!({
        "type": "object",
        "properties": {
            "path": { "type": "string" },
            "search": { "type": "string" },
            "replace": { "type": "string" }
        },
        "required": ["path", "search", "replace"]
    });

    let catalog = vec![edit];
    // Nothing is active when the first batch starts.
    let active_at_batch_start = HashSet::new();
    let mut hydrated_this_batch = HashSet::new();
    // The call uses `old_string`/`new_string` rather than the schema's
    // `search`/`replace`; the hydration text is expected to map them below.
    let result = maybe_hydrate_requested_deferred_tool(
        "edit_file",
        &json!({
            "path": "src/foo.rs",
            "old_string": "before",
            "new_string": "after"
        }),
        &catalog,
        &active_at_batch_start,
        &mut hydrated_this_batch,
    )
    .expect("first deferred use should hydrate");

    // The tool is recorded as hydrated for this batch, not as active.
    assert!(!active_at_batch_start.contains("edit_file"));
    assert!(hydrated_this_batch.contains("edit_file"));
    assert!(result.success);
    // The returned content lists the schema fields and the alias hints.
    assert!(result.content.contains("Tool `edit_file` was deferred"));
    assert!(result.content.contains("path: string"));
    assert!(result.content.contains("search: string"));
    assert!(result.content.contains("replace: string"));
    assert!(result.content.contains("old_string -> search"));
    assert!(result.content.contains("new_string -> replace"));
    assert!(result.content.contains("The tool was not executed"));

    // Metadata marks the result as a hydration event that needs a retry.
    let metadata = result.metadata.expect("metadata");
    assert_eq!(metadata["event"], "tool.schema_hydrated");
    assert_eq!(metadata["executed"], false);
    assert_eq!(metadata["retry_required"], true);

    // A second call in the same batch still hydrates, and the hydrated set
    // keeps a single entry.
    let second_result = maybe_hydrate_requested_deferred_tool(
        "edit_file",
        &json!({"path": "src/bar.rs", "old_string": "before", "new_string": "after"}),
        &catalog,
        &active_at_batch_start,
        &mut hydrated_this_batch,
    )
    .expect("later calls in the same batch should hydrate instead of executing");
    assert_eq!(second_result.metadata.unwrap()["executed"], false);
    assert_eq!(hydrated_this_batch.len(), 1);

    // Next batch: the hydrated names are merged into the active set (this
    // consumes `hydrated_this_batch`), after which the helper returns `None`.
    let mut active_next_batch = active_at_batch_start.clone();
    active_next_batch.extend(hydrated_this_batch);
    let mut hydrated_next_batch = HashSet::new();
    assert!(
        maybe_hydrate_requested_deferred_tool(
            "edit_file",
            &json!({"path": "src/foo.rs", "search": "before", "replace": "after"}),
            &catalog,
            &active_next_batch,
            &mut hydrated_next_batch,
        )
        .is_none(),
        "tools hydrated in a previous batch should execute normally"
    );
}

#[test]
fn model_tool_catalog_defers_non_core_native_tools_in_yolo_mode() {
    let always_load = HashSet::new();
    let native_tools = vec![api_tool("read_file"), api_tool("project_map")];
    let mcp_tools = vec![api_tool("mcp_server_write")];

    let catalog = build_model_tool_catalog(native_tools, mcp_tools, AppMode::Yolo, &always_load);

    // Looks up the deferral flag of the named tool; `None` if the tool is
    // absent or its flag is unset.
    let deferral_of = |name: &str| -> Option<bool> {
        catalog
            .iter()
            .find(|tool| tool.name == name)?
            .defer_loading
    };

    let expectations = [
        ("read_file", Some(false)),
        ("project_map", Some(true)),
        ("mcp_server_write", Some(false)),
    ];
    for (name, expected) in expectations {
        assert_eq!(deferral_of(name), expected, "{name}");
    }
}

#[test]
fn model_tool_catalog_sorts_each_partition_for_prefix_cache_stability() {
    // Regression for #263. The tools array must serialize in a deterministic
    // byte order for DeepSeek's KV prefix cache: built-in tools form one
    // contiguous, alphabetized prefix and MCP tools follow, also alphabetized.
    let native_tools = vec![
        api_tool("read_file"),
        api_tool("apply_patch"),
        api_tool("exec_shell"),
    ];
    let mcp_tools = vec![api_tool("mcp_zoo_b"), api_tool("mcp_aardvark_a")];
    let pinned = HashSet::new();
    let catalog = build_model_tool_catalog(native_tools, mcp_tools, AppMode::Yolo, &pinned);

    let expected_order = [
        "apply_patch",
        "exec_shell",
        "read_file",
        "mcp_aardvark_a",
        "mcp_zoo_b",
    ];
    let actual_order: Vec<&str> = catalog.iter().map(|tool| tool.name.as_str()).collect();
    assert_eq!(
        actual_order, expected_order,
        "built-ins must be alphabetical and contiguous; MCP tools follow, alphabetical",
    );
}

#[test]
fn active_tool_list_pushes_deferred_activations_to_the_tail() {
    // Regression for #263. A deferred tool that ToolSearch activates
    // mid-session must not be listed at its catalog index: doing so would move
    // the byte offset of every later tool and invalidate the cached prefix.
    // Activated deferred tools are listed after the always-loaded ones.
    let catalog: Vec<_> = [
        ("a_load_now", false),
        ("search_via_toolsearch", true),
        ("b_load_now", false),
    ]
    .into_iter()
    .map(|(tool_name, deferred)| {
        let mut tool = api_tool(tool_name);
        tool.defer_loading = Some(deferred);
        tool
    })
    .collect();

    // Every catalog entry is active, including the deferred one.
    let active = HashSet::from([
        "a_load_now".to_string(),
        "search_via_toolsearch".to_string(),
        "b_load_now".to_string(),
    ]);

    let listed = active_tools_for_step(&catalog, &active, false);
    let listed_order: Vec<&str> = listed.iter().map(|tool| tool.name.as_str()).collect();
    assert_eq!(
        listed_order,
        ["a_load_now", "b_load_now", "search_via_toolsearch"],
        "deferred-but-active tools must come after always-loaded tools",
    );
}

/// Calling a deferred `edit_file` with the `old_string`/`new_string` aliases
/// should activate the tool and return schema guidance (including the alias
/// corrections) that states the tool was not executed.
#[test]
fn deferred_tool_preflight_loads_edit_schema_without_executing_bad_aliases() {
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
    let registry = engine
        .build_turn_tool_registry_builder(
            AppMode::Agent,
            engine.config.todos.clone(),
            engine.config.plan_state.clone(),
        )
        .build(engine.build_tool_context(AppMode::Agent, false));
    let pinned = HashSet::new();
    let mut catalog = build_model_tool_catalog(
        registry.to_api_tools_with_cache(true),
        Vec::new(),
        AppMode::Agent,
        &pinned,
    );

    // Force `edit_file` into the deferred state for this test.
    let edit_index = catalog
        .iter()
        .position(|tool| tool.name == "edit_file")
        .expect("edit_file registered");
    catalog[edit_index].defer_loading = Some(true);

    let mut active = initial_active_tools(&catalog);
    assert!(!active.contains("edit_file"));

    let aliased_args = json!({
        "path": "src/foo.rs",
        "old_string": "before",
        "new_string": "after"
    });
    let result =
        preflight_requested_deferred_tool("edit_file", &aliased_args, &catalog, &mut active)
            .expect("deferred edit_file should preflight");

    assert!(active.contains("edit_file"));
    assert!(result.success);

    let guidance = &result.content;
    assert!(guidance.contains("Tool `edit_file` was deferred"));
    assert!(guidance.contains("The tool was not executed"));
    assert!(guidance.contains("path: string required"));
    assert!(guidance.contains("search: string required"));
    assert!(guidance.contains("replace: string required"));
    assert!(guidance.contains("old_string -> search"));
    assert!(guidance.contains("new_string -> replace"));

    let metadata = result.metadata.as_ref().unwrap();
    assert_eq!(metadata["deferred_tool_loaded"], json!(true));
}

/// Calling a deferred `rlm_open` with `prompt`/`path` fields should activate
/// the tool and return guidance that maps both fields onto the accepted
/// source fields, `session_object` included.
#[test]
fn deferred_tool_preflight_guides_rlm_open_misnamed_source_fields() {
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
    let registry = engine
        .build_turn_tool_registry_builder(
            AppMode::Agent,
            engine.config.todos.clone(),
            engine.config.plan_state.clone(),
        )
        .build(engine.build_tool_context(AppMode::Agent, false));
    let pinned = HashSet::new();
    let mut catalog = build_model_tool_catalog(
        registry.to_api_tools_with_cache(true),
        Vec::new(),
        AppMode::Agent,
        &pinned,
    );

    // Force `rlm_open` into the deferred state for this test.
    let rlm_index = catalog
        .iter()
        .position(|tool| tool.name == "rlm_open")
        .expect("rlm_open registered");
    catalog[rlm_index].defer_loading = Some(true);

    let mut active = initial_active_tools(&catalog);
    assert!(!active.contains("rlm_open"));

    let misnamed_args = json!({
        "name": "active_prompt",
        "prompt": "inspect this",
        "path": "src/lib.rs"
    });
    let result =
        preflight_requested_deferred_tool("rlm_open", &misnamed_args, &catalog, &mut active)
            .expect("deferred rlm_open should preflight");

    assert!(active.contains("rlm_open"));
    assert!(result.success);
    assert!(result.content.contains("Tool `rlm_open` was deferred"));
    assert!(result.content.contains("The tool was not executed"));
    assert!(result.content.contains("session_object: string"));

    let prompt_fix_present = result.content.contains(
        "prompt -> file_path (local file), content (inline text), url, or session_object",
    );
    assert!(
        prompt_fix_present,
        "prompt correction includes session_object: {}",
        result.content
    );
    let path_fix_present = result.content.contains(
        "path -> file_path (local file), content (inline text), url, or session_object",
    );
    assert!(
        path_fix_present,
        "path correction includes session_object: {}",
        result.content
    );

    let metadata = result.metadata.as_ref().unwrap();
    assert_eq!(metadata["deferred_tool_loaded"], json!(true));
}

/// Calling `checklist_update` with a whole `todos` list, while the tool is
/// not yet active, should activate it and return guidance listing the missing
/// and unexpected fields and pointing at `checklist_write`.
#[test]
fn deferred_tool_preflight_guides_checklist_update_list_replacement() {
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
    let registry = engine
        .build_turn_tool_registry_builder(
            AppMode::Agent,
            engine.config.todos.clone(),
            engine.config.plan_state.clone(),
        )
        .build(engine.build_tool_context(AppMode::Agent, false));
    let pinned = HashSet::new();
    let catalog = build_model_tool_catalog(
        registry.to_api_tools_with_cache(true),
        Vec::new(),
        AppMode::Agent,
        &pinned,
    );
    let mut active = initial_active_tools(&catalog);
    assert!(!active.contains("checklist_update"));

    let list_style_args = json!({
        "todos": [
            { "content": "wire preflight", "status": "completed" }
        ]
    });
    let result = preflight_requested_deferred_tool(
        "checklist_update",
        &list_style_args,
        &catalog,
        &mut active,
    )
    .expect("deferred checklist_update should preflight");

    assert!(active.contains("checklist_update"));
    assert!(result.success);

    let guidance = &result.content;
    assert!(guidance.contains("Tool `checklist_update` was deferred"));
    assert!(guidance.contains("id: integer required"));
    assert!(guidance.contains("status: string"));
    assert!(guidance.contains("Missing required fields:"));
    assert!(guidance.contains("id, status"));
    assert!(guidance.contains("Unexpected fields:"));
    assert!(guidance.contains("todos"));
    assert!(guidance.contains("Use checklist_write"));
}

/// Runs `echo bang-ok` through `handle_run_shell_command` in Agent mode with
/// `ApprovalMode::Suggest` and checks the emitted event sequence: the tool
/// call starts as `exec_shell`, an approval is requested (and granted by this
/// test), the tool completes successfully with the echoed text, and the turn
/// finishes as `Completed`.
#[tokio::test]
async fn run_shell_command_op_requests_approval_and_executes_shell() {
    let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
    // Second handle used inside the event loop to answer the approval request.
    let handle_for_approval = handle.clone();

    // Run the op on its own task so the loop below can respond to the
    // approval request while the command is still pending.
    let task = tokio::spawn(async move {
        engine
            .handle_run_shell_command(
                "echo bang-ok".to_string(),
                AppMode::Agent,
                false,
                false,
                crate::tui::approval::ApprovalMode::Suggest,
            )
            .await;
    });

    // One flag per event kind that must be observed before the turn ends.
    let mut saw_started = false;
    let mut saw_approval = false;
    let mut saw_complete = false;
    let mut saw_turn_complete = false;
    let mut rx = handle.rx_event.write().await;
    while let Some(event) = rx.recv().await {
        match event {
            Event::TurnStarted { turn_id } => {
                assert!(turn_id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
            }
            Event::ToolCallStarted { id, name, input } => {
                saw_started = true;
                assert!(id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
                assert_eq!(name, "exec_shell");
                assert_eq!(input["command"], json!("echo bang-ok"));
                assert_eq!(input["source"], json!("user"));
            }
            Event::ApprovalRequired { id, tool_name, .. } => {
                saw_approval = true;
                assert!(id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
                assert_eq!(tool_name, "exec_shell");
                // Grant the approval so the command can proceed.
                handle_for_approval
                    .approve_tool_call(id)
                    .await
                    .expect("approve shell");
            }
            Event::ToolCallComplete { id, name, result } => {
                saw_complete = true;
                assert!(id.starts_with(USER_SHELL_TOOL_ID_PREFIX));
                assert_eq!(name, "exec_shell");
                let result = result.expect("shell result");
                assert!(result.success, "{result:?}");
                assert!(result.content.contains("bang-ok"), "{result:?}");
            }
            Event::TurnComplete { status, .. } => {
                saw_turn_complete = true;
                assert_eq!(status, TurnOutcomeStatus::Completed);
                // TurnComplete is the terminal event this test waits for.
                break;
            }
            // Any other event kind is irrelevant to this test.
            _ => {}
        }
    }
    // Release the receiver guard before joining the spawned task.
    drop(rx);
    task.await.expect("shell op task");

    assert!(saw_started);
    assert!(saw_approval);
    assert!(saw_complete);
    assert!(saw_turn_complete);
}

/// Runs `echo bang-yolo` in Yolo mode with `ApprovalMode::Auto` and checks
/// that no approval is requested, the tool call succeeds with the echoed
/// text, and the turn finishes as `Completed`.
#[tokio::test]
async fn run_shell_command_op_skips_approval_when_auto_approved() {
    use crate::tui::approval::ApprovalMode;

    let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());

    let command = String::from("echo bang-yolo");
    engine
        .handle_run_shell_command(command, AppMode::Yolo, true, true, ApprovalMode::Auto)
        .await;

    let mut saw_complete = false;
    let mut rx = handle.rx_event.write().await;
    loop {
        let Some(event) = rx.recv().await else {
            break;
        };
        match event {
            Event::TurnComplete { status, .. } => {
                assert_eq!(status, TurnOutcomeStatus::Completed);
                break;
            }
            Event::ToolCallComplete { result, .. } => {
                saw_complete = true;
                let result = result.expect("shell result");
                assert!(result.success, "{result:?}");
                assert!(result.content.contains("bang-yolo"), "{result:?}");
            }
            Event::ApprovalRequired { .. } => {
                panic!("auto-approved shell shortcut should not request approval");
            }
            _ => {}
        }
    }

    assert!(saw_complete);
}

/// Runs a shell command in Plan mode and checks that it is rejected without
/// an approval request: the `exec_shell` tool call completes with an error
/// mentioning Plan mode and the turn finishes as `Failed`.
#[tokio::test]
async fn run_shell_command_op_preserves_plan_mode_shell_block() {
    use crate::tui::approval::ApprovalMode;

    let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());

    let command = String::from("echo blocked");
    engine
        .handle_run_shell_command(command, AppMode::Plan, false, false, ApprovalMode::Suggest)
        .await;

    let (mut saw_complete, mut saw_turn_complete) = (false, false);
    let mut rx = handle.rx_event.write().await;
    loop {
        let Some(event) = rx.recv().await else {
            break;
        };
        match event {
            Event::TurnComplete { status, .. } => {
                saw_turn_complete = true;
                assert_eq!(status, TurnOutcomeStatus::Failed);
                break;
            }
            Event::ToolCallComplete { name, result, .. } => {
                saw_complete = true;
                assert_eq!(name, "exec_shell");
                let err = result.expect_err("plan shell should fail");
                let mentions_plan_mode = err.to_string().contains("unavailable in Plan mode");
                assert!(mentions_plan_mode, "{err}");
            }
            Event::ApprovalRequired { .. } => {
                panic!("Plan mode shell should be blocked before approval");
            }
            _ => {}
        }
    }

    assert!(saw_complete);
    assert!(saw_turn_complete);
}

/// A deferred tool that is already in the active set gets no preflight
/// result, so the call proceeds to normal execution.
#[test]
fn deferred_tool_preflight_skips_already_active_tools() {
    let catalog = vec![{
        let mut deferred = api_tool("deferred_tool");
        deferred.defer_loading = Some(true);
        deferred
    }];
    let mut active: HashSet<String> = std::iter::once("deferred_tool".to_string()).collect();

    let preflight =
        preflight_requested_deferred_tool("deferred_tool", &json!({}), &catalog, &mut active);
    assert!(
        preflight.is_none(),
        "already active tools should execute normally"
    );
}

/// The Plan-mode registry must expose read/planning tools, omit the listed
/// write/exec tools, and (apart from the plan-state tools) contain no tool
/// that advertises `WritesFiles` or `ExecutesCode`.
#[test]
fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() {
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
    let registry = engine
        .build_turn_tool_registry_builder(
            AppMode::Plan,
            engine.config.todos.clone(),
            engine.config.plan_state.clone(),
        )
        .build(engine.build_tool_context(AppMode::Plan, false));

    let must_be_present = [
        "read_file",
        "list_dir",
        "update_plan",
        "create_goal",
        "get_goal",
        "update_goal",
        "task_list",
        "task_read",
        "handle_read",
    ];
    let must_be_absent = [
        "write_file",
        "edit_file",
        "exec_shell",
        "exec_shell_wait",
        "exec_shell_interact",
        "task_shell_start",
        "task_create",
        "task_gate_run",
        "rlm",
        "fim_edit",
    ];

    // Collect offenders so a failure prints exactly which names are wrong.
    let missing: Vec<&str> = must_be_present
        .iter()
        .copied()
        .filter(|&name| !registry.contains(name))
        .collect();
    assert_eq!(missing, Vec::<&str>::new());
    let unexpected: Vec<&str> = must_be_absent
        .iter()
        .copied()
        .filter(|&name| registry.contains(name))
        .collect();
    assert_eq!(unexpected, Vec::<&str>::new());

    // These tools are excluded from the capability sweep below.
    let plan_state_tools = [
        "checklist_add",
        "checklist_update",
        "checklist_write",
        "todo_add",
        "todo_update",
        "todo_write",
        "update_plan",
    ];
    let mut write_or_exec_tools: Vec<String> = Vec::new();
    for tool in registry.all() {
        if plan_state_tools.contains(&tool.name()) {
            continue;
        }
        let capabilities = tool.capabilities();
        if capabilities.contains(&ToolCapability::WritesFiles)
            || capabilities.contains(&ToolCapability::ExecutesCode)
        {
            write_or_exec_tools.push(tool.name().to_string());
        }
    }
    write_or_exec_tools.sort();
    assert!(
        write_or_exec_tools.is_empty(),
        "Plan mode must not register file-writing or code-execution tools: {write_or_exec_tools:?}"
    );
}

/// Switching between Plan and Agent mode must not disturb the serialized
/// head of the tool catalog. The tools array is part of the immutable prefix
/// that DeepSeek's KV prefix cache keys on, so any byte change caused by a
/// mode switch would force a full re-prefill.
///
/// Checked here:
/// 1. Rebuilding the catalog for the same mode (Plan, then Agent) yields
///    byte-identical JSON.
/// 2. The non-deferred tool names agree between Plan and Agent over their
///    common-length prefix.
/// 3. Activating a deferred tool mid-session leaves the non-deferred head in
///    order and lists the deferred tool last.
#[test]
fn plan_mode_toggle_preserves_catalog_byte_stability() {
    let always_load = HashSet::new();
    let native_tools = vec![
        api_tool("read_file"),
        api_tool("list_dir"),
        api_tool("write_file"),
        api_tool("edit_file"),
        api_tool("exec_shell"),
    ];
    let mcp_tools = vec![api_tool("mcp_search"), api_tool("mcp_write")];

    // Plan mode: two builds from the same inputs must serialize identically.
    let plan_first = build_model_tool_catalog(
        native_tools.clone(),
        mcp_tools.clone(),
        AppMode::Plan,
        &always_load,
    );
    let plan_second = build_model_tool_catalog(
        native_tools.clone(),
        mcp_tools.clone(),
        AppMode::Plan,
        &always_load,
    );
    assert_eq!(
        serde_json::to_string(&plan_first).unwrap(),
        serde_json::to_string(&plan_second).unwrap(),
        "building the catalog twice for Plan mode must produce identical bytes"
    );

    // Agent mode: same determinism requirement.
    let agent_first = build_model_tool_catalog(
        native_tools.clone(),
        mcp_tools.clone(),
        AppMode::Agent,
        &always_load,
    );
    let agent_second = build_model_tool_catalog(
        native_tools.clone(),
        mcp_tools.clone(),
        AppMode::Agent,
        &always_load,
    );
    assert_eq!(
        serde_json::to_string(&agent_first).unwrap(),
        serde_json::to_string(&agent_second).unwrap(),
        "building the catalog twice for Agent mode must produce identical bytes"
    );

    // Non-deferred tools shared by both modes must keep the same relative
    // order; compare the two name lists over their common-length prefix.
    let plan_names: Vec<&str> = plan_first
        .iter()
        .filter(|tool| tool.defer_loading != Some(true))
        .map(|tool| tool.name.as_str())
        .collect();
    let agent_names: Vec<&str> = agent_first
        .iter()
        .filter(|tool| tool.defer_loading != Some(true))
        .map(|tool| tool.name.as_str())
        .collect();
    let shared = std::cmp::min(plan_names.len(), agent_names.len());
    assert_eq!(
        &plan_names[..shared],
        &agent_names[..shared],
        "non-deferred tools common to Plan and Agent must appear in the same order"
    );

    // Add one explicitly deferred native tool, then activate it mid-session.
    let mut deferred_search = api_tool("deferred_search");
    deferred_search.defer_loading = Some(true);
    let mut native_with_deferred = native_tools.clone();
    native_with_deferred.push(deferred_search);
    let catalog_with_deferred = build_model_tool_catalog(
        native_with_deferred,
        mcp_tools.clone(),
        AppMode::Agent,
        &always_load,
    );

    // Active set = every non-deferred tool plus the newly activated one.
    let mut active: HashSet<String> = HashSet::new();
    for tool in catalog_with_deferred.iter() {
        if tool.defer_loading != Some(true) {
            active.insert(tool.name.clone());
        }
    }
    active.insert("deferred_search".to_string());

    let listed = active_tools_for_step(&catalog_with_deferred, &active, false);
    let listed_names: Vec<&str> = listed.iter().map(|tool| tool.name.as_str()).collect();

    // The non-deferred head must still lead the list, unchanged.
    let head_names: Vec<&str> = catalog_with_deferred
        .iter()
        .filter(|tool| tool.defer_loading != Some(true))
        .map(|tool| tool.name.as_str())
        .collect();
    assert!(
        listed_names.starts_with(&head_names),
        "activating a deferred tool must not reorder the catalog head: \
         expected {head_names:?} as prefix, got {listed_names:?}"
    );
    // ...and the activated tool must be the final entry.
    assert_eq!(
        listed_names.last().copied(),
        Some("deferred_search"),
        "deferred tool must be appended at the tail"
    );
}

/// The parent turn registry exposes the three goal tools in Plan, Agent and
/// Yolo mode alike.
#[test]
fn parent_turn_registry_includes_goal_tools_for_all_modes() {
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
    let goal_tools = ["create_goal", "get_goal", "update_goal"];
    let modes = [AppMode::Plan, AppMode::Agent, AppMode::Yolo];

    modes.into_iter().for_each(|mode| {
        let context = engine.build_tool_context(mode, false);
        let registry = engine
            .build_turn_tool_registry_builder(
                mode,
                engine.config.todos.clone(),
                engine.config.plan_state.clone(),
            )
            .build(context);

        for &name in goal_tools.iter() {
            assert!(
                registry.contains(name),
                "parent {mode:?} registry should expose {name}"
            );
        }
    });
}

/// Agent mode only auto-approves when the flag is passed as `true`; Yolo mode
/// auto-approves even when the flag is `false`.
#[test]
fn agent_mode_can_build_auto_approved_tool_context() {
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
    let auto_approve_for =
        |mode: AppMode, flag: bool| engine.build_tool_context(mode, flag).auto_approve;

    let agent_default = auto_approve_for(AppMode::Agent, false);
    assert!(!agent_default);

    let agent_forced = auto_approve_for(AppMode::Agent, true);
    assert!(agent_forced);

    let yolo_default = auto_approve_for(AppMode::Yolo, false);
    assert!(yolo_default);
}

#[test]
fn agent_and_yolo_modes_elevate_shell_sandbox_to_allow_network() {
    use crate::sandbox::SandboxPolicy;

    // Regression for #273. The seatbelt-default policy blocks all outbound
    // network traffic, DNS included, which broke `curl`, `yt-dlp`, package
    // managers and similar commands in Agent mode. The elevated policy must
    // grant network access so that the application-level NetworkPolicy is the
    // single outbound boundary.
    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());

    let agent_ctx = engine.build_tool_context(AppMode::Agent, false);
    let Some(agent_policy) = agent_ctx.elevated_sandbox_policy.as_ref() else {
        panic!("Agent mode should elevate the sandbox policy");
    };
    assert!(
        agent_policy.has_network_access(),
        "Agent mode must allow shell network access; got {agent_policy:?}",
    );

    let yolo_ctx = engine.build_tool_context(AppMode::Yolo, false);
    let Some(yolo_policy) = yolo_ctx.elevated_sandbox_policy.as_ref() else {
        panic!("Yolo mode should elevate the sandbox policy");
    };
    assert!(yolo_policy.has_network_access());
    // v0.8.11: YOLO runs with DangerFullAccess (no sandbox) so legitimate
    // writes outside the workspace (package installs, sub-agent workspaces,
    // ~/.cache mutations, etc.) do not bounce through approval round-trips.
    // YOLO is opt-in and already turns on trust mode and auto-approve; the
    // sandbox was the last remaining guardrail and contradicted that contract.
    let yolo_is_unsandboxed = matches!(yolo_policy, SandboxPolicy::DangerFullAccess);
    assert!(
        yolo_is_unsandboxed,
        "Yolo mode must use DangerFullAccess (no sandbox); got {yolo_policy:?}",
    );

    // Plan mode (#1077): the sandbox has to deny workspace writes for real.
    // The earlier WorkspaceWrite-with-empty-network policy listed the
    // workspace as writable, so `python -c "open('f','w').write('x')"` could
    // still modify workspace files. The policy is now ReadOnly: no writes
    // anywhere and no network.
    let plan_ctx = engine.build_tool_context(AppMode::Plan, false);
    let Some(plan_policy) = plan_ctx.elevated_sandbox_policy.as_ref() else {
        panic!("Plan mode should make the shell sandbox policy explicit");
    };
    let plan_is_read_only = matches!(plan_policy, SandboxPolicy::ReadOnly);
    assert!(
        plan_is_read_only,
        "Plan mode must use ReadOnly sandbox to deny workspace writes (#1077); got {plan_policy:?}",
    );
    assert!(!plan_policy.has_network_access());
    assert!(!plan_policy.has_full_disk_write_access());

    // Fall back to "." when the current directory cannot be resolved.
    let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
    let writable_roots = plan_policy.get_writable_roots(&cwd);
    assert!(
        writable_roots.is_empty(),
        "ReadOnly policy must enumerate zero writable roots; got {plan_policy:?}",
    );

    let hint_explains_plan_mode = match plan_ctx.shell_network_denied_hint.as_deref() {
        Some(hint) => hint.contains("Plan mode") && hint.contains("read-only"),
        None => false,
    };
    assert!(hint_explains_plan_mode);
}

/// `sandbox_policy_for_mode` maps Plan to `ReadOnly`, Agent to
/// `WorkspaceWrite` (workspace writable, network on) and Yolo to
/// `DangerFullAccess`.
#[test]
fn sandbox_policy_for_mode_returns_correct_policy_per_mode() {
    use super::tool_setup::sandbox_policy_for_mode;
    use crate::sandbox::SandboxPolicy;

    let workspace = PathBuf::from("/tmp/example-workspace");

    // Plan is ReadOnly, which is the point of #1077.
    let plan = sandbox_policy_for_mode(AppMode::Plan, &workspace);
    assert!(matches!(plan, SandboxPolicy::ReadOnly));

    // Agent is WorkspaceWrite: the workspace is the only writable root and
    // network access is enabled.
    let agent = sandbox_policy_for_mode(AppMode::Agent, &workspace);
    let SandboxPolicy::WorkspaceWrite {
        writable_roots,
        network_access,
        ..
    } = &agent
    else {
        panic!("Agent mode should be WorkspaceWrite; got {agent:?}");
    };
    assert_eq!(*writable_roots, vec![workspace.clone()]);
    assert!(
        *network_access,
        "Agent mode must allow shell network access"
    );

    // YOLO is DangerFullAccess.
    let yolo = sandbox_policy_for_mode(AppMode::Yolo, &workspace);
    assert!(matches!(yolo, SandboxPolicy::DangerFullAccess));
}

/// An assistant message made of only a thinking block and a tool-use block
/// must come back unchanged in the `SessionUpdated` event.
#[tokio::test]
async fn session_update_preserves_reasoning_tool_only_turn() {
    let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());

    let reasoning = ContentBlock::Thinking {
        thinking: String::from("Need a tool before answering."),
    };
    let tool_call = ContentBlock::ToolUse {
        id: String::from("tool-1"),
        name: String::from("read_file"),
        input: json!({"path": "Cargo.toml"}),
        caller: None,
    };
    let assistant = Message {
        role: String::from("assistant"),
        content: vec![reasoning, tool_call],
    };

    engine.add_session_message(assistant.clone()).await;

    let mut rx = handle.rx_event.write().await;
    let event = rx.recv().await.expect("session update event");
    drop(rx);

    match event {
        Event::SessionUpdated { messages, .. } => assert_eq!(messages, vec![assistant]),
        _ => panic!("expected session update event"),
    }
}

/// The instructions file is rewritten after the engine is created; after
/// `Op::SetModel` the next `SessionUpdated` event must report the new model
/// and a system prompt built from the rewritten file only.
#[tokio::test]
async fn set_model_reloads_instruction_sources_and_updates_session_prompt() {
    let tmp = tempdir().expect("tempdir");
    let instructions = tmp.path().join("instructions.md");
    fs::write(&instructions, "FLASH_INSTRUCTIONS_MARKER").expect("write instructions");
    let (engine, handle) = Engine::new(
        EngineConfig {
            workspace: tmp.path().to_path_buf(),
            model: "deepseek-v4-flash".to_string(),
            instructions: vec![instructions.clone().into()],
            ..Default::default()
        },
        &Config::default(),
    );
    // Change the file on disk after construction, before the model switch.
    fs::write(&instructions, "PRO_INSTRUCTIONS_MARKER").expect("rewrite instructions");

    let run = tokio::spawn(engine.run());
    let switch_model = Op::SetModel {
        model: "deepseek-v4-pro".to_string(),
        mode: AppMode::Agent,
    };
    handle.send(switch_model).await.expect("send set model");

    let recv_deadline = std::time::Duration::from_secs(1);
    let mut rx = handle.rx_event.write().await;
    let (model, prompt) = loop {
        let event = tokio::time::timeout(recv_deadline, rx.recv())
            .await
            .expect("session update after model switch")
            .expect("event");
        // Skip everything until the session update arrives.
        let Event::SessionUpdated {
            model,
            system_prompt,
            ..
        } = event
        else {
            continue;
        };
        // Flatten either prompt representation into one string.
        let prompt = match system_prompt.expect("system prompt") {
            SystemPrompt::Blocks(blocks) => {
                let parts: Vec<_> = blocks.into_iter().map(|block| block.text).collect();
                parts.join("\n")
            }
            SystemPrompt::Text(text) => text,
        };
        break (model, prompt);
    };
    drop(rx);
    run.abort();

    assert_eq!(model, "deepseek-v4-pro");
    assert!(prompt.contains("PRO_INSTRUCTIONS_MARKER"));
    assert!(!prompt.contains("FLASH_INSTRUCTIONS_MARKER"));
}

/// After `Op::ChangeMode`, the next `SessionUpdated` event must carry a system
/// prompt, and the session history it reports must contain neither
/// system-role messages nor text blocks holding a `<runtime_prompt` tag.
#[tokio::test]
async fn change_mode_refreshes_session_prompt_and_updates_session() {
    let tmp = tempdir().expect("tempdir");
    let config = EngineConfig {
        workspace: tmp.path().to_path_buf(),
        model: "deepseek-v4-pro".to_string(),
        ..Default::default()
    };
    let (engine, handle) = Engine::new(config, &Config::default());

    let run = tokio::spawn(engine.run());
    handle
        .send(Op::ChangeMode {
            mode: AppMode::Yolo,
        })
        .await
        .expect("send change mode");

    let messages = {
        let mut rx = handle.rx_event.write().await;
        loop {
            let event = tokio::time::timeout(std::time::Duration::from_secs(1), rx.recv())
                .await
                .expect("session update after mode switch")
                .expect("event");
            if let Event::SessionUpdated {
                system_prompt,
                messages,
                ..
            } = event
            {
                // Only the presence of a system prompt is checked here; its
                // text is not inspected, so it is not flattened into a string.
                let _ = system_prompt.expect("system prompt");
                break messages;
            }
        }
    };
    run.abort();

    assert!(
        messages.iter().all(|message| message.role != "system"),
        "mode switch must not persist appended system messages: {messages:?}"
    );
    assert!(
        messages.iter().all(|message| {
            message.content.iter().all(|block| {
                !matches!(
                    block,
                    ContentBlock::Text { text, .. }
                        if text.contains("<runtime_prompt")
                )
            })
        }),
        "runtime prompt tags should be request-time metadata, not session history"
    );
}

/// The auto-approve flag wins over the configured approval mode for Agent
/// turns, Yolo resolves to `Auto`, and Plan resolves to `Never`.
#[test]
fn turn_approval_mode_prefers_auto_approve_flag() {
    use crate::tui::approval::ApprovalMode;

    // Flag set: the turn-level mode becomes Auto.
    let flagged_turn = agent_approval_mode_for_turn(true, ApprovalMode::Suggest);
    assert_eq!(flagged_turn, ApprovalMode::Auto);

    // Flag set on top of a `Never` setting still resolves to Auto in Agent mode.
    let flagged_over_never = agent_approval_mode_for_turn(true, ApprovalMode::Never);
    let agent_resolved = approval_mode_for(AppMode::Agent, flagged_over_never);
    assert_eq!(agent_resolved, ApprovalMode::Auto);

    let yolo_resolved = approval_mode_for(AppMode::Yolo, ApprovalMode::Suggest);
    assert_eq!(yolo_resolved, ApprovalMode::Auto);

    let plan_resolved = approval_mode_for(AppMode::Plan, ApprovalMode::Auto);
    assert_eq!(plan_resolved, ApprovalMode::Never);
}

/// `messages_with_turn_metadata` must leave the stored session messages
/// untouched and return them plus one trailing user message carrying the
/// `<runtime_prompt` text for the current mode and approval policy.
#[test]
fn runtime_prompt_is_projected_without_persisting_to_session_messages() {
    use crate::tui::approval::ApprovalMode;

    let tmp = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: tmp.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );
    engine.current_mode = AppMode::Plan;
    engine.session.approval_mode = ApprovalMode::Suggest;

    let seed_message = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: "summary after compaction".to_string(),
            cache_control: None,
        }],
    };
    engine.session.messages = vec![seed_message].into();
    let stored = engine.session.messages.clone();

    let request_messages = engine.messages_with_turn_metadata();

    // Stored history is unchanged; the request view has one extra message.
    assert_eq!(&*engine.session.messages, &*stored);
    assert_eq!(request_messages.len(), stored.len() + 1);
    let has_system_message = request_messages
        .iter()
        .any(|message| message.role == "system");
    assert!(
        !has_system_message,
        "runtime prompts must not create appended system messages"
    );

    let runtime = request_messages.last().expect("runtime prompt message");
    assert_eq!(runtime.role, "user");
    let text = match runtime.content.first().expect("runtime prompt text") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text runtime prompt"),
    };
    assert!(text.contains("<runtime_prompt"));
    assert!(text.contains("mode=\"plan\""));
    assert!(
        text.contains("approval=\"never\""),
        "Plan mode should project its fixed never-approval policy: {text}"
    );
}

/// Drives `Op::ChangeMode` through the spawned engine run loop and checks the
/// two events that follow, in order: a `SessionUpdated` whose messages carry
/// no `<runtime_prompt` text, then a `Status`.
#[tokio::test]
async fn change_mode_op_updates_current_mode_and_emits_status() {
    let tmp = tempdir().expect("tempdir");
    let config = EngineConfig {
        workspace: tmp.path().to_path_buf(),
        model: "deepseek-v4-pro".to_string(),
        ..Default::default()
    };
    let (engine, handle) = Engine::new(config, &Config::default());

    // The engine is consumed by its run loop; the test talks to it only
    // through `handle` from here on.
    let run = tokio::spawn(engine.run());
    handle
        .send(Op::ChangeMode {
            mode: AppMode::Yolo,
        })
        .await
        .expect("send change mode");

    // Expect a SessionUpdated event confirming the mode change (the
    // per-turn <runtime_prompt> tag carries the mode in every request,
    // so no separate persistence of a mode_change runtime event is needed).
    // Each receive is bounded by a 2-second timeout so a missing event fails
    // the test instead of hanging it.
    let mut rx = handle.rx_event.write().await;
    let session_updated = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
        .await
        .expect("session update after mode switch")
        .expect("event");
    let Event::SessionUpdated { messages, .. } = session_updated else {
        panic!("should emit SessionUpdated after mode change, got: {session_updated:?}");
    };
    // No text block in any emitted message may contain the runtime prompt tag.
    assert!(
        messages.iter().all(|message| {
            message.content.iter().all(|block| {
                !matches!(
                    block,
                    ContentBlock::Text { text, .. }
                        if text.contains("<runtime_prompt")
                )
            })
        }),
        "runtime prompt tags must not be persisted into session messages after mode change"
    );

    // Also expect a status event
    let status = tokio::time::timeout(std::time::Duration::from_secs(2), rx.recv())
        .await
        .expect("status after mode switch")
        .expect("event");
    assert!(
        matches!(status, Event::Status { .. }),
        "should emit Status after mode change, got: {status:?}"
    );

    // Stop the background run loop now that both events were observed.
    run.abort();
}

/// The detector must accept a provider "maximum context length" payload and
/// reject an unrelated HTTP 400 error.
#[test]
fn detects_context_length_errors_from_provider_payloads() {
    let overflow_payload = r#"SSE stream request failed: HTTP 400 Bad Request: {"error":{"message":"This model's maximum context length is 131072 tokens. However, you requested 153056 tokens (148960 in the messages, 4096 in the completion).","type":"invalid_request_error"}}"#;
    let unrelated_failure = "SSE stream request failed: HTTP 400 Bad Request: model not found";

    assert!(is_context_length_error_message(overflow_payload));
    assert!(!is_context_length_error_message(unrelated_failure));
}

/// The V4 input budget is the context window minus the reserved output
/// tokens and a fixed 1,024-token headroom.
#[test]
fn context_budget_reserves_output_and_headroom() {
    // Other tests mutate DEEPSEEK_MAX_OUTPUT_TOKENS; hold the shared env lock
    // so the effective_max_output_tokens() call made internally sees a
    // stable environment.
    let _env_guard = lock_test_env();

    // V4's 1M window is the only family that comfortably hosts a 256K output
    // reservation without saturating the input budget to 0.
    const V4_WINDOW: usize = 1_000_000;
    const HEADROOM: usize = 1_024;
    let reserved_output = TURN_MAX_OUTPUT_TOKENS as usize;

    let budget = context_input_budget("deepseek-v4-pro")
        .expect("deepseek-v4-pro should have a known context window");
    assert_eq!(budget, V4_WINDOW - reserved_output - HEADROOM);
}

/// The API request cap for V4 models must be positive, at most 65,536
/// (64Ki) tokens, and identical for the pro and flash variants.
#[test]
fn effective_max_output_tokens_caps_api_request_for_large_window_models() {
    // Other tests mutate DEEPSEEK_MAX_OUTPUT_TOKENS; hold the shared env lock
    // so both lookups below observe the same environment.
    let _env_guard = lock_test_env();

    // V4 has a 1M context window, but the request cap has to stay well under
    // common provider limits (e.g., 131K total on self-hosted vLLM/SGLang).
    let v4_cap = effective_max_output_tokens("deepseek-v4-pro");
    let flash_cap = effective_max_output_tokens("deepseek-v4-flash");

    assert!(
        v4_cap <= 65_536,
        "V4 API request cap should be ≤64K, got {v4_cap}"
    );
    assert!(
        v4_cap > 0,
        "V4 API request cap should be positive, got {v4_cap}"
    );
    assert_eq!(v4_cap, flash_cap);
}

/// Test guard that overrides `DEEPSEEK_MAX_OUTPUT_TOKENS` and puts the prior
/// state of the variable back when it is dropped.
struct ScopedDeepSeekMaxOutputTokens {
    /// What the variable held before the override; `None` if it was unset.
    previous: Option<OsString>,
}

impl ScopedDeepSeekMaxOutputTokens {
    /// Name of the environment variable this guard manages.
    const VAR: &'static str = "DEEPSEEK_MAX_OUTPUT_TOKENS";

    /// Sets the variable to `value`, remembering what it replaced.
    fn set(value: &str) -> Self {
        Self::replace_with(Some(value))
    }

    /// Removes the variable, remembering what it held.
    fn unset() -> Self {
        Self::replace_with(None)
    }

    /// Captures the current value, then applies `next` (`None` removes the
    /// variable).
    fn replace_with(next: Option<&str>) -> Self {
        let previous = std::env::var_os(Self::VAR);
        // Safety: tests using this helper serialize with lock_test_env() and
        // the original value is restored in Drop.
        unsafe {
            match next {
                Some(value) => std::env::set_var(Self::VAR, value),
                None => std::env::remove_var(Self::VAR),
            }
        }
        Self { previous }
    }
}

impl Drop for ScopedDeepSeekMaxOutputTokens {
    /// Restores the captured value, or removes the variable if it was unset
    /// when the guard was created.
    fn drop(&mut self) {
        // Safety: tests using this helper serialize with lock_test_env().
        unsafe {
            match self.previous.take() {
                Some(previous) => std::env::set_var(Self::VAR, previous),
                None => std::env::remove_var(Self::VAR),
            }
        }
    }
}

/// A valid `DEEPSEEK_MAX_OUTPUT_TOKENS` override is returned verbatim for
/// every model name tried.
#[test]
fn effective_max_output_tokens_env_override_returns_positive_value() {
    let _env_guard = lock_test_env();
    let _override = ScopedDeepSeekMaxOutputTokens::set("16384");

    // The override applies regardless of model: V4 hosted, V4 flash, and a
    // sub-500K self-hosted name.
    for model in ["deepseek-v4-pro", "deepseek-v4-flash", "qwen3-32b-256k"] {
        assert_eq!(effective_max_output_tokens(model), 16_384);
    }
}

/// Unusable override values must be ignored so the cap falls back to the
/// value computed with the variable unset.
#[test]
fn effective_max_output_tokens_env_override_rejects_zero_and_invalid() {
    let _env_guard = lock_test_env();

    // Measure the heuristic baseline with the variable removed, then let the
    // guard restore whatever was there before.
    let unset_guard = ScopedDeepSeekMaxOutputTokens::unset();
    let baseline = effective_max_output_tokens("deepseek-v4-pro");
    drop(unset_guard);
    assert!(baseline > 0);

    // Zero, non-numeric, empty, whitespace-only, and negative values must all
    // fall through to the heuristic instead of yielding a zero/garbage cap
    // that would silently break request budgeting.
    const REJECTED: [&str; 5] = ["0", "abc", "", "  ", "-1"];
    for raw in REJECTED {
        let _override = ScopedDeepSeekMaxOutputTokens::set(raw);
        let observed = effective_max_output_tokens("deepseek-v4-pro");
        assert_eq!(
            observed, baseline,
            "env={raw:?} should fall through to heuristic"
        );
    }
}

/// The input budget reserves a different output allowance depending on the
/// model's window: the full turn maximum for V4, the effective request cap
/// for a 256K-suffix model.
#[test]
fn internal_context_budget_tiers_reserved_output_by_window() {
    // Other tests mutate DEEPSEEK_MAX_OUTPUT_TOKENS; hold the shared env lock
    // so both branches below see a stable environment.
    let _env_guard = lock_test_env();

    const HEADROOM: usize = 1_024;

    // Large-context (>=500K) models reserve the full TURN_MAX_OUTPUT_TOKENS
    // so long V4 sessions don't compact prematurely.
    const V4_WINDOW: usize = 1_000_000;
    let internal_budget =
        context_input_budget("deepseek-v4-pro").expect("V4 should have a known context window");
    let full_reservation = TURN_MAX_OUTPUT_TOKENS as usize;
    assert_eq!(internal_budget, V4_WINDOW - full_reservation - HEADROOM);

    // Sub-500K windows take the effective-cap branch: a 256K self-hosted
    // deployment must produce a usable positive budget, not None. The earlier
    // formula reserved the full 262K, computed 256K - 262K - 1K, underflowed
    // to None, and silently disabled preflight/recovery.
    const SMALL_WINDOW: usize = 256_000;
    let small_window_budget = context_input_budget("qwen3-32b-256k")
        .expect("a 256K-suffix model must yield Some budget via the effective-cap branch");
    let effective_output = effective_max_output_tokens("qwen3-32b-256k") as usize;
    assert_eq!(
        small_window_budget,
        SMALL_WINDOW - effective_output - HEADROOM
    );
}

/// A 2,000-line tool output is kept whole (trimmed) for a V4 model, while a
/// 128K-suffix model gets a shorter, compacted form.
#[test]
fn v4_tool_outputs_keep_large_file_reads_in_context() {
    let raw_output = "0123456789abcdef\n".repeat(2_000);
    let tool_result = ToolResult::success(raw_output.clone());

    let v4_context =
        compact_tool_result_for_context("deepseek-v4-pro", "exec_shell", &tool_result);
    let legacy_context =
        compact_tool_result_for_context("deepseek-v3.2-128k", "exec_shell", &tool_result);

    assert_eq!(v4_context, raw_output.trim());
    assert!(legacy_context.contains("output compacted to protect context"));
    assert!(legacy_context.len() < v4_context.len());
}

/// An `agent_eval` result is replaced by a shorter summary that keeps the
/// identifying fields and adds verification guidance for the parent.
#[test]
fn subagent_results_are_summarized_before_parent_context_insertion() {
    let long_result = "verified detail\n".repeat(1_000);
    let payload = json!({
        "agent_id": "agent_1234abcd",
        "agent_type": "explore",
        "assignment": {
            "objective": "Inspect the RLM rendering path and report the smallest fix."
        },
        "model": "deepseek-v4-flash",
        "status": "Completed",
        "result": long_result,
        "steps_taken": 12,
        "duration_ms": 3456
    });
    let output = ToolResult::success(payload.to_string());

    let context = compact_tool_result_for_context("deepseek-v4-pro", "agent_eval", &output);

    // Identity, objective, and step count survive the summarization.
    for expected in [
        "[sub-agent result summarized for parent context]",
        "agent_1234abcd (explore) status=Completed",
        "Inspect the RLM rendering path",
        "steps=12",
    ] {
        assert!(context.contains(expected));
    }
    assert!(context.len() < output.content.len());

    // The summary also carries these verification-related fragments.
    for expected in [
        "self-report",
        "verify side effects",
        "read_file",
        "list_dir",
        "handle_read",
    ] {
        assert!(context.contains(expected));
    }
}

/// A `run_verifiers` report with one passed, one failed, and one skipped
/// gate is condensed into a per-gate summary that omits the passing gate's
/// stdout.
#[test]
fn run_verifiers_results_are_structured_before_context_insertion() {
    let noisy_failure = "node lint failure detail\n".repeat(300);
    let noisy_success = "successful check output\n".repeat(300);

    let rust_gate = json!({
        "name": "rust-check",
        "ecosystem": "rust",
        "status": "passed",
        "command": "cargo check --workspace --locked",
        "cwd": "/repo",
        "exit_code": 0,
        "duration_ms": 110,
        "stdout": noisy_success.as_str(),
        "stderr": "",
        "stdout_truncated": false,
        "stderr_truncated": false,
        "skipped_reason": null
    });
    let node_gate = json!({
        "name": "node-lint",
        "ecosystem": "node",
        "status": "failed",
        "command": "npm run lint",
        "cwd": "/repo",
        "exit_code": 1,
        "duration_ms": 220,
        "stdout": "",
        "stderr": noisy_failure.as_str(),
        "stdout_truncated": false,
        "stderr_truncated": false,
        "skipped_reason": null
    });
    let pytest_gate = json!({
        "name": "python-pytest",
        "ecosystem": "python",
        "status": "skipped",
        "command": "",
        "cwd": "/repo",
        "exit_code": null,
        "duration_ms": 0,
        "stdout": "",
        "stderr": "",
        "stdout_truncated": false,
        "stderr_truncated": false,
        "skipped_reason": "pytest is not installed"
    });
    let report = json!({
        "success": false,
        "profile": "auto",
        "level": "quick",
        "workspace": "/repo",
        "gate_count": 3,
        "passed": 1,
        "failed": 1,
        "skipped": 1,
        "summary": "1 passed, 1 failed, 1 skipped",
        "gates": [rust_gate, node_gate, pytest_gate]
    });
    let output = ToolResult::success(report.to_string());

    let context = compact_tool_result_for_context("deepseek-v4-pro", "run_verifiers", &output);

    for expected in [
        "[run_verifiers result summarized for context]",
        "summary: 1 passed, 1 failed, 1 skipped",
        "selection: profile=auto, level=quick",
        "- node-lint (node): failed exit=1",
        "command: npm run lint",
        "- python-pytest (python): skipped",
        "pytest is not installed",
        "- rust-check (rust): passed exit=0",
    ] {
        assert!(context.contains(expected));
    }
    assert!(context.len() < output.content.len());
    assert!(
        !context.contains(&noisy_success),
        "successful gate stdout should not be copied into parent context"
    );
}

/// A failing `run_tests` result is condensed into a summary that still shows
/// the status, the command, and excerpts of both output streams.
#[test]
fn run_tests_results_are_structured_before_context_insertion() {
    let captured_stdout = "running test suite\n".repeat(500);
    let captured_stderr = "error[E0425]: cannot find value `missing`\n".repeat(500);
    let payload = json!({
        "success": false,
        "exit_code": 101,
        "stdout": captured_stdout,
        "stderr": captured_stderr,
        "command": "(cd /repo && cargo test --workspace --all-features)"
    });
    let output = ToolResult::success(payload.to_string());

    let context = compact_tool_result_for_context("deepseek-v4-pro", "run_tests", &output);

    for expected in [
        "[run_tests result summarized for context]",
        "status: failed, exit_code: 101",
        "cargo test --workspace --all-features",
        "error[E0425]",
        "running test suite",
    ] {
        assert!(context.contains(expected));
    }
    assert!(context.len() < output.content.len());
}

/// A failed `task_gate_run` result is rendered as a summary naming the gate,
/// its status and exit code, the command, the summary text, and the log path.
#[test]
fn task_gate_run_results_are_structured_before_context_insertion() {
    let gate_record = json!({
        "id": "gate_abcd1234",
        "gate": "clippy",
        "command": "cargo clippy -p codewhale-tui --all-targets --all-features --locked -- -D warnings",
        "cwd": "/repo",
        "exit_code": 1,
        "status": "failed",
        "classification": "compile_failure",
        "duration_ms": 5000,
        "summary": "warning promoted to error in verifier.rs",
        "log_path": "/repo/.codewhale/runtime/gate.log",
        "recorded_at": "2026-06-01T12:00:00Z"
    });
    let payload = json!({
        "gate": gate_record,
        "stdout_summary": "",
        "stderr_summary": "warning promoted to error"
    });
    let output = ToolResult::success(payload.to_string());

    let context = compact_tool_result_for_context("deepseek-v4-pro", "task_gate_run", &output);

    for expected in [
        "[task_gate_run result summarized for context]",
        "gate: clippy, status: failed, exit_code: 1",
        "cargo clippy -p codewhale-tui",
        "summary: warning promoted to error",
        "log_path: /repo/.codewhale/runtime/gate.log",
    ] {
        assert!(context.contains(expected));
    }
}

/// After a file was observed in the working set, refreshing the system
/// prompt must not put the working-set summary marker into it.
#[test]
fn refresh_system_prompt_leaves_working_set_out_of_system_prompt() {
    let workspace = tempdir().expect("tempdir");
    fs::create_dir_all(workspace.path().join("src")).expect("mkdir");
    fs::write(workspace.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");

    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );
    engine
        .session
        .working_set
        .observe_user_message("please inspect src/lib.rs", workspace.path());

    engine.refresh_system_prompt();

    // Flatten either prompt representation into one string to search.
    let Some(system_prompt) = engine.session.system_prompt.as_ref() else {
        panic!("expected system prompt");
    };
    let prompt = match system_prompt {
        SystemPrompt::Text(text) => text.clone(),
        SystemPrompt::Blocks(blocks) => {
            let parts: Vec<&str> = blocks.iter().map(|block| block.text.as_str()).collect();
            parts.join("\n")
        }
    };
    assert!(!prompt.contains(WORKING_SET_SUMMARY_MARKER));
}

/// The working-set summary travels to the model inside the `<turn_meta>`
/// block at the tail of the stored user message.
#[test]
fn working_set_reaches_model_as_turn_metadata() {
    let workspace = tempdir().expect("tempdir");
    fs::create_dir_all(workspace.path().join("src")).expect("mkdir");
    fs::write(workspace.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");

    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );
    engine
        .session
        .working_set
        .observe_user_message("please inspect src/lib.rs", workspace.path());
    let user_msg =
        engine.user_text_message_with_turn_metadata("please inspect src/lib.rs".to_string());
    engine.session.add_message(user_msg);

    let messages = engine.messages_with_turn_metadata();

    // Metadata is the last content block of the first message in the request.
    let tail_block = messages
        .first()
        .and_then(|message| message.content.last())
        .expect("turn metadata block");
    let text = match tail_block {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text metadata block"),
    };
    assert!(text.starts_with("<turn_meta>\n"));
    assert!(text.contains(WORKING_SET_SUMMARY_MARKER));
    assert!(text.contains("src/lib.rs"));
}

/// Turn metadata must carry the current local date and the model name even
/// when nothing was observed in the working set.
#[test]
fn turn_metadata_includes_current_local_date_without_working_set() {
    let tmp = tempdir().expect("tempdir");
    let config = EngineConfig {
        model: "deepseek-v4-flash".to_string(),
        workspace: tmp.path().to_path_buf(),
        ..Default::default()
    };
    let (mut engine, _handle) = Engine::new(config, &Config::default());

    // Sample the local date on both sides of the calls that build the
    // metadata. Reading the clock only afterwards would fail spuriously if
    // midnight passed in between; accepting either sample avoids that.
    let date_before = chrono::Local::now().format("%Y-%m-%d").to_string();
    let user_msg = engine.user_text_message_with_turn_metadata("what is today's date?".to_string());
    engine.session.add_message(user_msg);

    let messages = engine.messages_with_turn_metadata();
    let date_after = chrono::Local::now().format("%Y-%m-%d").to_string();

    let last_block = messages
        .first()
        .and_then(|message| message.content.last())
        .expect("turn metadata block");
    let ContentBlock::Text { text, .. } = last_block else {
        panic!("expected text metadata block");
    };

    assert!(text.starts_with("<turn_meta>\n"));
    assert!(
        [&date_before, &date_after]
            .iter()
            .any(|date| text.contains(&format!("Current local date: {date}"))),
        "turn metadata should carry the local date ({date_before} or {date_after}), got: {text}"
    );
    assert!(text.contains("Current model: deepseek-v4-flash"));
}

/// Metadata built for an explicit route names the model, the auto route, and
/// the auto reasoning effort, and does not repeat the user's text.
#[test]
fn turn_metadata_includes_auto_model_route() {
    let workspace = tempdir().expect("tempdir");
    let (engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    let prompt = "debug this regression";
    let user_msg = engine.user_text_message_with_turn_metadata_for_route(
        prompt.to_string(),
        AppMode::Agent,
        "deepseek-v4-pro",
        true,
        Some("max"),
        true,
    );
    let text = match user_msg.content.last().expect("turn metadata block") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text metadata block"),
    };

    for expected in [
        "Current model: deepseek-v4-pro",
        "Auto model route: deepseek-v4-pro",
        "Auto reasoning effort: max",
    ] {
        assert!(text.contains(expected));
    }
    // The metadata block must not echo the user's own input.
    assert!(!text.contains(prompt));
}

/// Metadata built for a YOLO-mode route carries the YOLO mode label.
#[test]
fn turn_metadata_includes_current_mode() {
    let workspace = tempdir().expect("tempdir");
    let (engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    let user_msg = engine.user_text_message_with_turn_metadata_for_route(
        "test mode metadata".to_string(),
        AppMode::Yolo,
        "deepseek-v4-flash",
        false,
        None,
        false,
    );

    // Since #2517 turn_meta lives at the tail of the user message, which
    // keeps the leading bytes (the user input) stable across date, model
    // route, and working-set changes.
    let text = match user_msg.content.last().expect("turn metadata block") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text metadata block"),
    };

    assert!(
        text.contains("Current mode: YOLO mode - full tool access without approvals"),
        "turn metadata should include the current mode label, got: {text}"
    );
}

/// The mode label in turn metadata follows `engine.current_mode`: Agent on a
/// fresh engine, YOLO after the field is reassigned.
#[test]
fn turn_metadata_mode_updates_with_change_mode_op() {
    let workspace = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    // A fresh engine is in agent mode. turn_meta sits at the *tail* of the
    // user message (see #2517), hence `content.last()`.
    let agent_msg = engine.user_text_message_with_turn_metadata("hello".to_string());
    let text = match agent_msg.content.last().expect("turn metadata block") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text metadata block"),
    };
    assert!(
        text.contains("Agent mode"),
        "initial mode should be Agent, got: {text}"
    );

    // After switching to YOLO, the next message built must show the new mode.
    engine.current_mode = AppMode::Yolo;
    let yolo_msg = engine.user_text_message_with_turn_metadata("hello again".to_string());
    let text = match yolo_msg.content.last().expect("turn metadata block") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text metadata block"),
    };
    assert!(
        text.contains("YOLO mode"),
        "mode after change should be YOLO, got: {text}"
    );
}

/// Unit-level invariant: assigning `current_mode` is reflected immediately in
/// the per-turn `<runtime_prompt>` tag. Dispatch of `Op::ChangeMode` through
/// the run loop is covered by the integration test
/// `change_mode_op_updates_current_mode_and_emits_status`.
#[test]
fn current_mode_field_assignment_takes_effect_synchronously() {
    let workspace = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            model: "deepseek-v4-pro".to_string(),
            ..Default::default()
        },
        &Config::default(),
    );
    assert_eq!(engine.current_mode, AppMode::Agent);

    // Agent mode: the trailing projected message names mode="agent".
    let agent_messages = engine.messages_with_turn_metadata();
    let agent_tag = agent_messages.last().expect("runtime tag message");
    let agent_text = match agent_tag.content.first().expect("text block") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text runtime tag in Agent mode"),
    };
    assert!(
        agent_text.contains("mode=\"agent\""),
        "Agent mode should produce runtime tag with mode=\"agent\", got: {agent_text}"
    );

    // Flip the field to YOLO and confirm it reads back.
    engine.current_mode = AppMode::Yolo;
    assert_eq!(engine.current_mode, AppMode::Yolo);

    // YOLO mode: the tag names mode="yolo" and projects auto approval.
    let yolo_messages = engine.messages_with_turn_metadata();
    let yolo_tag = yolo_messages.last().expect("runtime tag message");
    let yolo_text = match yolo_tag.content.first().expect("text block") {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected text runtime tag in YOLO mode"),
    };
    assert!(
        yolo_text.contains("mode=\"yolo\""),
        "YOLO mode should produce runtime tag with mode=\"yolo\", got: {yolo_text}"
    );
    assert!(
        yolo_text.contains("approval=\"auto\""),
        "YOLO mode should project auto approval in runtime tag, got: {yolo_text}"
    );
}

/// The first text block of a freshly built user message is the user's own
/// input, unchanged.
#[test]
fn user_text_message_keeps_current_turn_input_after_turn_metadata() {
    let workspace = tempdir().expect("tempdir");
    let (engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    let user_msg =
        engine.user_text_message_with_turn_metadata("explain the cache metrics".to_string());

    // User text now comes first (position 0) and turn_meta follows it.
    let first_text = user_msg
        .content
        .iter()
        .find_map(|block| match block {
            ContentBlock::Text { text, .. } => Some(text.as_str()),
            _ => None,
        })
        .expect("user text block");
    assert_eq!(first_text, "explain the cache metrics");
}

/// Across two turns, the request projection must begin with the stored
/// session messages unchanged and end with exactly one extra user-role
/// message; on the second turn that extra message is checked to contain the
/// `<runtime_prompt` tag.
#[test]
fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
    let tmp = tempdir().expect("tempdir");
    fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
    fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");

    let config = EngineConfig {
        workspace: tmp.path().to_path_buf(),
        ..Default::default()
    };
    let (mut engine, _handle) = Engine::new(config, &Config::default());
    engine
        .session
        .working_set
        .observe_user_message("inspect src/lib.rs", tmp.path());

    // Turn 1: store the user message, then project the request.
    let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
    engine.session.add_message(first_user.clone());
    let first_request = engine.messages_with_turn_metadata();
    // The projection's leading slice equals the stored history.
    assert_eq!(
        &first_request[..engine.session.messages.len()],
        &engine.session.messages[..]
    );
    // Exactly one message is appended on top of the stored history.
    assert_eq!(first_request.len(), engine.session.messages.len() + 1);
    assert_eq!(first_request.first(), Some(&first_user));
    assert_eq!(
        first_request.last().map(|message| message.role.as_str()),
        Some("user")
    );

    // Turn 2: add an assistant reply and a second user message.
    engine.session.add_message(Message {
        role: "assistant".to_string(),
        content: vec![ContentBlock::Text {
            text: "I inspected it.".to_string(),
            cache_control: None,
        }],
    });
    engine
        .session
        .working_set
        .observe_user_message("now summarize it", tmp.path());
    let second_user = engine.user_text_message_with_turn_metadata("now summarize it".to_string());
    engine.session.add_message(second_user);

    let second_request = engine.messages_with_turn_metadata();
    assert_eq!(
        &second_request[..engine.session.messages.len()],
        &engine.session.messages[..]
    );
    assert_eq!(second_request.len(), engine.session.messages.len() + 1);
    // The first stored message still compares equal to the turn-1 original.
    assert_eq!(second_request.first(), Some(&first_user));
    let runtime = second_request.last().expect("runtime prompt");
    let ContentBlock::Text { text, .. } = runtime.content.first().expect("runtime prompt text")
    else {
        panic!("expected runtime prompt text");
    };
    assert!(text.contains("<runtime_prompt"));
}

/// v0.8.11 regression: tool-result messages serialize to role="tool" on
/// the wire but are stored as role="user" internally. `<turn_meta>` must
/// be stored only on actual user-text messages. Request-time runtime metadata
/// is appended separately and must not mutate tool-result messages.
///
/// The history built here is: a real user message, an assistant tool call,
/// and the matching tool result.
#[test]
fn turn_metadata_skips_tool_result_messages() {
    let tmp = tempdir().expect("tempdir");
    fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
    fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");

    let config = EngineConfig {
        workspace: tmp.path().to_path_buf(),
        ..Default::default()
    };
    let (mut engine, _handle) = Engine::new(config, &Config::default());
    engine
        .session
        .working_set
        .observe_user_message("inspect src/lib.rs", tmp.path());

    // Real user message — should be eligible for injection.
    let user_msg = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
    engine.session.add_message(user_msg);
    // Assistant tool-call.
    engine.session.add_message(Message {
        role: "assistant".to_string(),
        content: vec![ContentBlock::ToolUse {
            id: "call_42".to_string(),
            name: "read_file".to_string(),
            input: serde_json::json!({"path": "src/lib.rs"}),
            caller: None,
        }],
    });
    // Tool result, stored as role="user" internally.
    engine.session.add_message(Message {
        role: "user".to_string(),
        content: vec![ContentBlock::ToolResult {
            tool_use_id: "call_42".to_string(),
            content: "pub fn sample() {}".to_string(),
            is_error: None,
            content_blocks: None,
        }],
    });

    let messages = engine.messages_with_turn_metadata();

    // The stored trailing message is the tool result and MUST be untouched —
    // no Text block sneaking in front of the ToolResult block.
    // It sits second from the end: the last projected message is the runtime
    // prompt, which the final assertion of this test checks.
    let trailing = messages
        .get(messages.len().saturating_sub(2))
        .expect("stored trailing message");
    assert_eq!(trailing.role, "user");
    assert_eq!(trailing.content.len(), 1);
    assert!(matches!(
        trailing.content.first(),
        Some(ContentBlock::ToolResult { .. })
    ));

    // The earlier real user message carries user text first, turn_meta last.
    let real_user = messages.first().expect("first user message");
    assert_eq!(real_user.role, "user");
    let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content")
    else {
        panic!("expected Text block on real user message");
    };
    assert_eq!(text, "inspect src/lib.rs");
    // turn_meta is at the tail of the content array.
    let last_block = real_user.content.last().expect("turn_meta block");
    let ContentBlock::Text { text: meta, .. } = last_block else {
        panic!("expected Text block for turn_meta at tail");
    };
    assert!(meta.starts_with("<turn_meta>\n"));
    assert!(meta.contains("src/lib.rs"));
    // The projection ends with a text message carrying the runtime prompt tag.
    assert!(
        matches!(
            messages.last().and_then(|message| message.content.first()),
            Some(ContentBlock::Text { text, .. }) if text.contains("<runtime_prompt")
        ),
        "request projection should append transient runtime metadata"
    );
}

/// Ordering contract for a built user message: the user's text is block 0
/// and `<turn_meta>` is block 1.
///
/// DeepSeek's KV prefix cache matches bytes from the start of each message.
/// The date-bearing turn_meta is volatile, so putting it first would
/// invalidate the whole user-message prefix at every date boundary; keeping
/// it at the tail leaves the user-input prefix stable.
#[test]
fn user_message_turn_meta_is_appended_not_prepended() {
    let workspace = tempdir().expect("tempdir");
    let (engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    let msg = engine.user_text_message_with_turn_metadata("hello world".to_string());
    assert_eq!(msg.role, "user");
    assert_eq!(msg.content.len(), 2);

    // Block 0 is the user's text, verbatim.
    let text = match &msg.content[0] {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected Text block at position 0"),
    };
    assert_eq!(text, "hello world");

    // Block 1 is the turn_meta text, including the date line.
    let meta = match &msg.content[1] {
        ContentBlock::Text { text, .. } => text,
        _ => panic!("expected Text block for turn_meta at position 1"),
    };
    assert!(
        meta.starts_with("<turn_meta>\n"),
        "turn_meta must be at the tail"
    );
    assert!(
        meta.contains("Current local date:"),
        "turn_meta must contain the date"
    );
}

/// Mid-turn corner case: the only stored message is a tool result, as if the
/// prior real user message had already been compacted away while a tool
/// result was still pending. No turn_meta may be injected into that
/// tool-result message; the working_set surfaces again on the next stored
/// user-text message. The request projection still appends its transient
/// runtime-prompt message.
#[test]
fn turn_metadata_skips_when_only_tool_results_trail() {
    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    fs::create_dir_all(root.join("src")).expect("mkdir");
    fs::write(root.join("src/lib.rs"), "pub fn sample() {}").expect("write");

    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: root.to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );
    engine
        .session
        .working_set
        .observe_user_message("inspect src/lib.rs", root);

    // Seed the history with a lone tool result; nothing may be injected into
    // it retroactively.
    let pending_tool_result = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::ToolResult {
            tool_use_id: "call_42".to_string(),
            content: "pub fn sample() {}".to_string(),
            is_error: None,
            content_blocks: None,
        }],
    };
    engine.session.add_message(pending_tool_result);

    let projected = engine.messages_with_turn_metadata();

    // The stored message comes back as-is: a single block, still a ToolResult.
    let stored = projected.first().expect("stored tool result message");
    assert_eq!(stored.content.len(), 1);
    assert!(matches!(
        stored.content.first(),
        Some(ContentBlock::ToolResult { .. })
    ));

    // Exactly one extra message follows, and it carries the runtime prompt.
    assert_eq!(projected.len(), 2);
    let runtime_prompt_appended = projected
        .last()
        .and_then(|message| message.content.first())
        .is_some_and(|block| {
            matches!(block, ContentBlock::Text { text, .. } if text.contains("<runtime_prompt"))
        });
    assert!(
        runtime_prompt_appended,
        "request projection should still append transient runtime metadata"
    );
}

/// Calling `refresh_system_prompt` a second time with nothing changed in
/// between must leave both the stored prompt hash and the prompt itself equal
/// to what the first refresh produced.
#[test]
fn refresh_system_prompt_is_noop_when_unchanged() {
    let workspace = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    engine.refresh_system_prompt();
    // Snapshot (hash, prompt) after the first refresh.
    let baseline = (
        engine.session.last_system_prompt_hash,
        engine.session.system_prompt.clone(),
    );
    engine.refresh_system_prompt();

    assert_eq!(engine.session.last_system_prompt_hash, baseline.0);
    assert_eq!(engine.session.system_prompt, baseline.1);
}

/// With `show_thinking: false` and a `zh-Hans` locale tag, the engine's text
/// system prompt must contain the hidden-thinking language section (which
/// mentions `reasoning_content` and English) and must not contain the
/// "## 语言再次提醒" heading.
#[test]
fn engine_prompt_respects_hidden_thinking_config() {
    let workspace = tempdir().expect("tempdir");
    let (engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            locale_tag: "zh-Hans".to_string(),
            show_thinking: false,
            ..Default::default()
        },
        &Config::default(),
    );

    let prompt = match &engine.session.system_prompt {
        None => panic!("expected system prompt"),
        Some(SystemPrompt::Blocks(_)) => panic!("expected text system prompt"),
        Some(SystemPrompt::Text(text)) => text,
    };
    let mentions = |needle: &str| prompt.contains(needle);

    assert!(mentions("## Hidden Thinking Language"));
    assert!(mentions("reasoning_content"));
    assert!(mentions("English"));
    assert!(!mentions("## 语言再次提醒"));
}

/// Test helper: installs `system_prompt` on the engine's session as an
/// override. It derives `compaction_summary_prompt` from a copy of the prompt
/// via `extract_compaction_summary_prompt`, stores the prompt itself, and
/// sets the `system_prompt_override` flag.
fn sync_runtime_system_prompt_override(engine: &mut Engine, system_prompt: SystemPrompt) {
    let session = &mut engine.session;
    let summary_prompt = extract_compaction_summary_prompt(Some(system_prompt.clone()));
    session.compaction_summary_prompt = summary_prompt;
    session.system_prompt = Some(system_prompt);
    session.system_prompt_override = true;
}

/// A `SystemPrompt::Text` override installed through
/// `sync_runtime_system_prompt_override` must still be the session's system
/// prompt after `refresh_system_prompt` runs.
#[test]
fn text_system_prompt_override_via_runtime_sync_survives_refresh() {
    let workspace = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    let override_prompt = SystemPrompt::Text("TANGERINE-7".to_string());
    sync_runtime_system_prompt_override(&mut engine, override_prompt.clone());
    engine.refresh_system_prompt();

    assert_eq!(engine.session.system_prompt, Some(override_prompt));
}

/// A `SystemPrompt::Blocks` override installed through
/// `sync_runtime_system_prompt_override` must still be the session's system
/// prompt after `refresh_system_prompt` runs.
///
/// NOTE(review): the test name mentions a mode change, but only
/// `refresh_system_prompt()` is exercised here — confirm that is the path a
/// mode change takes.
#[test]
fn blocks_system_prompt_override_via_runtime_sync_survives_mode_change_refresh() {
    let workspace = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: workspace.path().to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );

    let override_block = SystemBlock {
        block_type: "text".to_string(),
        text: "TANGERINE-7".to_string(),
        cache_control: None,
    };
    let override_prompt = SystemPrompt::Blocks(vec![override_block]);

    sync_runtime_system_prompt_override(&mut engine, override_prompt.clone());
    engine.refresh_system_prompt();

    assert_eq!(engine.session.system_prompt, Some(override_prompt));
}

/// After a compaction summary is merged, the session's system prompt must
/// contain `COMPACTION_SUMMARY_MARKER` and must not contain the
/// "## Repo Working Set" marker, even though the working set has observed a
/// user message that names a workspace file.
#[test]
fn compaction_summary_stays_in_stable_system_prompt() {
    let workspace = tempdir().expect("tempdir");
    let root = workspace.path();
    fs::create_dir_all(root.join("src")).expect("mkdir");
    fs::write(root.join("src/main.rs"), "fn main() {}").expect("write");

    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: root.to_path_buf(),
            ..Default::default()
        },
        &Config::default(),
    );
    engine
        .session
        .working_set
        .observe_user_message("continue in src/main.rs", root);
    engine.refresh_system_prompt();

    let summary_block = SystemBlock {
        block_type: "text".to_string(),
        text: format!("{COMPACTION_SUMMARY_MARKER}\nsummary"),
        cache_control: None,
    };
    engine.merge_compaction_summary(Some(SystemPrompt::Blocks(vec![summary_block])));

    // Flatten whichever prompt representation is stored into a single string.
    let rendered = match engine.session.system_prompt.as_ref() {
        None => panic!("expected system prompt"),
        Some(SystemPrompt::Text(text)) => text.clone(),
        Some(SystemPrompt::Blocks(blocks)) => {
            let parts: Vec<&str> = blocks.iter().map(|block| block.text.as_str()).collect();
            parts.join("\n")
        }
    };

    assert!(rendered.contains(COMPACTION_SUMMARY_MARKER));
    assert!(!rendered.contains(WORKING_SET_SUMMARY_MARKER));
}

/// With the capacity controller enabled but only twenty short messages in the
/// session (model pinned to `deepseek-v4-pro`), the pre-request checkpoint
/// must report that nothing was applied and must leave the estimated token
/// count and the message count unchanged.
#[tokio::test]
async fn pre_request_refresh_skips_compaction_below_normal_threshold() {
    let capacity = CapacityControllerConfig {
        enabled: true,
        low_risk_max: 0.0,
        medium_risk_max: 1.0,
        min_turns_before_guardrail: 0,
        ..Default::default()
    };

    let mut engine = build_engine_with_capacity(capacity.clone());
    engine.config.capacity = capacity.clone();
    engine.capacity_controller = CapacityController::new(capacity);
    engine.turn_counter = 5;
    engine
        .capacity_controller
        .mark_turn_start(engine.turn_counter);

    let model = "deepseek-v4-pro";
    engine.session.model = model.to_string();
    engine.config.model = model.to_string();

    // A small, cheap history.
    engine.session.messages.extend((0..20).map(|i| Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: format!("small message {i}"),
            cache_control: None,
        }],
    }));

    let tokens_before = engine.estimated_input_tokens();
    let len_before = engine.session.messages.len();
    let turn = TurnContext::new(10);
    let applied = engine
        .run_capacity_pre_request_checkpoint(&turn, None, AppMode::Agent)
        .await;
    let tokens_after = engine.estimated_input_tokens();

    assert!(!applied);
    assert_eq!(tokens_after, tokens_before);
    assert_eq!(engine.session.messages.len(), len_before);
}

/// With the capacity controller enabled and a very large history (900
/// messages of 5,000 characters each), the pre-request checkpoint must report
/// that it applied an intervention and the estimated input tokens must drop.
#[tokio::test]
async fn pre_request_refresh_invoked_when_medium_risk() {
    let capacity = CapacityControllerConfig {
        enabled: true,
        low_risk_max: 0.0,
        medium_risk_max: 1.0,
        min_turns_before_guardrail: 0,
        ..Default::default()
    };

    let mut engine = build_engine_with_capacity(capacity.clone());
    engine.config.capacity = capacity.clone();
    engine.capacity_controller = CapacityController::new(capacity);
    engine.turn_counter = 5;
    engine
        .capacity_controller
        .mark_turn_start(engine.turn_counter);

    // Pin the model to an explicit 128k-context variant so the pressure ratio
    // stays stable regardless of changes to the workspace-wide default model.
    let model = "deepseek-v3.2-128k";
    engine.session.model = model.to_string();
    engine.config.model = model.to_string();

    let filler = "x".repeat(5_000);
    engine.session.messages.extend(
        std::iter::repeat_with(|| Message {
            role: "user".to_string(),
            content: vec![ContentBlock::Text {
                text: filler.clone(),
                cache_control: None,
            }],
        })
        .take(900),
    );

    let tokens_before = engine.estimated_input_tokens();
    let turn = TurnContext::new(10);
    let applied = engine
        .run_capacity_pre_request_checkpoint(&turn, None, AppMode::Agent)
        .await;
    let tokens_after = engine.estimated_input_tokens();

    assert!(applied);
    assert!(tokens_after < tokens_before);
}

/// After a recorded `read_file` tool call, the post-tool checkpoint must not
/// restart the turn, and some tool-result block in the session must contain
/// the `[verification replay]` note.
#[tokio::test]
async fn post_tool_replay_invoked_when_high_non_severe_risk() {
    let workspace = tempdir().expect("tempdir");
    let root = workspace.path().to_path_buf();
    fs::write(root.join("sample.txt"), "hello replay").expect("write");

    let capacity = CapacityControllerConfig {
        enabled: true,
        low_risk_max: 0.0,
        medium_risk_max: 0.0,
        severe_min_slack: -10.0,
        severe_violation_ratio: 2.0,
        min_turns_before_guardrail: 0,
        ..Default::default()
    };

    let mut engine = build_engine_with_capacity(capacity.clone());
    engine.session.workspace = root.clone();
    engine.config.workspace = root;
    engine.config.capacity = capacity.clone();
    engine.capacity_controller = CapacityController::new(capacity);
    engine.turn_counter = 4;
    engine
        .capacity_controller
        .mark_turn_start(engine.turn_counter);

    // Record one completed read of the sample file on the turn.
    let mut read_call = TurnToolCall::new(
        "tool_read_1".to_string(),
        "read_file".to_string(),
        json!({ "path": "sample.txt" }),
    );
    read_call.set_result(
        "hello replay".to_string(),
        std::time::Duration::from_millis(1),
    );
    let mut turn = TurnContext::new(10);
    turn.record_tool_call(read_call);

    let registry = ToolRegistryBuilder::new()
        .with_read_only_file_tools()
        .build(engine.build_tool_context(AppMode::Agent, false));

    let restarted = engine
        .run_capacity_post_tool_checkpoint(
            &turn,
            Some(&registry),
            Arc::new(RwLock::new(())),
            None,
            0,
            0,
        )
        .await;

    assert!(!restarted);
    let has_verification_note = engine
        .session
        .messages
        .iter()
        .flat_map(|message| message.content.iter())
        .any(|block| {
            matches!(
                block,
                ContentBlock::ToolResult { content, .. }
                    if content.contains("[verification replay]")
            )
        });
    assert!(has_verification_note);
}

/// With the capacity controller enabled, the error-escalation checkpoint must
/// restart the turn, shrink the session to at most two messages, and persist
/// a capacity record with a non-empty goal.
///
/// The capacity-memory directory is redirected to a temp dir for the duration
/// of the test, under `CAPACITY_MEMORY_ENV_LOCK`.
#[tokio::test]
async fn error_escalation_triggers_replan_when_severe_or_repeated_failures() {
    let _env_lock = CAPACITY_MEMORY_ENV_LOCK.lock().await;
    let memory_dir = tempdir().expect("tempdir");
    let _env = ScopedCapacityMemoryDir::set(memory_dir.path());

    let capacity = CapacityControllerConfig {
        enabled: true,
        low_risk_max: 0.0,
        medium_risk_max: 0.0,
        min_turns_before_guardrail: 0,
        ..Default::default()
    };

    let mut engine = build_engine_with_capacity(capacity.clone());
    engine.config.capacity = capacity.clone();
    engine.capacity_controller = CapacityController::new(capacity);
    engine.turn_counter = 6;
    engine
        .capacity_controller
        .mark_turn_start(engine.turn_counter);

    let text_message = |role: &str, text: String| Message {
        role: role.to_string(),
        content: vec![ContentBlock::Text {
            text,
            cache_control: None,
        }],
    };

    // Ten alternating user/assistant filler messages, then the real request.
    engine.session.messages.extend((0..10).map(|i| {
        let role = if i % 2 == 0 { "user" } else { "assistant" };
        text_message(role, format!("noise message {i}"))
    }));
    engine
        .session
        .messages
        .push(text_message("user", "Please finish task".to_string()));

    let len_before = engine.session.messages.len();
    let turn = TurnContext::new(10);
    let restarted = engine
        .run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
        .await;

    assert!(restarted);
    let len_after = engine.session.messages.len();
    assert!(len_after < len_before);
    assert!(len_after <= 2);

    let records = load_last_k_capacity_records(&engine.session.id, 1).expect("load memory");
    assert!(!records.is_empty());
    assert!(!records[0].canonical_state.goal.is_empty());
}

/// v0.8.11: `CapacityControllerConfig::default()` ships with
/// `enabled = false`.
///
/// The capacity controller's destructive interventions
/// (TargetedContextRefresh silently runs compaction; VerifyAndReplan clears
/// the session message log) silently rewrote or nuked the user's transcript
/// ("resetting plan" footer + black-screen symptom). v0.8.11 commits to
/// "trust the model with the full 1M-token context, only compact on explicit
/// user /compact" — auto-managing the prefix contradicts that posture.
/// Power users can still opt in via `capacity.enabled = true`.
///
/// This test drives the error-escalation checkpoint with the default config
/// and checks that no restart happens and the message log keeps its length.
#[tokio::test]
async fn capacity_disabled_by_default_keeps_messages_intact() {
    let _env_lock = CAPACITY_MEMORY_ENV_LOCK.lock().await;
    let memory_dir = tempdir().expect("tempdir");
    let _env = ScopedCapacityMemoryDir::set(memory_dir.path());

    // Default config — what real users get.
    let mut engine = build_engine_with_capacity(CapacityControllerConfig::default());
    assert!(
        !engine.config.capacity.enabled,
        "capacity controller must be off by default in v0.8.11+"
    );
    engine.turn_counter = 6;
    engine
        .capacity_controller
        .mark_turn_start(engine.turn_counter);

    let text_message = |role: &str, text: String| Message {
        role: role.to_string(),
        content: vec![ContentBlock::Text {
            text,
            cache_control: None,
        }],
    };

    // Ten alternating user/assistant filler messages, then the real request.
    engine.session.messages.extend((0..10).map(|i| {
        let role = if i % 2 == 0 { "user" } else { "assistant" };
        text_message(role, format!("noise message {i}"))
    }));
    engine
        .session
        .messages
        .push(text_message("user", "Please finish task".to_string()));

    let len_before = engine.session.messages.len();
    let turn = TurnContext::new(10);
    let restarted = engine
        .run_capacity_error_escalation_checkpoint(&turn, 2, 2, &[])
        .await;

    // Capacity is disabled → no replan, no message clear.
    assert!(!restarted);
    assert_eq!(engine.session.messages.len(), len_before);
}

/// With `enabled: false` set explicitly, the pre-request checkpoint must
/// apply nothing even for a sizeable history (120 messages of 5,000
/// characters): the token estimate and message count stay the same.
#[tokio::test]
async fn controller_disabled_keeps_behavior_unchanged() {
    let capacity = CapacityControllerConfig {
        enabled: false,
        ..Default::default()
    };

    let mut engine = build_engine_with_capacity(capacity.clone());
    engine.config.capacity = capacity.clone();
    engine.capacity_controller = CapacityController::new(capacity);
    engine.turn_counter = 3;
    engine
        .capacity_controller
        .mark_turn_start(engine.turn_counter);

    let filler = "y".repeat(5_000);
    engine.session.messages.extend((0..120).map(|_| Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: filler.clone(),
            cache_control: None,
        }],
    }));

    let tokens_before = engine.estimated_input_tokens();
    let len_before = engine.session.messages.len();
    let turn = TurnContext::new(10);
    let applied = engine
        .run_capacity_pre_request_checkpoint(&turn, None, AppMode::Agent)
        .await;
    let tokens_after = engine.estimated_input_tokens();
    let len_after = engine.session.messages.len();

    assert!(!applied);
    assert_eq!(tokens_before, tokens_after);
    assert_eq!(len_before, len_after);
}

/// For a tool whose `allowed_callers` is `["direct"]`, a direct caller and an
/// absent caller are both allowed, while a `code_execution_20250825` caller
/// is rejected.
#[test]
fn caller_policy_defaults_to_direct() {
    let direct_caller = ToolCaller {
        caller_type: "direct".to_string(),
        tool_id: None,
    };
    let code_exec_caller = ToolCaller {
        caller_type: "code_execution_20250825".to_string(),
        tool_id: Some("srvtoolu_1".to_string()),
    };
    let direct_only_tool = Tool {
        tool_type: None,
        name: "read_file".to_string(),
        description: "Read".to_string(),
        input_schema: json!({"type":"object"}),
        allowed_callers: Some(vec!["direct".to_string()]),
        defer_loading: Some(false),
        input_examples: None,
        strict: None,
        cache_control: None,
    };
    let tool = Some(&direct_only_tool);

    assert!(caller_allowed_for_tool(Some(&direct_caller), tool));
    assert!(!caller_allowed_for_tool(Some(&code_exec_caller), tool));
    assert!(caller_allowed_for_tool(None, tool));
}

/// A successful BM25 tool search for "read file" over a catalog of deferred
/// tools must add `read_file` to the active tool set.
#[test]
fn tool_search_activates_discovered_deferred_tools() {
    let deferred_tool = |name: &str, description: &str, input_schema: serde_json::Value| Tool {
        tool_type: None,
        name: name.to_string(),
        description: description.to_string(),
        input_schema,
        allowed_callers: Some(vec!["direct".to_string()]),
        defer_loading: Some(true),
        input_examples: None,
        strict: None,
        cache_control: None,
    };
    let mut catalog = vec![
        deferred_tool(
            "read_file",
            "Read files",
            json!({"type":"object","properties":{"path":{"type":"string"}}}),
        ),
        deferred_tool(
            "grep_files",
            "Search files",
            json!({"type":"object","properties":{"pattern":{"type":"string"}}}),
        ),
    ];
    let always_load = HashSet::new();
    ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);

    let mut active = initial_active_tools(&catalog);
    let query = json!({"query":"read file"});
    let outcome = execute_tool_search(TOOL_SEARCH_BM25_NAME, &query, &catalog, &mut active)
        .expect("search succeeds");

    assert!(outcome.success);
    assert!(active.contains("read_file"));
}

/// Test helper: builds a catalog of `count` deferred tools named
/// `matching_tool_000`, `matching_tool_001`, ... that all share the same
/// description, then passes it through `ensure_advanced_tooling` in Agent
/// mode with an empty always-load set.
fn tool_search_catalog_with_matches(count: usize) -> Vec<Tool> {
    let mut catalog = Vec::with_capacity(count);
    for idx in 0..count {
        catalog.push(Tool {
            tool_type: None,
            name: format!("matching_tool_{idx:03}"),
            description: "Matching deferred test tool".to_string(),
            input_schema: json!({"type":"object","properties":{"query":{"type":"string"}}}),
            allowed_callers: Some(vec!["direct".to_string()]),
            defer_loading: Some(true),
            input_examples: None,
            strict: None,
            cache_control: None,
        });
    }

    let always_load = HashSet::new();
    ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);
    catalog
}

/// Test helper: returns the length of the `tool_references` array in the
/// result's metadata, or 0 when the metadata is absent, the key is missing,
/// or the value is not an array.
fn tool_search_reference_count(result: &ToolResult) -> usize {
    let Some(metadata) = result.metadata.as_ref() else {
        return 0;
    };
    match metadata
        .get("tool_references")
        .and_then(|references| references.as_array())
    {
        Some(references) => references.len(),
        None => 0,
    }
}

/// When `max_results` is omitted, both the regex and the BM25 search tools
/// must return 20 tool references out of 25 matching catalog entries.
#[test]
fn tool_search_defaults_to_twenty_results_for_regex_and_bm25() {
    let catalog = tool_search_catalog_with_matches(25);
    let args = json!({"query":"matching"});

    for search_tool in [TOOL_SEARCH_REGEX_NAME, TOOL_SEARCH_BM25_NAME] {
        // Fresh active set per tool so the runs do not influence each other.
        let mut active = initial_active_tools(&catalog);
        let outcome = execute_tool_search(search_tool, &args, &catalog, &mut active)
            .expect("search succeeds");

        assert_eq!(tool_search_reference_count(&outcome), 20);
    }
}

/// An explicit `max_results` is honoured (7 yields 7 references via BM25) and
/// an oversized request is capped (999 yields 100 references via regex), out
/// of 120 matching catalog entries.
#[test]
fn tool_search_respects_and_caps_max_results() {
    let catalog = tool_search_catalog_with_matches(120);

    // (search tool, requested max_results, expected reference count)
    let cases = [
        (TOOL_SEARCH_BM25_NAME, 7, 7),
        (TOOL_SEARCH_REGEX_NAME, 999, 100),
    ];
    for (search_tool, requested, expected) in cases {
        let mut active = initial_active_tools(&catalog);
        let args = json!({"query":"matching","max_results":requested});
        let outcome = execute_tool_search(search_tool, &args, &catalog, &mut active)
            .expect("search succeeds");
        assert_eq!(tool_search_reference_count(&outcome), expected);
    }
}

/// Both tool-search definitions added by `ensure_advanced_tooling` must
/// declare a `max_results` property with default 20, maximum 100 and
/// minimum 1 in their input schema.
#[test]
fn tool_search_schema_exposes_max_results_default_and_cap() {
    let always_load = HashSet::new();
    let mut catalog = Vec::new();
    ensure_advanced_tooling(&mut catalog, AppMode::Agent, &always_load);

    for search_tool in [TOOL_SEARCH_REGEX_NAME, TOOL_SEARCH_BM25_NAME] {
        let definition = catalog
            .iter()
            .find(|candidate| candidate.name == search_tool)
            .expect("tool search definition exists");
        let max_results = &definition.input_schema["properties"]["max_results"];

        assert_eq!(max_results["default"], 20);
        assert_eq!(max_results["maximum"], 100);
        assert_eq!(max_results["minimum"], 1);
    }
}

/// Running a one-line `print` snippet through the code-execution tool must
/// succeed and return content that includes both the printed text and a
/// `return_code` entry.
///
/// NOTE(review): presumably needs a Python interpreter available on the test
/// host — confirm against `execute_code_execution_tool`.
#[tokio::test]
async fn code_execution_runs_python_and_returns_result_payload() {
    let workspace = tempdir().expect("tempdir");
    let request = json!({"code":"print('hello from code exec')"});

    let outcome = execute_code_execution_tool(&request, workspace.path())
        .await
        .expect("code execution should run");

    let output = &outcome.content;
    assert!(output.contains("hello from code exec"));
    assert!(output.contains("return_code"));
}

/// `ensure_advanced_tooling` must not add the code-execution tool to a
/// catalog in Plan mode, but must add it in Agent mode.
#[test]
fn plan_mode_catalog_skips_code_execution_tool_but_agent_keeps_it() {
    /// True when the catalog lists the code-execution tool by name.
    fn exposes_code_execution(catalog: &[Tool]) -> bool {
        catalog
            .iter()
            .any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME)
    }

    let always_load = HashSet::new();

    let mut plan_catalog = vec![api_tool("read_file")];
    ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan, &always_load);
    assert!(
        !exposes_code_execution(&plan_catalog),
        "Plan mode must not expose code_execution"
    );

    let mut agent_catalog = vec![api_tool("read_file")];
    ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent, &always_load);
    assert!(
        exposes_code_execution(&agent_catalog),
        "Agent mode should still expose code_execution"
    );
}

/// Requesting a deferred tool by name must activate it:
/// `maybe_activate_requested_deferred_tool` returns `true` for `exec_shell`
/// and the name is then present in the active set, which started out empty.
#[test]
fn deferred_tool_requests_are_auto_activated() {
    // `HashSet` comes from the file-level `use std::collections::{HashMap, HashSet}`;
    // no function-local import is needed.
    let catalog = vec![Tool {
        tool_type: None,
        name: "exec_shell".to_string(),
        description: "Run shell commands".to_string(),
        input_schema: json!({"type":"object","properties":{"cmd":{"type":"string"}}}),
        allowed_callers: Some(vec!["direct".to_string()]),
        defer_loading: Some(true),
        input_examples: None,
        strict: None,
        cache_control: None,
    }];

    let mut active = HashSet::new();
    assert!(!active.contains("exec_shell"));
    assert!(maybe_activate_requested_deferred_tool(
        "exec_shell",
        &catalog,
        &mut active
    ));
    assert!(active.contains("exec_shell"));
}

/// A near-miss tool name (`reed_file`) must produce an error message that
/// offers "Did you mean:" suggestions including `read_file`, and that points
/// at the BM25 tool-search tool.
#[test]
fn missing_tool_error_message_offers_suggestions() {
    let loaded_tool = |name: &str, description: &str, input_schema: serde_json::Value| Tool {
        tool_type: None,
        name: name.to_string(),
        description: description.to_string(),
        input_schema,
        allowed_callers: Some(vec!["direct".to_string()]),
        defer_loading: Some(false),
        input_examples: None,
        strict: None,
        cache_control: None,
    };
    let catalog = vec![
        loaded_tool(
            "read_file",
            "Read file contents",
            json!({"type":"object","properties":{"path":{"type":"string"}}}),
        ),
        loaded_tool(
            "grep_files",
            "Search file contents",
            json!({"type":"object","properties":{"pattern":{"type":"string"}}}),
        ),
    ];

    let message = missing_tool_error_message("reed_file", &catalog);
    let has = |needle: &str| message.contains(needle);

    assert!(has("Did you mean:"));
    assert!(has("read_file"));
    assert!(has(TOOL_SEARCH_BM25_NAME));
}

/// A tool name with no close match in the catalog must still produce a
/// message that says the tool is not available in the current catalog and
/// that points at the BM25 tool-search tool.
#[test]
fn missing_tool_error_message_includes_discovery_guidance_when_no_match() {
    let read_file = Tool {
        tool_type: None,
        name: "read_file".to_string(),
        description: "Read file contents".to_string(),
        input_schema: json!({"type":"object","properties":{"path":{"type":"string"}}}),
        allowed_callers: Some(vec!["direct".to_string()]),
        defer_loading: Some(false),
        input_examples: None,
        strict: None,
        cache_control: None,
    };
    let catalog = vec![read_file];

    let message = missing_tool_error_message("totally_unknown_tool", &catalog);
    let has = |needle: &str| message.contains(needle);

    assert!(has("not available in the current tool catalog"));
    assert!(has(TOOL_SEARCH_BM25_NAME));
}

/// For every shell tool name, the missing-tool message must explain the
/// `allow_shell` gate (current value, how to enable it, `--save`, Agent mode,
/// approval gating), must not mention YOLO or auto-approve, and must still
/// point at the BM25 tool-search tool.
#[test]
fn missing_shell_tool_error_message_names_allow_shell_gate() {
    const SHELL_TOOLS: [&str; 5] = [
        "exec_shell",
        "exec_shell_wait",
        "exec_shell_interact",
        "task_shell_start",
        "task_shell_wait",
    ];
    const REQUIRED: [&str; 6] = [
        "allow_shell = false",
        "allow_shell",
        "/config allow_shell true",
        "--save",
        "Agent mode",
        "approval gating",
    ];
    const FORBIDDEN: [&str; 2] = ["YOLO", "auto-approve"];

    let catalog = vec![api_tool("read_file")];

    for tool_name in SHELL_TOOLS {
        let message = missing_tool_error_message(tool_name, &catalog);
        assert!(message.contains("not available in the current tool catalog"));

        for needle in REQUIRED {
            assert!(message.contains(needle), "{tool_name}: {message}");
        }
        for needle in FORBIDDEN {
            assert!(!message.contains(needle), "{tool_name}: {message}");
        }
        assert!(
            message.contains(TOOL_SEARCH_BM25_NAME),
            "{tool_name}: {message}"
        );
    }
}

/// When the catalog holds a near match (`exec`) for a missing shell tool, the
/// message must carry both the "Did you mean:" suggestion and the
/// `allow_shell` guidance, without mentioning YOLO or auto-approve.
#[test]
fn missing_shell_tool_error_message_keeps_allow_shell_hint_with_suggestions() {
    let catalog = vec![api_tool("exec")];

    let message = missing_tool_error_message("exec_shell", &catalog);
    let has = |needle: &str| message.contains(needle);

    // Suggestion part.
    assert!(has("Did you mean:"));
    assert!(has("exec"));
    // allow_shell gate guidance.
    assert!(has("allow_shell = false"));
    assert!(has("allow_shell"));
    assert!(has("/config allow_shell true"));
    assert!(has("--save"));
    assert!(has("Agent mode"));
    // Wording that must stay out of the message.
    assert!(!has("YOLO"));
    assert!(!has("auto-approve"));
    // Discovery pointer.
    assert!(has(TOOL_SEARCH_BM25_NAME));
}

/// A complete `[TOOL_CALL]...[/TOOL_CALL]` wrapper inside one delta is removed
/// together with its payload; the surrounding prose stays visible and the
/// filter ends outside the wrapper.
#[test]
fn filter_tool_call_delta_strips_bracket_marker() {
    let mut inside_wrapper = false;
    let visible = filter_tool_call_delta(
        "intro [TOOL_CALL]\n{\"tool\":\"x\"}\n[/TOOL_CALL] outro",
        &mut inside_wrapper,
    );
    let shows = |needle: &str| visible.contains(needle);

    assert!(!inside_wrapper);
    assert!(!shows("[TOOL_CALL]"));
    assert!(!shows("[/TOOL_CALL]"));
    assert!(!shows("\"tool\":\"x\""));
    assert!(shows("intro"));
    assert!(shows("outro"));
}

/// A complete `<codewhale:tool_call ...>` wrapper inside one delta is removed:
/// none of the known start markers may remain in the visible text, while the
/// prose before and after the wrapper survives.
#[test]
fn filter_tool_call_delta_strips_deepseek_xml_marker() {
    let mut inside_wrapper = false;
    let visible = filter_tool_call_delta(
        "before <codewhale:tool_call name=\"x\">payload</codewhale:tool_call> after",
        &mut inside_wrapper,
    );

    assert!(!inside_wrapper);
    for marker in TOOL_CALL_START_MARKERS {
        assert!(
            !visible.contains(marker),
            "visible text leaked start marker `{marker}`: {visible:?}"
        );
    }
    let shows = |needle: &str| visible.contains(needle);
    assert!(shows("before"));
    assert!(shows("after"));
}

/// A complete `<tool_call>...</tool_call>` wrapper inside one delta is
/// removed; the prose on either side stays visible and the filter ends
/// outside the wrapper.
#[test]
fn filter_tool_call_delta_strips_generic_tool_call_marker() {
    let mut inside_wrapper = false;
    let visible = filter_tool_call_delta(
        "lead <tool_call>\n{\"name\":\"do\"}\n</tool_call> tail",
        &mut inside_wrapper,
    );
    let shows = |needle: &str| visible.contains(needle);

    assert!(!inside_wrapper);
    assert!(!shows("<tool_call"));
    assert!(!shows("</tool_call>"));
    assert!(shows("lead"));
    assert!(shows("tail"));
}

/// A complete `<invoke ...>...</invoke>` wrapper inside one delta is removed;
/// the prose on either side stays visible and the filter ends outside the
/// wrapper.
#[test]
fn filter_tool_call_delta_strips_invoke_marker() {
    let mut inside_wrapper = false;
    let visible = filter_tool_call_delta(
        "alpha <invoke name=\"x\"><parameter name=\"k\">v</parameter></invoke> beta",
        &mut inside_wrapper,
    );
    let shows = |needle: &str| visible.contains(needle);

    assert!(!inside_wrapper);
    assert!(!shows("<invoke "));
    assert!(!shows("</invoke>"));
    assert!(shows("alpha"));
    assert!(shows("beta"));
}

/// A complete `<function_calls>...</function_calls>` wrapper inside one delta
/// is removed; the prose on either side stays visible and the filter ends
/// outside the wrapper.
#[test]
fn filter_tool_call_delta_strips_function_calls_marker() {
    let mut inside_wrapper = false;
    let visible = filter_tool_call_delta(
        "head <function_calls>\n{\"name\":\"x\"}\n</function_calls> tail",
        &mut inside_wrapper,
    );
    let shows = |needle: &str| visible.contains(needle);

    assert!(!inside_wrapper);
    assert!(!shows("<function_calls>"));
    assert!(!shows("</function_calls>"));
    assert!(shows("head"));
    assert!(shows("tail"));
}

/// A wrapper that opens in one delta and closes in the next is tracked
/// through the shared flag: the first call shows only the text before the
/// opener and leaves the flag set; the second call shows only the text after
/// the closer and clears the flag.
#[test]
fn filter_tool_call_delta_handles_chunk_split_marker() {
    let mut inside_wrapper = false;

    // Chunk 1: the wrapper opens but is not closed.
    let first_visible = filter_tool_call_delta("hello <tool_call>partial", &mut inside_wrapper);
    assert!(inside_wrapper, "filter must remember it is mid-wrapper");
    assert_eq!(first_visible, "hello ");

    // Chunk 2: more payload, then the closer and some trailing prose.
    let second_visible = filter_tool_call_delta("payload</tool_call> tail", &mut inside_wrapper);
    assert!(!inside_wrapper);
    assert_eq!(second_visible, " tail");
}

/// An opener with no closer in the same delta hides everything from the
/// opener onward and leaves the filter flagged as inside a wrapper.
#[test]
fn filter_tool_call_delta_unmatched_open_suppresses_remainder() {
    let mut inside_wrapper = false;
    let visible = filter_tool_call_delta("ok [TOOL_CALL]rest of stream", &mut inside_wrapper);

    assert_eq!(visible, "ok ");
    assert!(
        inside_wrapper,
        "unmatched open must leave filter in tool-call mode"
    );
}

/// Text without any wrapper marker — even with an angle-bracket lookalike in
/// backticks — comes back unchanged and the flag stays clear.
#[test]
fn filter_tool_call_delta_passes_through_clean_text() {
    let prose = "no markers here, just prose with code `<not a tag>`.";
    let mut inside_wrapper = false;

    let visible = filter_tool_call_delta(prose, &mut inside_wrapper);

    assert!(!inside_wrapper);
    assert_eq!(visible, prose);
}

/// Every entry of `TOOL_CALL_START_MARKERS`, embedded in surrounding noise,
/// must be reported by `contains_fake_tool_wrapper`.
#[test]
fn contains_fake_tool_wrapper_detects_each_marker() {
    for marker in TOOL_CALL_START_MARKERS {
        let haystack = format!("noise {marker} more noise");
        let detected = contains_fake_tool_wrapper(&haystack);
        assert!(detected, "marker `{marker}` should be detected");
    }
}

#[test]
fn contains_fake_tool_wrapper_returns_false_on_clean_text() {
    // Ordinary prose with no wrapper syntax at all.
    let plain = "plain assistant text without wrappers";
    assert!(!contains_fake_tool_wrapper(plain));

    // A truncated `<tool` prefix must not count as a start marker.
    let lookalike = "`<tool` lookalike but not a real start marker";
    assert!(!contains_fake_tool_wrapper(lookalike));
}

#[test]
fn fake_wrapper_notice_is_compact_and_actionable() {
    // Compact: under 120 bytes so it fits cleanly in a single status line.
    let notice_len = FAKE_WRAPPER_NOTICE.len();
    assert!(notice_len < 120);

    // Actionable: the notice must mention the API tool channel.
    let mentions_channel = FAKE_WRAPPER_NOTICE.contains("API tool channel");
    assert!(mentions_channel);
}

// ---- final_tool_input: bug-class regression for "<command>" placeholder ----
//
// Background: a streamed tool block carries its `input` in two pieces — an
// initial value at `ContentBlockStart` (often `{}`), then `InputJsonDelta`
// chunks that build up `input_buffer`. The TUI used to fire `ToolCallStarted`
// from `ContentBlockStart` with the empty initial input and never re-emit
// once args were known, so cells rendered the literal text `<command>` /
// `<file>` placeholders. The fix relocates the emission to `ContentBlockStop`
// and routes the input through `final_tool_input`, which prefers the parsed
// buffer over a stale empty placeholder.
fn tool_state(initial: serde_json::Value, buffer: &str) -> ToolUseState {
    // Test fixture: only the start-frame `input` and the accumulated
    // `input_buffer` vary; id, name and caller are fixed.
    let id = "t1".into();
    let name = "exec_shell".into();
    let input_buffer = buffer.into();
    ToolUseState {
        id,
        name,
        input: initial,
        caller: None,
        input_buffer,
    }
}

#[test]
fn final_tool_input_prefers_parsed_buffer_over_empty_initial() {
    // The exact regression shape: the start frame carried `{}` and the real
    // arguments arrived later through the streamed buffer. The resolved
    // input must be the parsed buffer, not the empty placeholder.
    let streamed = tool_state(json!({}), r#"{"command": "ls -la"}"#);
    let resolved = final_tool_input(&streamed);
    assert_eq!(resolved, json!({"command": "ls -la"}));
}

#[test]
fn final_tool_input_falls_back_to_initial_when_buffer_empty() {
    // Some models put the arguments straight into the start frame and never
    // stream a delta; with an empty buffer those arguments must be reported.
    let start_frame_args = json!({"command": "echo hi"});
    let unbuffered = tool_state(start_frame_args, "");
    let resolved = final_tool_input(&unbuffered);
    assert_eq!(resolved, json!({"command": "echo hi"}));
}

#[test]
fn final_tool_input_repairs_unparseable_buffer() {
    // A non-empty buffer takes precedence over the start-frame input even
    // when it is not valid JSON; the repair path turns it into `{}` so that
    // dispatch can still proceed.
    let garbled = tool_state(json!({"command": "echo hi"}), "{not json");
    let resolved = final_tool_input(&garbled);
    assert_eq!(resolved, json!({}));
}

// === #103 transparent stream-retry policy =====================================

#[test]
fn stream_retry_zero_content_then_error_is_transparently_retried() {
    // Issue #103, case 2: the stream failed before yielding any content
    // (the decoder returned Err on its first poll). Nothing has been billed
    // and the user has seen nothing, so the engine should retry silently.
    let fresh_attempt = super::should_transparently_retry_stream(false, 0, false);
    assert!(
        fresh_attempt,
        "first attempt with no content must be eligible for transparent retry"
    );

    let after_one_retry = super::should_transparently_retry_stream(false, 1, false);
    assert!(
        after_one_retry,
        "second attempt (one prior retry) with no content must still be eligible"
    );
}

#[test]
fn stream_retry_after_content_received_surfaces_error() {
    // Issue #103, case 3: the stream errored only after yielding content.
    // A transparent retry is off the table here: output tokens were already
    // billed and deltas already reached the UI, so a resend would double-bill
    // and show the user the same prefix twice.
    let with_full_budget = super::should_transparently_retry_stream(true, 0, false);
    assert!(
        !with_full_budget,
        "any content received → no transparent retry, even with full budget"
    );

    let after_one_retry = super::should_transparently_retry_stream(true, 1, false);
    assert!(
        !after_one_retry,
        "any content received → no transparent retry on subsequent attempts"
    );
}

#[test]
fn stream_retry_budget_caps_transparent_retries_at_two() {
    // Issue #103, case 4: once MAX_TRANSPARENT_STREAM_RETRIES attempts have
    // been spent, transparent retrying stops and the outer error path takes
    // over. (The per-turn `stream_retry_attempts` retry is a separate layer
    // that still applies at the whole-turn level.)
    let cap = super::MAX_TRANSPARENT_STREAM_RETRIES;
    // No content received and not cancelled; only the attempt count varies.
    let retry_allowed =
        |attempts| super::should_transparently_retry_stream(false, attempts, false);

    assert!(
        retry_allowed(cap - 1),
        "one short of the cap should still retry"
    );
    assert!(
        !retry_allowed(cap),
        "at the cap, no further transparent retries"
    );
    assert!(
        !retry_allowed(cap + 5),
        "well past the cap, definitely no transparent retries"
    );
}

#[test]
fn stream_retry_respects_cancellation() {
    // Cancellation trumps everything else: after Esc / Ctrl-C the request
    // must never be silently re-issued behind the user's back.
    let cancelled_fresh = super::should_transparently_retry_stream(false, 0, true);
    assert!(
        !cancelled_fresh,
        "cancelled turn must not be transparently retried"
    );

    let cancelled_after_retry = super::should_transparently_retry_stream(false, 1, true);
    assert!(
        !cancelled_after_retry,
        "cancelled turn must not be transparently retried even with budget"
    );
}

// === #2990 sleep-resume policy ================================================

#[test]
fn sleep_gap_requires_wallclock_to_outrun_monotonic_clock() {
    use std::time::Duration;
    // Shorthand for the second-granularity durations used below.
    let secs = Duration::from_secs;

    // Clocks agree: an ordinary network failure, not a suspend.
    let clocks_agree = super::sleep_gap_detected(secs(30), secs(30));
    assert!(
        !clocks_agree,
        "equal elapsed times must not register as a sleep gap"
    );

    // Small divergence (NTP slew / scheduling jitter) stays under the threshold.
    let minor_drift = super::sleep_gap_detected(secs(5), secs(14));
    assert!(!minor_drift, "9s of divergence is below the 10s threshold");

    // Large divergence: the host was suspended.
    let suspended = super::sleep_gap_detected(secs(5), secs(16));
    assert!(suspended, "11s of divergence must register as a sleep gap");

    // Wall clock stepped backwards (NTP step): the saturating subtraction
    // yields a zero gap rather than a bogus detection.
    let wall_clock_behind = super::sleep_gap_detected(secs(60), secs(5));
    assert!(
        !wall_clock_behind,
        "wall clock behind monotonic must never register as a sleep gap"
    );
}

#[test]
fn sleep_resume_retries_even_after_content_streamed() {
    // The point of #2990: in contrast to the #103 transparent retry, a
    // detected sleep gap triggers a retry no matter what was streamed. The
    // partial output predates the sleep and the user was not watching.
    let full_budget = super::should_resume_after_sleep(true, 0, false);
    assert!(full_budget, "detected sleep with full budget must resume");

    let last_slot = super::MAX_STREAM_RETRIES - 1;
    let nearly_spent = super::should_resume_after_sleep(true, last_slot, false);
    assert!(
        nearly_spent,
        "detected sleep one short of the budget must still resume"
    );
}

#[test]
fn sleep_resume_requires_a_detected_gap() {
    // With no sleep gap this layer must not act at all. That keeps the
    // deliberate no-retry-after-content policy for ordinary flakes (#103)
    // intact.
    let resumed_without_gap = super::should_resume_after_sleep(false, 0, false);
    assert!(
        !resumed_without_gap,
        "no sleep gap → never resume via this layer"
    );
}

#[test]
fn sleep_resume_respects_budget_and_cancellation() {
    // A sleep gap was detected in both cases; each one is vetoed by a
    // different guard.
    let budget_spent = super::should_resume_after_sleep(true, super::MAX_STREAM_RETRIES, false);
    assert!(
        !budget_spent,
        "budget exhausted → surface the failure instead of looping"
    );

    let user_cancelled = super::should_resume_after_sleep(true, 0, true);
    assert!(
        !user_cancelled,
        "cancelled turn must not be resumed behind the user's back"
    );
}

#[test]
fn stream_retry_threshold_relaxed_to_five() {
    // Issue #103, cases 1+4: v0.6.7 relaxed the consecutive-error threshold
    // for failing a turn from 3 to 5, because the new HTTP/2 keepalive
    // defaults make spurious decode errors rarer. Pinning the constant here
    // makes a regression back to 3 fail loudly.
    let error_threshold = super::MAX_STREAM_ERRORS_BEFORE_FAIL;
    assert_eq!(
        error_threshold,
        5,
        "the consecutive-stream-error threshold should be 5; \
         lowering it back to 3 will fail mid-turn under transient flakiness"
    );

    // Regression guard on the transparent-retry cap as well.
    let transparent_cap = super::MAX_TRANSPARENT_STREAM_RETRIES;
    assert_eq!(
        transparent_cap,
        2,
        "transparent-retry cap should be 2; raising it risks hammering the \
         provider on real outages"
    );
}

// === Issue #66: error taxonomy wired through engine + audit + capacity ===

/// Audit entries for failed tools must expose the typed `category` and
/// `severity` taken from the `ErrorEnvelope` that the `ToolError` converts
/// into, so downstream tooling can bucket failures without parsing the
/// message string.
#[test]
fn tool_failure_audit_payload_carries_category_and_severity() {
    use crate::error_taxonomy::ErrorEnvelope;
    use crate::tools::spec::ToolError;

    let timeout = ToolError::Timeout { seconds: 30 };
    // Convert a copy; the original is still needed for its display message.
    let envelope: ErrorEnvelope = timeout.clone().into();
    let audit_entry = json!({
        "event": "tool.result",
        "tool_id": "tool-1",
        "tool_name": "exec_shell",
        "success": false,
        "error": timeout.to_string(),
        "category": envelope.category.to_string(),
        "severity": envelope.severity.to_string(),
    });

    // A timeout is expected to surface as the "timeout" category at
    // "warning" severity.
    assert_eq!(audit_entry["category"], "timeout");
    assert_eq!(audit_entry["severity"], "warning");
    assert_eq!(audit_entry["success"], false);
}

/// `ErrorCategory::InvalidInput` is the context-overflow signal for capacity
/// escalation and must escalate on the very first failure (no consecutive
/// requirement). The older path scanned the message for "context length";
/// categories replace that with a typed contract.
#[test]
fn capacity_escalation_treats_invalid_input_as_overflow_signal() {
    use crate::error_taxonomy::ErrorCategory;

    // These predicates replay the categorization branches of
    // `run_capacity_error_escalation_checkpoint`. Asserting on the typed
    // surface (a slice of `ErrorCategory`) makes a future return to
    // substring matching fail loudly.
    let observed: &[ErrorCategory] = &[ErrorCategory::InvalidInput];
    let is_transient = |category: &ErrorCategory| {
        matches!(
            category,
            ErrorCategory::Network | ErrorCategory::RateLimit | ErrorCategory::Timeout
        )
    };

    let has_context_overflow = observed.contains(&ErrorCategory::InvalidInput);
    assert!(has_context_overflow);

    // An empty slice never counts as "only transient".
    let only_transient = !observed.is_empty() && observed.iter().all(is_transient);
    assert!(!only_transient);
}

/// A failure set made up solely of transient categories (network, rate
/// limit, timeout) must NOT escalate by itself: the existing retry loop
/// handles those, and they should not trigger a capacity-driven replan.
#[test]
fn capacity_escalation_skips_pure_transient_categories() {
    use crate::error_taxonomy::ErrorCategory;

    let observed: &[ErrorCategory] = &[
        ErrorCategory::Network,
        ErrorCategory::RateLimit,
        ErrorCategory::Timeout,
    ];
    let is_transient = |category: &ErrorCategory| {
        matches!(
            category,
            ErrorCategory::Network | ErrorCategory::RateLimit | ErrorCategory::Timeout
        )
    };

    // No overflow signal is present in this set.
    let has_context_overflow = observed.contains(&ErrorCategory::InvalidInput);
    assert!(!has_context_overflow);

    // Every observed category is transient and the set is non-empty.
    let only_transient = !observed.is_empty() && observed.iter().all(is_transient);
    assert!(only_transient);
}

// ── #136: post-edit LSP diagnostics hook ─────────────────────────────────

#[test]
fn edited_paths_for_edit_file_returns_path() {
    // `edit_file` names its target in the `path` argument.
    let args = json!({ "path": "src/foo.rs", "search": "x", "replace": "y" });
    let touched = edited_paths_for_tool("edit_file", &args);
    assert_eq!(touched, vec![PathBuf::from("src/foo.rs")]);
}

#[test]
fn edited_paths_for_write_file_returns_path() {
    // `write_file` names its target in the `path` argument as well.
    let args = json!({ "path": "src/bar.rs", "content": "fn main() {}" });
    let touched = edited_paths_for_tool("write_file", &args);
    assert_eq!(touched, vec![PathBuf::from("src/bar.rs")]);
}

#[test]
fn edited_paths_for_apply_patch_with_changes_returns_each_path() {
    // Structured form of `apply_patch`: one entry per file under `changes`.
    let args = json!({
        "changes": [
            { "path": "a.rs", "content": "" },
            { "path": "b.rs", "content": "" }
        ]
    });
    let touched = edited_paths_for_tool("apply_patch", &args);

    // Paths come back in the order the changes were listed.
    let expected = vec![PathBuf::from("a.rs"), PathBuf::from("b.rs")];
    assert_eq!(touched, expected);
}

#[test]
fn edited_paths_for_apply_patch_with_diff_text_extracts_paths() {
    // Unified-diff form of `apply_patch`: the target is read from the diff
    // headers, and the `a/` / `b/` prefixes are absent from the result.
    let args = json!({
        "patch": "--- a/foo.rs\n+++ b/foo.rs\n@@ -1 +1 @@\n-let x: i32 = 0;\n+let x: i32 = \"oops\";\n"
    });
    let touched = edited_paths_for_tool("apply_patch", &args);
    assert_eq!(touched, vec![PathBuf::from("foo.rs")]);
}

#[test]
fn edited_paths_for_apply_patch_with_invalid_diff_returns_empty() {
    // A hunk with no `---` / `+++` file headers names no file at all.
    let args = json!({
        "patch": "@@ -1 +1 @@\n-old\n+new\n"
    });
    let touched = edited_paths_for_tool("apply_patch", &args);
    assert!(touched.is_empty());
}

#[test]
fn edited_paths_for_unknown_tool_returns_empty() {
    // Non-editing tools report nothing, even when a `path` argument is present.
    let args = json!({ "path": "irrelevant.rs" });

    let from_read = edited_paths_for_tool("read_file", &args);
    assert!(from_read.is_empty());

    let from_grep = edited_paths_for_tool("grep_files", &args);
    assert!(from_grep.is_empty());
}

#[test]
fn parse_patch_paths_skips_dev_null() {
    // Two-file diff: `keep.rs` is modified, `deleted.rs` is removed (its
    // `+++` side is `/dev/null`). Only the surviving file is reported.
    let diff = "--- a/keep.rs\n+++ b/keep.rs\n@@ -1 +1 @@\n-old\n+new\n--- a/deleted.rs\n+++ /dev/null\n@@ -1 +0,0 @@\n-delete me\n";
    let args = json!({ "patch": diff });
    let touched = edited_paths_for_tool("apply_patch", &args);
    assert_eq!(touched, vec![PathBuf::from("keep.rs")]);
}

#[tokio::test]
async fn post_edit_hook_injects_diagnostics_message_before_next_request() {
    use crate::lsp::{Diagnostic, Language, Severity};
    use std::sync::Arc;

    // Temporary workspace holding a single Rust source file.
    let sandbox = tempdir().expect("tempdir");
    let src_dir = sandbox.path().join("src");
    fs::create_dir_all(&src_dir).unwrap();
    fs::write(src_dir.join("main.rs"), "let x: i32 = \"not a number\";").unwrap();

    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: sandbox.path().to_path_buf(),
            lsp_config: Some(crate::lsp::LspConfig::default()),
            ..Default::default()
        },
        &Config::default(),
    );

    // Swap in a fake Rust transport seeded with one type-error diagnostic.
    let type_error = Diagnostic {
        line: 1,
        column: 14,
        severity: Severity::Error,
        message: "expected i32, found &str".to_string(),
    };
    let transport = Arc::new(crate::lsp::tests::FakeTransport::new(vec![type_error]));
    engine
        .lsp_manager
        .install_test_transport(Language::Rust, transport)
        .await;

    // Drive the hook the way a successful edit_file call would; it should
    // queue exactly one pending diagnostics block.
    let edit_args = json!({ "path": "src/main.rs", "search": "0", "replace": "\"not a number\"" });
    engine.run_post_edit_lsp_hook("edit_file", &edit_args).await;
    assert_eq!(engine.pending_lsp_blocks.len(), 1);

    // Flushing turns the queued block into one extra session message.
    let count_before_flush = engine.session.messages.len();
    engine.flush_pending_lsp_diagnostics().await;
    assert_eq!(engine.session.messages.len(), count_before_flush + 1);

    let appended = engine.session.messages.last().expect("message appended");
    assert_eq!(appended.role, "user");

    // turn_meta sits at the tail of the content array (PR #2517).
    let meta = match appended.content.last() {
        Some(crate::models::ContentBlock::Text { text, .. }) => text.clone(),
        other => panic!("expected text block at tail, got {other:?}"),
    };
    assert!(meta.starts_with("<turn_meta>\n"));

    // The diagnostics live in the first text block that carries a
    // `<diagnostics file="…">` element.
    let diagnostic_text = appended
        .content
        .iter()
        .filter_map(|block| match block {
            crate::models::ContentBlock::Text { text, .. } => Some(text),
            _ => None,
        })
        .find(|text| text.contains("<diagnostics file=\""))
        .expect("diagnostics text block");
    assert!(diagnostic_text.contains("ERROR [1:14] expected i32, found &str"));
}

#[tokio::test]
async fn post_edit_hook_is_silent_when_lsp_disabled() {
    // Temporary workspace holding a trivially valid Rust file.
    let sandbox = tempdir().expect("tempdir");
    let src_dir = sandbox.path().join("src");
    fs::create_dir_all(&src_dir).unwrap();
    fs::write(src_dir.join("main.rs"), "fn main() {}").unwrap();

    // LSP support is explicitly turned off for this engine.
    let disabled_lsp = crate::lsp::LspConfig {
        enabled: false,
        ..Default::default()
    };
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: sandbox.path().to_path_buf(),
            lsp_config: Some(disabled_lsp),
            ..Default::default()
        },
        &Config::default(),
    );

    // The hook must queue nothing for an edit...
    let edit_args = json!({ "path": "src/main.rs", "search": "x", "replace": "y" });
    engine.run_post_edit_lsp_hook("edit_file", &edit_args).await;
    assert!(engine.pending_lsp_blocks.is_empty());

    // ...and a flush must leave the session message list untouched.
    let count_before_flush = engine.session.messages.len();
    engine.flush_pending_lsp_diagnostics().await;
    assert_eq!(engine.session.messages.len(), count_before_flush);
}

#[tokio::test]
async fn post_edit_hook_skips_unknown_tool_names() {
    use crate::lsp::{Diagnostic, Language, Severity};
    use std::sync::Arc;

    let sandbox = tempdir().expect("tempdir");
    let (mut engine, _handle) = Engine::new(
        EngineConfig {
            workspace: sandbox.path().to_path_buf(),
            lsp_config: Some(crate::lsp::LspConfig::default()),
            ..Default::default()
        },
        &Config::default(),
    );

    // The fake transport holds a diagnostic that must never surface here.
    let unwanted = Diagnostic {
        line: 1,
        column: 1,
        severity: Severity::Error,
        message: "should not be reported".to_string(),
    };
    let transport = Arc::new(crate::lsp::tests::FakeTransport::new(vec![unwanted]));
    // Keep one handle for the call-count check after the hook has run.
    let installed = transport.clone();
    engine
        .lsp_manager
        .install_test_transport(Language::Rust, installed)
        .await;

    // `read_file` is not an editing tool: nothing is queued and the
    // transport is never called.
    let read_args = json!({ "path": "src/main.rs" });
    engine.run_post_edit_lsp_hook("read_file", &read_args).await;
    assert!(engine.pending_lsp_blocks.is_empty());
    assert_eq!(transport.call_count(), 0);
}