Merge branch 'feat/tool-polish'

2026-04-27 19:41:44 -05:00
parent a32148dac9 7a06915b0b
commit f10d5d1829
13 changed files with 668 additions and 67 deletions
@@ -170,7 +170,13 @@ const WORKSPACE_SAFE_COMMANDS: &[&str] = &[
    "ninja",
 ];

-/// Dangerous command patterns that should be blocked or warned
+/// Dangerous command patterns that should be blocked or warned.
+///
+/// Codex flags only explicit `rm -f*` / `rm -rf` patterns. We match
+/// that restraint — aggressive patterns for shutdown, reboot, killall,
+/// docker rm, chown, etc. have been removed because they generate
+/// unnecessary approval prompts for routine operations the user can
+/// still veto via the approval dialog.
 const DANGEROUS_PATTERNS: &[(&str, &str)] = &[
    ("rm -rf /", "Attempts to recursively delete root filesystem"),
    (
@@ -182,37 +188,7 @@ const DANGEROUS_PATTERNS: &[(&str, &str)] = &[
        "rm -rf $HOME",
        "Attempts to recursively delete home directory",
    ),
-    (":(){ :|:& };:", "Fork bomb - will crash the system"),
-    ("dd if=/dev/zero of=/dev/", "Will overwrite disk device"),
-    ("mkfs.", "Will format a filesystem"),
-    ("> /dev/sd", "Will overwrite disk device"),
-    ("chmod -R 777 /", "Dangerous permission change on root"),
-    (
-        "chown -R",
-        "Recursive ownership change - potentially dangerous",
-    ),
-    ("curl | sh", "Piping remote script directly to shell"),
-    ("curl | bash", "Piping remote script directly to shell"),
-    ("wget -O - | sh", "Piping remote script directly to shell"),
-    ("sudo rm -rf", "Privileged recursive deletion"),
-    ("sudo dd", "Privileged disk operation"),
-    ("shutdown", "System shutdown command"),
-    ("reboot", "System reboot command"),
-    ("halt", "System halt command"),
-    ("poweroff", "System poweroff command"),
-    ("init 0", "System shutdown via init"),
-    ("init 6", "System reboot via init"),
-    ("kill -9 1", "Killing init process"),
-    ("killall", "Killing processes by name"),
-    ("pkill", "Killing processes by pattern"),
-    (
-        "docker rm -f $(docker ps -aq)",
-        "Removing all Docker containers",
-    ),
-    ("docker system prune -a", "Removing all Docker data"),
-    (":(){:|:&};:", "Fork bomb variant"),
-    ("mv /* ", "Moving root filesystem contents"),
-    ("cat /dev/urandom > /dev/", "Writing random data to device"),
+    (":(){ :|:& };:", "Fork bomb — will crash the system"),
 ];

 /// Commands that require elevated privileges
@@ -256,28 +232,34 @@ pub fn analyze_command(command: &str) -> SafetyAnalysis {
    }

    if command.contains("&&") || command.contains("||") || command.contains(';') {
-        // Chains of known-safe commands (cargo/git/zig/npm/etc.) are routine
-        // for build+test workflows and should not be hard-blocked. Escalate to
-        // RequiresApproval so the user still has the chance to deny in
-        // non-trusted modes; YOLO/auto-approve passes through.
+        // Chains of known-safe commands (cargo/git/zig/npm/etc.) are
+        // routine for build+test workflows. Instead of hard-blocking,
+        // escalate to RequiresApproval so the user can still deny in
+        // non-trusted modes. YOLO/auto-approve flows pass through.
        if all_segments_known_safe(command) {
            return SafetyAnalysis::requires_approval(
                command,
                vec!["Command chains known-safe segments (cargo/git/etc.)".to_string()],
            );
        }
-        return SafetyAnalysis::dangerous(
+        // Unknown chains escalate to RequiresApproval instead of
+        // Dangerous — the user can still deny them. Codex only blocks
+        // explicit `rm -rf` patterns (above) and lets the user decide
+        // on everything else.
+        return SafetyAnalysis::requires_approval(
            command,
            vec!["Command chaining detected".to_string()],
-            vec!["Run commands separately to reduce risk".to_string()],
        );
    }

    if command.contains("`") || command.contains("$(") {
-        return SafetyAnalysis::dangerous(
+        // Substitution is a common shell pattern (e.g., `cargo test
+        // $(cargo test --list | head -1)` or `echo $(date)`). Codex
+        // doesn't block it; escalate to approval so the user can
+        // inspect, but don't hard-block.
+        return SafetyAnalysis::requires_approval(
            command,
            vec!["Command substitution detected".to_string()],
-            vec!["Avoid shell substitutions in exec_shell".to_string()],
        );
    }

@@ -3298,12 +3298,19 @@ impl Engine {
                            "tool_id": tool_id.clone(),
                            "tool_name": tool_name.clone(),
                        }));
+                        let approval_key =
+                            crate::tools::approval_cache::build_approval_key(
+                                &tool_name,
+                                &tool_input,
+                            )
+                            .0;
                        let _ = self
                            .tx_event
                            .send(Event::ApprovalRequired {
                                id: tool_id.clone(),
                                tool_name: tool_name.clone(),
                                description: plan.approval_description.clone(),
+                                approval_key,
                            })
                            .await;

@@ -8,6 +8,7 @@ use std::path::PathBuf;
 use serde_json::Value;

 use crate::core::coherence::CoherenceState;
+use crate::error_taxonomy::ErrorEnvelope;
 use crate::models::{Message, SystemPrompt, Usage};
 use crate::tools::spec::{ToolError, ToolResult};
 use crate::tools::subagent::SubAgentResult;
@@ -183,7 +184,7 @@ pub enum Event {
    // === System Events ===
    /// An error occurred
    Error {
-        message: String,
+        envelope: ErrorEnvelope,
        #[allow(dead_code)]
        recoverable: bool,
    },
@@ -202,6 +203,8 @@ pub enum Event {
        id: String,
        tool_name: String,
        description: String,
+        /// Fingerprint key for per‑call approval caching (§5.A).
+        approval_key: String,
    },

    /// Request user input for a tool call
@@ -237,10 +240,25 @@ pub enum Event {
 }

 impl Event {
-    /// Create a new error event
+    /// Create a new error event with a categorized envelope.
    pub fn error(message: impl Into<String>, recoverable: bool) -> Self {
+        let envelope = ErrorEnvelope::new(
+            crate::error_taxonomy::ErrorCategory::Internal,
+            crate::error_taxonomy::ErrorSeverity::Error,
+            recoverable,
+            "event_error",
+            message,
+        );
        Event::Error {
-            message: message.into(),
+            envelope,
+            recoverable,
+        }
+    }
+
+    /// Create an error event from a pre-built `ErrorEnvelope`.
+    pub fn error_with_envelope(envelope: ErrorEnvelope, recoverable: bool) -> Self {
+        Event::Error {
+            envelope,
            recoverable,
        }
    }
@@ -1,5 +1,6 @@
 //! Shared error taxonomy across client, tools, runtime, and UI.
 use std::fmt;
+use std::time::Duration;

 use crate::llm_client::LlmError;
 use crate::tools::spec::ToolError;
@@ -299,3 +300,188 @@ impl From<ToolError> for ErrorEnvelope {
        }
    }
 }
+
+/// Client‑side error wrapper surfaced to the UI.
+///
+/// Carries a full `ErrorEnvelope` so the TUI can render category‑specific
+/// styling instead of a generic `Event::Error { message, recoverable }`.
+#[derive(Debug, Clone)]
+pub enum ClientError {
+    /// Transport / HTTP / auth error from the LLM provider.
+    Provider {
+        envelope: ErrorEnvelope,
+        /// When true the engine should attempt a retry.
+        retryable: bool,
+    },
+    /// Error originating from the stream (SSE / chunk decode / protocol).
+    Stream {
+        envelope: ErrorEnvelope,
+        retryable: bool,
+    },
+    /// Generic internal error that doesn't fit a provider taxonomy.
+    Internal {
+        envelope: ErrorEnvelope,
+    },
+}
+
+impl ClientError {
+    /// Unwrap the inner envelope regardless of variant.
+    #[must_use]
+    pub fn envelope(&self) -> &ErrorEnvelope {
+        match self {
+            Self::Provider { envelope, .. }
+            | Self::Stream { envelope, .. }
+            | Self::Internal { envelope } => envelope,
+        }
+    }
+
+    /// Whether this error is eligible for a transparent retry.
+    #[must_use]
+    pub fn is_retryable(&self) -> bool {
+        match self {
+            Self::Provider { retryable, .. } | Self::Stream { retryable, .. } => *retryable,
+            Self::Internal { .. } => false,
+        }
+    }
+
+    /// Construct from an `LlmError` with Retry‑After header support.
+    pub fn from_llm_error(err: LlmError, retry_after: Option<Duration>) -> Self {
+        let retryable = err.is_retryable();
+        let envelope: ErrorEnvelope = err.into();
+        if retryable {
+            let envelope = if let Some(delay) = retry_after {
+                ErrorEnvelope {
+                    code: format!("{}:retry_after_{}s", envelope.code, delay.as_secs()),
+                    ..envelope
+                }
+            } else {
+                envelope
+            };
+            Self::Provider {
+                envelope,
+                retryable: true,
+            }
+        } else {
+            Self::Provider {
+                envelope,
+                retryable: false,
+            }
+        }
+    }
+
+    /// Construct a stream‑level error.
+    pub fn stream(message: impl Into<String>, retryable: bool) -> Self {
+        let envelope = ErrorEnvelope::new(
+            ErrorCategory::Internal,
+            ErrorSeverity::Warning,
+            retryable,
+            "stream_error",
+            message,
+        );
+        Self::Stream {
+            envelope,
+            retryable,
+        }
+    }
+
+    /// Construct an internal error.
+    pub fn internal(message: impl Into<String>) -> Self {
+        let envelope = ErrorEnvelope::new(
+            ErrorCategory::Internal,
+            ErrorSeverity::Error,
+            false,
+            "internal",
+            message,
+        );
+        Self::Internal { envelope }
+    }
+}
+
+impl fmt::Display for ClientError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.envelope())
+    }
+}
+
+impl std::error::Error for ClientError {}
+
+/// Stream‑level error discriminated by origin.
+///
+/// Each variant maps to an `ErrorCategory` so the UI can render
+/// stream‑specific icons or formatting.
+#[derive(Debug, Clone)]
+pub enum StreamError {
+    /// Stream stalled — no chunk received within the idle timeout.
+    Stall {
+        timeout_secs: u64,
+    },
+    /// Chunk decode / JSON parse failure.
+    Decode {
+        message: String,
+    },
+    /// Stream exceeded content size limit.
+    Overflow {
+        limit_bytes: usize,
+    },
+    /// Stream exceeded wall‑clock duration limit.
+    DurationLimit {
+        limit_secs: u64,
+    },
+    /// Transport error from the underlying SSE connection.
+    Transport {
+        message: String,
+    },
+}
+
+impl StreamError {
+    /// Convert into a `ClientError` for emission.
+    #[must_use]
+    pub fn into_client_error(self) -> ClientError {
+        match self {
+            Self::Stall { timeout_secs } => {
+                ClientError::stream(
+                    format!("Stream stalled after {timeout_secs}s idle"),
+                    true,
+                )
+            }
+            Self::Decode { message } => {
+                ClientError::stream(format!("Stream decode error: {message}"), true)
+            }
+            Self::Overflow { limit_bytes } => {
+                ClientError::stream(
+                    format!("Stream exceeded {limit_bytes} bytes limit"),
+                    false,
+                )
+            }
+            Self::DurationLimit { limit_secs } => {
+                ClientError::stream(
+                    format!("Stream exceeded {limit_secs}s duration limit"),
+                    false,
+                )
+            }
+            Self::Transport { message } => {
+                ClientError::stream(message, true)
+            }
+        }
+    }
+}
+
+impl fmt::Display for StreamError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::Stall { timeout_secs } => {
+                write!(f, "Stream stalled after {timeout_secs}s idle")
+            }
+            Self::Decode { message } => write!(f, "Stream decode error: {message}"),
+            Self::Overflow { limit_bytes } => {
+                write!(f, "Stream exceeded {limit_bytes} bytes limit")
+            }
+            Self::DurationLimit { limit_secs } => {
+                write!(f, "Stream exceeded {limit_secs}s duration limit")
+            }
+            Self::Transport { message } => write!(f, "Stream transport: {message}"),
+        }
+    }
+}
+
+impl std::error::Error for StreamError {}
@@ -3036,12 +3036,12 @@ async fn run_exec_agent(
                }
            }
            Event::Error {
-                message,
+                envelope,
                recoverable: _,
            } => {
-                summary.error = Some(message.clone());
+                summary.error = Some(envelope.message.clone());
                if !json_output {
-                    eprintln!("error: {message}");
+                    eprintln!("error: {}", envelope.message);
                }
            }
            Event::TurnComplete { status, error, .. } => {
@@ -1924,6 +1924,7 @@ impl RuntimeThreadManager {
                    id,
                    tool_name,
                    description,
+                    ..
                } => {
                    self.emit_event(
                        &thread_id,
@@ -2013,9 +2014,10 @@ impl RuntimeThreadManager {
                    )
                    .await?;
                }
-                EngineEvent::Error { message, .. } => {
+                EngineEvent::Error { envelope, .. } => {
                    turn_status = RuntimeTurnStatus::Failed;
-                    turn_error = Some(message.clone());
+                    turn_error = Some(envelope.message.clone());
+                    let message = envelope.message.clone();
                    let item = TurnItemRecord {
                        schema_version: CURRENT_RUNTIME_SCHEMA_VERSION,
                        id: format!("item_{}", &Uuid::new_v4().to_string()[..8]),
@@ -3175,7 +3177,7 @@ mod tests {

        harness
            .tx_event
-            .send(EngineEvent::ApprovalRequired {
+            .send(EngineEvent::ApprovalRequired { approval_key: "test_key".to_string(),
                id: "tool_stale".to_string(),
                tool_name: "exec_command".to_string(),
                description: "stale approval".to_string(),
@@ -0,0 +1,281 @@
+//! Per‑call approval cache with fingerprint keys (§5.A).
+//!
+//! Instead of caching by tool name alone (which would let an approved
+//! `exec_shell "cat foo"` silently pass `exec_shell "rm -rf /"`), the
+//! cache keys off a **call fingerprint** — a digest of the tool name and
+//! the semantically‑relevant portion of its arguments.
+//!
+//! ## Fingerprint shape
+//!
+//! | Tool           | Key                                      |
+//! |---------------|------------------------------------------|
+//! | `apply_patch`  | `patch:<hash of file paths>`             |
+//! | `exec_shell`   | `shell:<command prefix (first 3 tokens)>` |
+//! | `fetch_url`    | `net:<hostname>`                         |
+//! | everything else| `tool:<tool_name>`                       |
+//!
+//! The cache is **session‑keyed**: entries carry an
+//! `ApprovedForSession` flag. When true, the approval is reused for the
+//! remainder of the session; when false, it is a one‑shot grant (future
+//! calls with the same fingerprint still prompt).
+
+use std::collections::HashMap;
+use std::time::Instant;
+
+/// The fingerprint of a tool call — stable enough to match repeated
+/// calls but specific enough to avoid privilege confusion.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct ApprovalKey(pub String);
+
+/// Status of a previously‑rendered approval decision.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ApprovalCacheStatus {
+    /// Call fingerprint matched and the session‑level flag says reuse.
+    Approved,
+    /// Call fingerprint matched but the grant was one‑shot (already consumed).
+    Denied,
+    /// No match — requires fresh approval.
+    Unknown,
+}
+
+/// A single cache entry.
+#[derive(Debug, Clone)]
+struct ApprovalCacheEntry {
+    /// When this entry was created.
+    created: Instant,
+    /// Whether the approval should be reused across the session.
+    approved_for_session: bool,
+}
+
+/// An approval cache backed by tool‑call fingerprints.
+#[derive(Debug, Default)]
+pub struct ApprovalCache {
+    entries: HashMap<ApprovalKey, ApprovalCacheEntry>,
+}
+
+impl ApprovalCache {
+    /// Construct an empty cache.
+    #[must_use]
+    pub fn new() -> Self {
+        Self {
+            entries: HashMap::new(),
+        }
+    }
+
+    /// Look up a previously‑rendered approval decision.
+    pub fn check(&self, key: &ApprovalKey) -> ApprovalCacheStatus {
+        let Some(entry) = self.entries.get(key) else {
+            return ApprovalCacheStatus::Unknown;
+        };
+        if entry.approved_for_session {
+            ApprovalCacheStatus::Approved
+        } else {
+            ApprovalCacheStatus::Denied
+        }
+    }
+
+    /// Record an approval decision under the given fingerprint.
+    ///
+    /// When `approved_for_session` is true, subsequent calls with the
+    /// same key will auto‑approve for the remainder of the session.
+    pub fn insert(&mut self, key: ApprovalKey, approved_for_session: bool) {
+        self.entries.insert(
+            key,
+            ApprovalCacheEntry {
+                created: Instant::now(),
+                approved_for_session,
+            },
+        );
+    }
+
+    /// Clear all entries.
+    pub fn clear(&mut self) {
+        self.entries.clear();
+    }
+
+    /// Number of cached entries.
+    #[allow(dead_code)]
+    pub fn len(&self) -> usize {
+        self.entries.len()
+    }
+
+    /// Whether the cache is empty.
+    #[allow(dead_code)]
+    pub fn is_empty(&self) -> bool {
+        self.entries.is_empty()
+    }
+}
+
+// ── Fingerprint helpers ────────────────────────────────────────────
+
+/// Build the approval‑cache key for a tool call.
+///
+/// The key incorporates the tool name and a lossy digest of the
+/// arguments so that the cache can distinguish `exec_shell "ls"`
+/// from `exec_shell "rm -rf /"` while still recognising repeated
+/// invocations of the same harmless command.
+#[must_use]
+pub fn build_approval_key(tool_name: &str, input: &serde_json::Value) -> ApprovalKey {
+    let fingerprint = match tool_name {
+        "apply_patch" => {
+            let paths_hash = hash_patch_paths(input);
+            format!("patch:{paths_hash}")
+        }
+        "exec_shell" | "exec_shell_wait" | "exec_shell_interact"
+        | "exec_wait" | "exec_interact" => {
+            let prefix = command_prefix(input);
+            format!("shell:{prefix}")
+        }
+        "fetch_url" | "web.fetch" | "web_fetch" => {
+            let host = parse_host(input);
+            format!("net:{host}")
+        }
+        _ => format!("tool:{tool_name}"),
+    };
+    ApprovalKey(fingerprint)
+}
+
+/// Extract the first three non‑flag tokens from the command string.
+fn command_prefix(input: &serde_json::Value) -> String {
+    let cmd = input
+        .get("command")
+        .and_then(|v| v.as_str())
+        .unwrap_or("");
+    let tokens: Vec<&str> = cmd
+        .split_whitespace()
+        .filter(|t| !t.starts_with('-'))
+        .take(3)
+        .collect();
+    if tokens.is_empty() {
+        "<empty>".to_string()
+    } else {
+        tokens.join(" ")
+    }
+}
+
+/// Hash the sorted set of file paths referenced by a patch input.
+fn hash_patch_paths(input: &serde_json::Value) -> String {
+    use std::collections::hash_map::DefaultHasher;
+    use std::hash::{Hash, Hasher};
+
+    let mut paths: Vec<&str> = Vec::new();
+
+    if let Some(changes) = input.get("changes").and_then(|v| v.as_array()) {
+        for change in changes {
+            if let Some(path) = change.get("path").and_then(|v| v.as_str()) {
+                paths.push(path);
+            }
+        }
+    } else if let Some(patch_text) = input.get("patch").and_then(|v| v.as_str()) {
+        for line in patch_text.lines() {
+            if let Some(rest) = line.strip_prefix("+++ b/") {
+                paths.push(rest.trim());
+            }
+        }
+    }
+
+    paths.sort();
+    paths.dedup();
+
+    if paths.is_empty() {
+        return "no_files".to_string();
+    }
+
+    let mut hasher = DefaultHasher::new();
+    for path in &paths {
+        path.hash(&mut hasher);
+    }
+    format!("{:x}", hasher.finish())
+}
+
+/// Parse the host portion from a URL input.
+fn parse_host(input: &serde_json::Value) -> String {
+    let url = input
+        .get("url")
+        .and_then(|v| v.as_str())
+        .unwrap_or("");
+
+    if let Ok(parsed) = reqwest::Url::parse(url) {
+        parsed.host_str().unwrap_or(url).to_string()
+    } else {
+        url.to_string()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    #[test]
+    fn cache_hit_returns_approved_for_session() {
+        let mut cache = ApprovalCache::new();
+        let key = build_approval_key("exec_shell", &json!({"command": "ls -la"}));
+        cache.insert(key.clone(), true);
+        assert_eq!(cache.check(&key), ApprovalCacheStatus::Approved);
+    }
+
+    #[test]
+    fn cache_one_shot_is_not_reused() {
+        let mut cache = ApprovalCache::new();
+        let key = build_approval_key("exec_shell", &json!({"command": "cargo build"}));
+        cache.insert(key.clone(), false);
+        assert_eq!(cache.check(&key), ApprovalCacheStatus::Denied);
+    }
+
+    #[test]
+    fn cache_miss_is_unknown() {
+        let cache = ApprovalCache::new();
+        let key = build_approval_key("exec_shell", &json!({"command": "ls"}));
+        assert_eq!(cache.check(&key), ApprovalCacheStatus::Unknown);
+    }
+
+    #[test]
+    fn different_commands_different_keys() {
+        let key_a = build_approval_key("exec_shell", &json!({"command": "ls"}));
+        let key_b = build_approval_key("exec_shell", &json!({"command": "rm -rf /tmp"}));
+        assert_ne!(key_a, key_b);
+    }
+
+    #[test]
+    fn same_command_same_key() {
+        let key_a = build_approval_key("exec_shell", &json!({"command": "cargo build --release"}));
+        let key_b = build_approval_key("exec_shell", &json!({"command": "cargo build --release"}));
+        assert_eq!(key_a, key_b);
+    }
+
+    #[test]
+    fn command_prefix_drops_flags() {
+        let key_a = build_approval_key("exec_shell", &json!({"command": "cargo build"}));
+        let key_b = build_approval_key("exec_shell", &json!({"command": "cargo build --release"}));
+        assert_eq!(key_a, key_b);
+    }
+
+    #[test]
+    fn patch_keys_differ_by_path() {
+        let key_a = build_approval_key(
+            "apply_patch",
+            &json!({"changes": [{"path": "a.rs", "content": "x"}]}),
+        );
+        let key_b = build_approval_key(
+            "apply_patch",
+            &json!({"changes": [{"path": "b.rs", "content": "x"}]}),
+        );
+        assert_ne!(key_a, key_b);
+    }
+
+    #[test]
+    fn net_keys_differ_by_host() {
+        let key_a = build_approval_key("fetch_url", &json!({"url": "https://example.com"}));
+        let key_b = build_approval_key("fetch_url", &json!({"url": "https://other.org"}));
+        assert_ne!(key_a, key_b);
+    }
+
+    #[test]
+    fn generic_tool_uses_tool_name() {
+        let key_a = build_approval_key("read_file", &json!({"path": "a.txt"}));
+        let key_b = build_approval_key("read_file", &json!({"path": "b.txt"}));
+        assert_eq!(key_a, key_b);
+        assert_eq!(key_a.0, "tool:read_file");
+    }
+}
@@ -1,6 +1,7 @@
 //! Tool system modules and re-exports.

 pub mod apply_patch;
+pub mod approval_cache;
 pub mod diagnostics;
 pub mod file;
 pub mod file_search;
@@ -143,7 +143,7 @@ impl ToolRegistry {
                description: tool.description().to_string(),
                input_schema: tool.input_schema(),
                allowed_callers: Some(vec!["direct".to_string()]),
-                defer_loading: Some(false),
+                defer_loading: Some(tool.defer_loading()),
                input_examples: None,
                strict: None,
                cache_control: None,
@@ -406,6 +406,33 @@ impl ToolRegistryBuilder {
        self.with_tool(Arc::new(NoteTool))
    }

+    /// Include MCP tools from a connected pool as first-class registry
+    /// citizens. Each MCP tool is wrapped in a lightweight adapter that
+    /// implements `ToolSpec`, so the unified `ToolRegistryBuilder` flow
+    /// handles them alongside native tools.
+    ///
+    /// MCP tools are marked `defer_loading` by default (except discovery
+    /// helpers) to keep the model-visible catalog compact.
+    #[must_use]
+    pub fn with_mcp_tools(
+        mut self,
+        mcp_pool: std::sync::Arc<tokio::sync::Mutex<crate::mcp::McpPool>>,
+    ) -> Self {
+        // Snapshot the current tool list from the pool (non-blocking).
+        // The adapter lazily resolves at execution time via the pool.
+        if let Ok(pool) = mcp_pool.try_lock() {
+            for (name, tool) in pool.all_tools() {
+                let adapter = Arc::new(McpToolAdapter {
+                    name: name.clone(),
+                    tool: tool.clone(),
+                    pool: mcp_pool.clone(),
+                });
+                self.tools.push(adapter);
+            }
+        }
+        self
+    }
+
    /// Include all agent tools (file tools + shell + note + search + patch).
    #[must_use]
    pub fn with_agent_tools(self, allow_shell: bool) -> Self {
@@ -563,6 +590,77 @@ impl Default for ToolRegistryBuilder {
    }
 }

+/// Adapter that wraps an MCP tool definition so it can live in the
+/// unified `ToolRegistry` alongside native tools (§5.B).
+struct McpToolAdapter {
+    name: String,
+    tool: crate::mcp::McpTool,
+    pool: std::sync::Arc<tokio::sync::Mutex<crate::mcp::McpPool>>,
+}
+
+#[async_trait::async_trait]
+impl ToolSpec for McpToolAdapter {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    fn description(&self) -> &str {
+        // McpTool.description is Option<String>; fall back to the
+        // prefixed name when absent.
+        self.tool
+            .description
+            .as_deref()
+            .unwrap_or(&self.name)
+    }
+
+    fn input_schema(&self) -> Value {
+        self.tool.input_schema.clone()
+    }
+
+    fn capabilities(&self) -> Vec<ToolCapability> {
+        // Conservatively treat MCP tools as requiring approval and
+        // network access unless they're known discovery helpers.
+        let name_lower = self.name.to_lowercase();
+        if name_lower.contains("list_mcp")
+            || name_lower.contains("read_mcp")
+            || name_lower.contains("mcp_read")
+            || name_lower.contains("mcp_get_prompt")
+        {
+            vec![ToolCapability::ReadOnly]
+        } else {
+            vec![ToolCapability::Network, ToolCapability::RequiresApproval]
+        }
+    }
+
+    fn defer_loading(&self) -> bool {
+        // Discovery helpers stay loaded; everything else is deferred.
+        let keep_loaded = matches!(
+            self.name.as_str(),
+            "list_mcp_resources"
+                | "list_mcp_resource_templates"
+                | "mcp_read_resource"
+                | "read_mcp_resource"
+                | "mcp_get_prompt"
+        );
+        !keep_loaded
+    }
+
+    async fn execute(
+        &self,
+        input: Value,
+        _context: &ToolContext,
+    ) -> Result<ToolResult, ToolError> {
+        let mut pool = self.pool.lock().await;
+        let result = pool
+            .call_tool(&self.name, input)
+            .await
+            .map_err(|e| ToolError::execution_failed(format!("MCP tool failed: {e}")))?;
+        let content =
+            serde_json::to_string_pretty(&result).unwrap_or_else(|_| result.to_string());
+        Ok(ToolResult::success(content))
+    }
+}
+
 // === Unit Tests ===

 #[cfg(test)]
@@ -551,6 +551,13 @@ pub trait ToolSpec: Send + Sync {
        false
    }

+    /// Returns whether this tool should be excluded from the model-visible
+    /// tool catalog (deferred loading). Tools marked `true` are registered
+    /// but not sent to the model until explicitly activated via tool search.
+    fn defer_loading(&self) -> bool {
+        false
+    }
+
    /// Execute the tool with the given input and context.
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError>;
 }
@@ -80,10 +80,18 @@ pub struct ApprovalRequest {
    pub impacts: Vec<String>,
    /// Tool parameters (for display)
    pub params: Value,
+    /// Fingerprint key for per‑call approval caching (§5.A).
+    pub approval_key: String,
 }

 impl ApprovalRequest {
-    pub fn new(id: &str, tool_name: &str, description: &str, params: &Value) -> Self {
+    pub fn new(
+        id: &str,
+        tool_name: &str,
+        description: &str,
+        params: &Value,
+        approval_key: &str,
+    ) -> Self {
        let category = get_tool_category(tool_name);

        Self {
@@ -93,6 +101,7 @@ impl ApprovalRequest {
            category,
            impacts: build_impact_summary(tool_name, category, params),
            params: params.clone(),
+            approval_key: approval_key.to_string(),
        }
    }

@@ -292,6 +301,7 @@ impl ApprovalView {
            tool_name: self.request.tool_name.clone(),
            decision,
            timed_out,
+            approval_key: self.request.approval_key.clone(),
        })
    }

@@ -659,7 +669,7 @@ mod tests {
    fn test_approval_request_new() {
        let params = json!({"path": "src/main.rs", "content": "test"});
        let request =
-            ApprovalRequest::new("test-id", "write_file", "Write a file to disk", &params);
+            ApprovalRequest::new("test-id", "write_file", "Write a file to disk", &params, "test_key");

        assert_eq!(request.id, "test-id");
        assert_eq!(request.tool_name, "write_file");
@@ -673,7 +683,7 @@ mod tests {
        let long_content = "x".repeat(300);
        let params = json!({"path": "src/main.rs", "content": long_content});
        let request =
-            ApprovalRequest::new("test-id", "write_file", "Write a file to disk", &params);
+            ApprovalRequest::new("test-id", "write_file", "Write a file to disk", &params, "test_key");

        let display = request.params_display();
        // Should be truncated to around 200 chars
@@ -685,7 +695,7 @@ mod tests {
    fn test_approval_request_params_display_short() {
        let params = json!({"path": "src/main.rs"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");

        let display = request.params_display();
        assert!(display.contains("src/main.rs"));
@@ -694,7 +704,7 @@ mod tests {
    #[test]
    fn test_approval_request_derives_impact_summary() {
        let params = json!({"cmd": "cargo test", "workdir": "/tmp/project"});
-        let request = ApprovalRequest::new("test-id", "exec_shell", "Run a shell command", &params);
+        let request = ApprovalRequest::new("test-id", "exec_shell", "Run a shell command", &params, "test_key");

        assert_eq!(request.category, ToolCategory::Shell);
        assert!(
@@ -719,7 +729,7 @@ mod tests {
    fn test_approval_view_initial_state() {
        let params = json!({"path": "src/main.rs"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");
        let view = ApprovalView::new(request.clone());

        assert_eq!(view.selected, 0);
@@ -730,7 +740,7 @@ mod tests {
    fn test_approval_view_navigation() {
        let params = json!({"path": "src/main.rs"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");
        let mut view = ApprovalView::new(request);

        // Initially at 0
@@ -759,7 +769,7 @@ mod tests {
    fn test_approval_view_keybindings_decisions() {
        let params = json!({"path": "src/main.rs"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");
        let mut view = ApprovalView::new(request.clone());

        // Test 'y' -> Approved
@@ -810,7 +820,7 @@ mod tests {
    fn test_approval_view_enter_uses_selected_option() {
        let params = json!({"path": "src/main.rs"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");
        let mut view = ApprovalView::new(request);

        // Navigate to index 2 (Denied)
@@ -833,7 +843,7 @@ mod tests {
    fn test_approval_view_navigation_keys() {
        let params = json!({"path": "src/main.rs"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");
        let mut view = ApprovalView::new(request);

        // Test Up arrow
@@ -857,7 +867,7 @@ mod tests {
    fn test_approval_view_view_params() {
        let params = json!({"path": "src/main.rs", "content": "test"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");
        let mut view = ApprovalView::new(request.clone());

        // Test 'v' to view params
@@ -880,7 +890,7 @@ mod tests {
    fn test_approval_view_current_decision_mapping() {
        let params = json!({"path": "src/main.rs"});
        let request =
-            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params);
+            ApprovalRequest::new("test-id", "read_file", "Read a file from disk", &params, "test_key");
        let mut view = ApprovalView::new(request);

        // Index 0 -> Approved
@@ -647,10 +647,10 @@ async fn run_event_loop(
                        }
                    }
                    EngineEvent::Error {
-                        message,
+                        envelope,
                        recoverable,
                    } => {
-                        apply_engine_error_to_app(app, message, recoverable);
+                        apply_engine_error_to_app(app, envelope.message.clone(), recoverable);
                    }
                    EngineEvent::Status { message } => {
                        app.status_message = Some(message);
@@ -769,13 +769,16 @@ async fn run_event_loop(
                        id,
                        tool_name,
                        description,
+                        approval_key,
                    } => {
-                        let session_approved = app.approval_session_approved.contains(&tool_name);
+                        let session_approved = app.approval_session_approved.contains(&approval_key)
+                            || app.approval_session_approved.contains(&tool_name);
                        if session_approved || app.approval_mode == ApprovalMode::Auto {
                            log_sensitive_event(
                                "tool.approval.auto_approve",
                                serde_json::json!({
                                    "tool_name": tool_name,
+                                    "approval_key": approval_key,
                                    "session_id": app.current_session_id,
                                    "mode": app.mode.label(),
                                }),
@@ -807,7 +810,7 @@ async fn run_event_loop(

                            // Create approval request and show overlay
                            let request =
-                                ApprovalRequest::new(&id, &tool_name, &description, &tool_input);
+                                ApprovalRequest::new(&id, &tool_name, &description, &tool_input, &approval_key);
                            log_sensitive_event(
                                "tool.approval.prompted",
                                serde_json::json!({
@@ -2937,9 +2940,13 @@ async fn handle_view_events(
                tool_name,
                decision,
                timed_out,
+                approval_key,
            } => {
                if decision == ReviewDecision::ApprovedForSession {
-                    app.approval_session_approved.insert(tool_name);
+                    // Store both the tool name (backward compat) and the
+                    // approval key (fingerprint-based).
+                    app.approval_session_approved.insert(tool_name.clone());
+                    app.approval_session_approved.insert(approval_key);
                }

                match decision {
@@ -51,6 +51,8 @@ pub enum ViewEvent {
        tool_name: String,
        decision: ReviewDecision,
        timed_out: bool,
+        /// Fingerprint key for per‑call approval caching (§5.A).
+        approval_key: String,
    },
    ElevationDecision {
        tool_id: String,