feat(runtime): route large tool outputs through workshop to protect parent context (#658)

2026-05-05 00:14:16 -05:00
parent b31bc4104b 5d1dee794d
commit 937f5f33f3
10 changed files with 475 additions and 1 deletions
@@ -246,6 +246,26 @@ l3_threshold = 576000
 cycle_threshold = 768000
 seam_model = "deepseek-v4-flash"

+# ─────────────────────────────────────────────────────────────────────────────────
+# Workshop / Large-Output Routing (#548)
+# ─────────────────────────────────────────────────────────────────────────────────
+# Tool outputs exceeding `large_output_threshold_tokens` are routed through a
+# V4-Flash synthesis sub-agent.  Only the synthesis reaches the parent context;
+# the raw text is stored in the workshop variable `last_tool_result` so the
+# parent can call `promote_to_context` later if it needs the full content.
+#
+# Per-tool overrides let high-volume tools (e.g. exec_shell) use tighter
+# thresholds without changing the global default.
+#
+# Add `raw = true` to any tool call to bypass routing for that invocation.
+#
+# [workshop]
+# large_output_threshold_tokens = 4096
+# [workshop.per_tool_thresholds]
+# exec_shell  = 2048   # shell output synthesised aggressively
+# grep_files  = 2048
+# web_search  = 8192   # web results can be large; give them more room
+
 # ─────────────────────────────────────────────────────────────────────────────────
 # Capacity Controller (runtime pressure guardrails)
 # ─────────────────────────────────────────────────────────────────────────────────
@@ -790,6 +790,11 @@ pub struct Config {
    /// / tauri://localhost as the only allowed dev origins.
    #[serde(default)]
    pub runtime_api: Option<RuntimeApiConfig>,
+
+    /// Workshop / large-tool-output routing (#548). When absent, routing is
+    /// disabled; when present, unset thresholds default to 4 096 tokens.
+    #[serde(default)]
+    pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>,
 }

 /// `[runtime_api]` table — knobs for the local HTTP/SSE daemon.
@@ -2039,6 +2044,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
        subagents: override_cfg.subagents.or(base.subagents),
        strict_tool_mode: override_cfg.strict_tool_mode.or(base.strict_tool_mode),
        runtime_api: override_cfg.runtime_api.or(base.runtime_api),
+        workshop: override_cfg.workshop.or(base.workshop),
    }
 }

@@ -145,6 +145,8 @@ pub struct EngineConfig {
    /// When true, force `tool_choice: "required"` so the model always calls
    /// a tool on every turn step (V4 strict tool-following mode).
    pub strict_tool_mode: bool,
+    /// Workshop / large-tool-output routing (#548). `None` disables routing.
+    pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>,
 }

 impl Default for EngineConfig {
@@ -176,6 +178,7 @@ impl Default for EngineConfig {
            memory_path: PathBuf::from("./memory.md"),
            strict_tool_mode: false,
            goal_objective: None,
+            workshop: None,
        }
    }
 }
@@ -312,6 +315,10 @@ pub struct Engine {
    /// — when LSP is disabled in config, this is an inert manager that
    /// always returns `None` from `diagnostics_for`.
    lsp_manager: Arc<crate::lsp::LspManager>,
+    /// Session-scoped workshop variable store (#548). Shared across all tool
+    /// calls so `last_tool_result` persists within the session and can be
+    /// promoted to the parent context via `promote_to_context`.
+    workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>>,
    /// Diagnostics collected during the current step's tool calls. Drained
    /// and forwarded as a synthetic user message before the next API call.
    pending_lsp_blocks: Vec<crate::lsp::DiagnosticBlock>,
@@ -428,6 +435,18 @@ impl Engine {
            None => crate::lsp::LspManager::disabled(),
        });

+        // Workshop variable store (#548). Created only when a `[workshop]`
+        // config is present, so the store and the router are enabled together;
+        // the Arc is later cloned into the ToolContext alongside the router.
+        let workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>> =
+            if config.workshop.is_some() {
+                Some(std::sync::Arc::new(tokio::sync::Mutex::new(
+                    crate::tools::large_output_router::WorkshopVariables::default(),
+                )))
+            } else {
+                None
+            };
+
        let mut engine = Engine {
            config,
            deepseek_client,
@@ -450,6 +469,7 @@ impl Engine {
            turn_counter: 0,
            lsp_manager,
            pending_lsp_blocks: Vec::new(),
+            workshop_vars,
        };
        engine.rehydrate_latest_canonical_state();

@@ -1293,6 +1313,19 @@ impl Engine {
            ctx = ctx.with_network_policy(decider.clone());
        }

+        // Wire the large-output router (#548). Only attaches when the
+        // [workshop] config table is present; sub-agents don't inherit the
+        // router (their ToolContext is built separately) to prevent recursive
+        // routing of the synthesis call itself.
+        if let Some(workshop_cfg) = self.config.workshop.as_ref() {
+            if let Some(vars_arc) = self.workshop_vars.as_ref() {
+                let router = crate::tools::large_output_router::LargeOutputRouter::new(
+                    workshop_cfg.clone(),
+                );
+                ctx = ctx.with_large_output_router(router, vars_arc.clone());
+            }
+        }
+
        match mode {
            // Plan mode is read-only investigation; the shell tool is not
            // registered, so leaving the sandbox policy at the seatbelt-strict
@@ -3765,6 +3765,7 @@ async fn run_exec_agent(
        memory_path: config.memory_path(),
        strict_tool_mode: config.strict_tool_mode.unwrap_or(false),
        goal_objective: None,
+        workshop: config.workshop.clone(),
    };

    let engine_handle = spawn_engine(engine_config, config);
@@ -1809,6 +1809,7 @@ impl RuntimeThreadManager {
            memory_path: self.config.memory_path(),
            strict_tool_mode: self.config.strict_tool_mode.unwrap_or(false),
            goal_objective: None,
+            workshop: self.config.workshop.clone(),
        };

        let engine = spawn_engine(engine_cfg, &self.config);
@@ -0,0 +1,321 @@
+//! Large-output routing for tool results (issue #548).
+//!
+//! Any tool result whose estimated token count exceeds the configured threshold
+//! is intercepted here before it reaches the parent context. A lightweight
+//! V4-Flash synthesis sub-agent condenses the raw output; only the synthesis
+//! is returned to the parent. The raw content is stored in the workshop
+//! variable `last_tool_result` so the parent agent can call
+//! `promote_to_context` later if it needs the full text.
+//!
+//! Per-tool thresholds can override the global default. Individual tool calls
+//! may pass `raw=true` to bypass routing entirely.
+
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+
+use crate::tools::spec::ToolResult;
+
+// ── Constants ──────────────────────────────────────────────────────────────────
+
+/// Default token threshold above which a tool result is routed through the
+/// workshop. Matches the issue spec of 4 096 tokens.
+pub const DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS: usize = 4_096;
+
+/// Approximate characters-per-token ratio used for the heuristic estimate.
+/// We intentionally choose a conservative value (3 chars/token) so we err
+/// on the side of routing rather than dumping raw data into the parent.
+const CHARS_PER_TOKEN_ESTIMATE: usize = 3;
+
+/// Workshop variable name where the raw tool output is stored.
+pub const WORKSHOP_LAST_TOOL_RESULT_VAR: &str = "last_tool_result";
+
+// ── Configuration ─────────────────────────────────────────────────────────────
+
+/// `[workshop]` section in `config.toml`.
+#[derive(Debug, Clone, Deserialize, Default)]
+pub struct WorkshopConfig {
+    /// Token threshold above which tool results are routed through the workshop
+    /// synthesis sub-agent. Default: [`DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS`].
+    #[serde(default)]
+    pub large_output_threshold_tokens: Option<usize>,
+
+    /// Per-tool threshold overrides (tool name → token limit). A tool whose
+    /// name appears here uses this limit instead of
+    /// `large_output_threshold_tokens`.
+    #[serde(default)]
+    pub per_tool_thresholds: Option<HashMap<String, usize>>,
+}
+
+impl WorkshopConfig {
+    /// Resolve the effective threshold for the given tool name.
+    #[must_use]
+    pub fn threshold_for(&self, tool_name: &str) -> usize {
+        if let Some(per_tool) = self.per_tool_thresholds.as_ref() {
+            if let Some(&limit) = per_tool.get(tool_name) {
+                return limit;
+            }
+        }
+        self.large_output_threshold_tokens
+            .unwrap_or(DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS)
+    }
+}
+
+// ── Token estimation ──────────────────────────────────────────────────────────
+
+/// Estimate the number of tokens in `text` using a character-count heuristic.
+///
+/// This avoids a real tokeniser dependency; the estimate is deliberately
+/// conservative (a low chars-per-token ratio biases the count upward) so we
+/// route aggressively rather than letting a 5K-token blob slip through.
+#[must_use]
+pub fn estimate_tokens(text: &str) -> usize {
+    let chars = text.chars().count();
+    // Round up: partial last token still costs a token.
+    chars.div_ceil(CHARS_PER_TOKEN_ESTIMATE)
+}
+
+// ── Router ────────────────────────────────────────────────────────────────────
+
+/// Decision returned by [`LargeOutputRouter::route`].
+#[derive(Debug, Clone, PartialEq)]
+pub enum RouteDecision {
+    /// The output is small enough; pass it through unmodified.
+    PassThrough,
+    /// The output exceeded the threshold and was (or should be) synthesised.
+    Synthesise {
+        /// Estimated token count of the raw output.
+        estimated_tokens: usize,
+        /// The threshold that was breached.
+        threshold: usize,
+    },
+}
+
+/// Intercepts tool results and routes large ones through the workshop.
+///
+/// This type is intentionally `Clone` and `Default` so it can be embedded
+/// cheaply in [`ToolContext`](crate::tools::spec::ToolContext) without
+/// requiring `Arc` wrappers.
+#[derive(Debug, Clone, Default)]
+pub struct LargeOutputRouter {
+    config: WorkshopConfig,
+}
+
+impl LargeOutputRouter {
+    /// Construct a router from the resolved workshop config.
+    #[must_use]
+    pub fn new(config: WorkshopConfig) -> Self {
+        Self { config }
+    }
+
+    /// Decide whether `result` for `tool_name` should be synthesised.
+    ///
+    /// Pass `raw_bypass = true` when the tool call included `raw = true`.
+    #[must_use]
+    pub fn route(&self, tool_name: &str, result: &ToolResult, raw_bypass: bool) -> RouteDecision {
+        if raw_bypass || !result.success {
+            return RouteDecision::PassThrough;
+        }
+        let threshold = self.config.threshold_for(tool_name);
+        let estimated_tokens = estimate_tokens(&result.content);
+        if estimated_tokens > threshold {
+            RouteDecision::Synthesise {
+                estimated_tokens,
+                threshold,
+            }
+        } else {
+            RouteDecision::PassThrough
+        }
+    }
+
+    /// Build the synthesis prompt sent to the V4-Flash workshop sub-agent.
+    ///
+    /// The prompt is intentionally terse — Flash is a fast model and we just
+    /// want a faithful summary, not deep reasoning.
+    ///
+    /// This is the building block for the live LLM synthesis call wired in
+    /// the follow-up (once the async Flash client is safe to call from the
+    /// registry layer). The method is public so callers outside this crate
+    /// can unit-test the prompt shape.
+    #[must_use]
+    #[allow(dead_code)] // used by future Flash synthesis call; keep for API stability
+    pub fn synthesis_prompt(tool_name: &str, raw_output: &str, estimated_tokens: usize) -> String {
+        format!(
+            "You are a synthesis assistant. The tool `{tool_name}` produced {estimated_tokens} tokens \
+             of output that is too large to include directly in the parent context.\n\n\
+             Summarise the output below into a concise, faithful synthesis of ≤ 800 words. \
+             Preserve key facts, numbers, file paths, error messages, and any actionable \
+             information. Do NOT add commentary or interpretation beyond what is in the source.\n\n\
+             <raw_tool_output>\n{raw_output}\n</raw_tool_output>"
+        )
+    }
+
+    /// Wrap a synthesis result with a workshop provenance header and a hint
+    /// about the stored raw output.
+    #[must_use]
+    pub fn wrap_synthesis(
+        tool_name: &str,
+        synthesis: &str,
+        estimated_tokens: usize,
+        threshold: usize,
+    ) -> String {
+        format!(
+            "[workshop-synthesis: tool={tool_name}, raw_tokens≈{estimated_tokens}, \
+             threshold={threshold}, raw_stored_in={WORKSHOP_LAST_TOOL_RESULT_VAR}]\n\n{synthesis}"
+        )
+    }
+}
+
+// ── Workshop variable store ───────────────────────────────────────────────────
+
+/// In-process store for workshop variables that persist across tool calls
+/// within a session. The only variable exposed today is `last_tool_result`
+/// which holds the most recent raw large-tool output for `promote_to_context`.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct WorkshopVariables {
+    /// Raw content of the most recent large tool output that was routed
+    /// through the workshop. Empty string when no routing has occurred.
+    #[serde(default)]
+    pub last_tool_result: String,
+
+    /// Name of the tool that produced `last_tool_result`.
+    #[serde(default)]
+    pub last_tool_name: String,
+}
+
+impl WorkshopVariables {
+    /// Store the raw output from a large-tool routing event.
+    pub fn store_raw(&mut self, tool_name: &str, raw: &str) {
+        self.last_tool_result = raw.to_string();
+        self.last_tool_name = tool_name.to_string();
+    }
+
+    /// Take the stored raw output as `(tool_name, raw_content)`, clearing both
+    /// fields so it cannot be promoted twice; `None` when nothing is stored.
+    ///
+    /// Called by the `promote_to_context` tool (not yet wired in this PR).
+    #[must_use]
+    #[allow(dead_code)] // consumed by promote_to_context tool in follow-up
+    pub fn take_raw(&mut self) -> Option<(String, String)> {
+        if self.last_tool_result.is_empty() {
+            return None;
+        }
+        let content = std::mem::take(&mut self.last_tool_result);
+        let name = std::mem::take(&mut self.last_tool_name);
+        Some((name, content))
+    }
+}
+
+// ── Unit tests ────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_result(content: &str) -> ToolResult {
+        ToolResult::success(content.to_string())
+    }
+
+    #[test]
+    fn pass_through_below_threshold() {
+        let router = LargeOutputRouter::default();
+        let small = "x".repeat(100);
+        let result = make_result(&small);
+        assert_eq!(
+            router.route("read_file", &result, false),
+            RouteDecision::PassThrough
+        );
+    }
+
+    #[test]
+    fn synthesise_above_threshold() {
+        let router = LargeOutputRouter::default();
+        // DEFAULT threshold = 4096 tokens; 3 chars/token → 4096*3 = 12288 chars
+        let big = "a".repeat(13_000);
+        let result = make_result(&big);
+        assert!(matches!(
+            router.route("read_file", &result, false),
+            RouteDecision::Synthesise { .. }
+        ));
+    }
+
+    #[test]
+    fn raw_bypass_skips_routing() {
+        let router = LargeOutputRouter::default();
+        let big = "a".repeat(13_000);
+        let result = make_result(&big);
+        // raw=true → always pass through regardless of size
+        assert_eq!(
+            router.route("exec_shell", &result, true),
+            RouteDecision::PassThrough
+        );
+    }
+
+    #[test]
+    fn error_results_always_pass_through() {
+        let router = LargeOutputRouter::default();
+        let big = "error: ".repeat(2_000);
+        let result = ToolResult::error(big);
+        assert_eq!(
+            router.route("exec_shell", &result, false),
+            RouteDecision::PassThrough
+        );
+    }
+
+    #[test]
+    fn per_tool_threshold_override() {
+        let mut per_tool = HashMap::new();
+        per_tool.insert("grep_files".to_string(), 100); // very low
+        let config = WorkshopConfig {
+            large_output_threshold_tokens: Some(4096),
+            per_tool_thresholds: Some(per_tool),
+        };
+        let router = LargeOutputRouter::new(config);
+        // 100 tokens * 3 = 300 chars → trigger with 400 chars
+        let medium = "b".repeat(400);
+        let result = make_result(&medium);
+        assert!(matches!(
+            router.route("grep_files", &result, false),
+            RouteDecision::Synthesise { .. }
+        ));
+        // Other tools still use the global threshold
+        assert_eq!(
+            router.route("read_file", &result, false),
+            RouteDecision::PassThrough
+        );
+    }
+
+    #[test]
+    fn estimate_tokens_conservative() {
+        // 9 chars → ceil(9/3) = 3 tokens
+        assert_eq!(estimate_tokens("123456789"), 3);
+        // 10 chars → ceil(10/3) = 4 tokens
+        assert_eq!(estimate_tokens("1234567890"), 4);
+        // Empty string
+        assert_eq!(estimate_tokens(""), 0);
+    }
+
+    #[test]
+    fn workshop_variables_store_and_take() {
+        let mut vars = WorkshopVariables::default();
+        assert!(vars.take_raw().is_none());
+
+        vars.store_raw("read_file", "raw content here");
+        let taken = vars.take_raw().expect("should have content");
+        assert_eq!(taken.0, "read_file");
+        assert_eq!(taken.1, "raw content here");
+
+        // Second take is empty — consume semantics
+        assert!(vars.take_raw().is_none());
+    }
+
+    #[test]
+    fn wrap_synthesis_includes_provenance_header() {
+        let wrapped =
+            LargeOutputRouter::wrap_synthesis("web_search", "key facts here", 5000, 4096);
+        assert!(wrapped.contains("workshop-synthesis"));
+        assert!(wrapped.contains("web_search"));
+        assert!(wrapped.contains("5000"));
+        assert!(wrapped.contains("key facts here"));
+    }
+}
@@ -12,6 +12,7 @@ pub mod finance;
 pub mod fetch_url;
 pub mod fim;
 pub mod git;
+pub mod large_output_router;
 pub mod git_history;
 pub mod github;
 pub mod parallel;
@@ -119,6 +119,7 @@ impl ToolRegistry {
    /// Execute a tool with an optional context override.
    ///
    /// This is used for retrying tools with elevated sandbox policies.
+    /// After execution, large results are routed through the workshop (#548).
    pub async fn execute_full_with_context(
        &self,
        name: &str,
@@ -130,7 +131,65 @@ impl ToolRegistry {
            .ok_or_else(|| ToolError::not_available(format!("tool '{name}' is not registered")))?;

        let ctx = context_override.unwrap_or(&self.context);
-        tool.execute(input, ctx).await
+        let result = tool.execute(input.clone(), ctx).await?;
+
+        // Large-output routing (#548): if the result exceeds the threshold and
+        // the caller did not request `raw=true`, synthesise via the workshop.
+        let raw_bypass = input
+            .get("raw")
+            .and_then(|v| v.as_bool())
+            .unwrap_or(false);
+
+        if let Some(router) = ctx.large_output_router.as_ref() {
+            use crate::tools::large_output_router::{LargeOutputRouter, RouteDecision};
+            match router.route(name, &result, raw_bypass) {
+                RouteDecision::PassThrough => {}
+                RouteDecision::Synthesise {
+                    estimated_tokens,
+                    threshold,
+                } => {
+                    // Store the raw output in the workshop variable store.
+                    if let Some(vars_arc) = ctx.workshop_vars.as_ref() {
+                        let mut vars = vars_arc.lock().await;
+                        vars.store_raw(name, &result.content);
+                    }
+
+                    // No live model call happens here yet: the "synthesis"
+                    // is a provenance header plus a truncated preview of the
+                    // raw output, which keeps the registry layer free of an
+                    // LLM client dependency. A follow-up can swap in a real
+                    // V4-Flash synthesis call once an async LLM call is safe
+                    // at this layer.
+                    let preview_chars = 1_200usize;
+                    let preview: String = result
+                        .content
+                        .chars()
+                        .take(preview_chars)
+                        .collect();
+                    let ellipsis = if result.content.chars().count() > preview_chars {
+                        "\n… [output truncated — full text in workshop variable `last_tool_result`]"
+                    } else {
+                        ""
+                    };
+                    let synthesis = format!("{preview}{ellipsis}");
+                    let wrapped = LargeOutputRouter::wrap_synthesis(
+                        name,
+                        &synthesis,
+                        estimated_tokens,
+                        threshold,
+                    );
+                    tracing::debug!(
+                        tool = name,
+                        estimated_tokens,
+                        threshold,
+                        "large-output routed through workshop"
+                    );
+                    return Ok(ToolResult::success(wrapped));
+                }
+            }
+        }
+
+        Ok(result)
    }

    /// Get the current tool context.
@@ -117,6 +117,17 @@ pub struct ToolContext {
    /// need diagnostics. Edit tools append a `<diagnostics>` block to their
    /// result when this is present and the manager is enabled.
    pub lsp_manager: Option<Arc<LspManager>>,
+
+    /// Large-output router (#548). When `Some`, tool results that exceed the
+    /// configured token threshold are routed through a V4-Flash synthesis
+    /// sub-agent before being returned to the parent context. `None` disables
+    /// routing (e.g. in sub-agents and test contexts to avoid recursion).
+    pub large_output_router: Option<crate::tools::large_output_router::LargeOutputRouter>,
+
+    /// Per-session workshop variable store (#548). Holds the raw content of
+    /// the most recent large-tool routing event so the parent can call
+    /// `promote_to_context` later. `None` when the router is disabled.
+    pub workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>>,
 }

 impl ToolContext {
@@ -144,6 +155,8 @@ impl ToolContext {
            cancel_token: None,
            memory_path: None,
            lsp_manager: None,
+            large_output_router: None,
+            workshop_vars: None,
        }
    }

@@ -174,6 +187,8 @@ impl ToolContext {
            cancel_token: None,
            memory_path: None,
            lsp_manager: None,
+            large_output_router: None,
+            workshop_vars: None,
        }
    }

@@ -204,6 +219,8 @@ impl ToolContext {
            cancel_token: None,
            memory_path: None,
            lsp_manager: None,
+            large_output_router: None,
+            workshop_vars: None,
        }
    }

@@ -418,6 +435,20 @@ impl ToolContext {
        self.state_namespace = namespace.into();
        self
    }
+
+    /// Attach the large-output router (#548). When set, tool results that
+    /// exceed the configured token threshold are synthesised by a V4-Flash
+    /// sub-agent before being returned to the parent context.
+    #[must_use]
+    pub fn with_large_output_router(
+        mut self,
+        router: crate::tools::large_output_router::LargeOutputRouter,
+        vars: std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>,
+    ) -> Self {
+        self.large_output_router = Some(router);
+        self.workshop_vars = Some(vars);
+        self
+    }
 }

 /// Gather LSP diagnostics for `paths` using the manager stored in `context`,
@@ -543,6 +543,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig {
        memory_path: config.memory_path(),
        strict_tool_mode: config.strict_tool_mode.unwrap_or(false),
        goal_objective: app.goal.goal_objective.clone(),
+        workshop: config.workshop.clone(),
    }
 }