diff --git a/config.example.toml b/config.example.toml index 280f0520..e14444cf 100644 --- a/config.example.toml +++ b/config.example.toml @@ -240,6 +240,26 @@ l3_threshold = 576000 cycle_threshold = 768000 seam_model = "deepseek-v4-flash" +# ───────────────────────────────────────────────────────────────────────────────── +# Workshop / Large-Output Routing (#548) +# ───────────────────────────────────────────────────────────────────────────────── +# Tool outputs exceeding `large_output_threshold_tokens` are routed through a +# V4-Flash synthesis sub-agent. Only the synthesis reaches the parent context; +# the raw text is stored in the workshop variable `last_tool_result` so the +# parent can call `promote_to_context` later if it needs the full content. +# +# Per-tool overrides let high-volume tools (e.g. exec_shell) use tighter +# thresholds without changing the global default. +# +# Add `raw = true` to any tool call to bypass routing for that invocation. +# Routing stays off until the `[workshop]` table below is uncommented. +# [workshop] +# large_output_threshold_tokens = 4096 +# [workshop.per_tool_thresholds] +# exec_shell = 2048 # shell output synthesised aggressively +# grep_files = 2048 +# web_search = 8192 # web results can be large; give them more room + # ───────────────────────────────────────────────────────────────────────────────── # Capacity Controller (runtime pressure guardrails) # ───────────────────────────────────────────────────────────────────────────────── diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index a57d27d9..f01d737b 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -778,6 +778,11 @@ pub struct Config { /// / tauri://localhost as the only allowed dev origins. #[serde(default)] pub runtime_api: Option, + + /// Workshop / large-tool-output routing (#548). When absent, routing is + /// disabled; when present, unset thresholds default to 4 096 tokens. + #[serde(default)] + pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>, } /// `[runtime_api]` table — knobs for the local HTTP/SSE daemon.
@@ -2026,6 +2031,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config { }, subagents: override_cfg.subagents.or(base.subagents), runtime_api: override_cfg.runtime_api.or(base.runtime_api), + workshop: override_cfg.workshop.or(base.workshop), } } diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index ff053451..9b642900 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -140,6 +140,8 @@ pub struct EngineConfig { /// consulted when `memory_enabled` is `true`. pub memory_path: PathBuf, pub goal_objective: Option, + /// Workshop / large-tool-output routing (#548). `None` disables routing. + pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>, } impl Default for EngineConfig { @@ -170,6 +172,7 @@ impl Default for EngineConfig { memory_enabled: false, memory_path: PathBuf::from("./memory.md"), goal_objective: None, + workshop: None, } } } @@ -306,6 +309,10 @@ pub struct Engine { /// — when LSP is disabled in config, this is an inert manager that /// always returns `None` from `diagnostics_for`. lsp_manager: Arc, + /// Session-scoped workshop variable store (#548). Shared across all tool + /// calls so `last_tool_result` persists within the session and can be + /// promoted to the parent context via `promote_to_context`. + workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>>, /// Diagnostics collected during the current step's tool calls. Drained /// and forwarded as a synthetic user message before the next API call. pending_lsp_blocks: Vec, @@ -420,6 +427,18 @@ impl Engine { None => crate::lsp::LspManager::disabled(), }); + // Workshop variable store (#548). Created only when a `[workshop]` config + // is present, so the store and the router are enabled or disabled together; + // the router is attached to each ToolContext alongside this Arc.
+ let workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>> = + if config.workshop.is_some() { + Some(std::sync::Arc::new(tokio::sync::Mutex::new( + crate::tools::large_output_router::WorkshopVariables::default(), + ))) + } else { + None + }; + let mut engine = Engine { config, deepseek_client, @@ -442,6 +461,7 @@ impl Engine { turn_counter: 0, lsp_manager, pending_lsp_blocks: Vec::new(), + workshop_vars, }; engine.rehydrate_latest_canonical_state(); @@ -1282,6 +1302,19 @@ impl Engine { ctx = ctx.with_network_policy(decider.clone()); } + // Wire the large-output router (#548). Only attaches when the + // [workshop] config table is present; sub-agents don't inherit the + // router (their ToolContext is built separately) to prevent recursive + // routing of the synthesis call itself. + if let Some(workshop_cfg) = self.config.workshop.as_ref() { + if let Some(vars_arc) = self.workshop_vars.as_ref() { + let router = crate::tools::large_output_router::LargeOutputRouter::new( + workshop_cfg.clone(), + ); + ctx = ctx.with_large_output_router(router, vars_arc.clone()); + } + } + match mode { // Plan mode is read-only investigation; the shell tool is not // registered, so leaving the sandbox policy at the seatbelt-strict diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 40b664df..85a1d62f 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -3725,6 +3725,7 @@ async fn run_exec_agent( memory_enabled: config.memory_enabled(), memory_path: config.memory_path(), goal_objective: None, + workshop: config.workshop.clone(), }; let engine_handle = spawn_engine(engine_config, config); diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 33772552..0c804741 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -1812,6 +1812,7 @@ impl RuntimeThreadManager { memory_enabled: self.config.memory_enabled(), memory_path: self.config.memory_path(), goal_objective: None, + workshop: self.config.workshop.clone(), }; let
engine = spawn_engine(engine_cfg, &self.config); diff --git a/crates/tui/src/tools/large_output_router.rs b/crates/tui/src/tools/large_output_router.rs new file mode 100644 index 00000000..0f600ef5 --- /dev/null +++ b/crates/tui/src/tools/large_output_router.rs @@ -0,0 +1,321 @@ +//! Large-output routing for tool results (issue #548). +//! +//! Any tool result whose estimated token count exceeds the configured threshold +//! is meant to be condensed by a lightweight V4-Flash synthesis sub-agent so +//! that only the synthesis reaches the parent context. This module supplies the +//! routing decision, prompt/header helpers and the workshop variable store; the +//! raw content is kept in the workshop variable `last_tool_result` so the parent +//! agent can call `promote_to_context` later if it needs the full text. +//! +//! Per-tool thresholds can override the global default. Individual tool calls +//! may pass `raw=true` to bypass routing entirely. + +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use crate::tools::spec::ToolResult; + +// ── Constants ────────────────────────────────────────────────────────────────── + +/// Default token threshold above which a tool result is routed through the +/// workshop. Matches the issue spec of 4 096 tokens. +pub const DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS: usize = 4_096; + +/// Approximate characters-per-token ratio used for the heuristic estimate. +/// We intentionally choose a conservative value (3 chars/token) so we err +/// on the side of routing rather than dumping raw data into the parent. +const CHARS_PER_TOKEN_ESTIMATE: usize = 3; + +/// Workshop variable name where the raw tool output is stored. +pub const WORKSHOP_LAST_TOOL_RESULT_VAR: &str = "last_tool_result"; + +// ── Configuration ───────────────────────────────────────────────────────────── + +/// `[workshop]` section in `config.toml`.
+#[derive(Debug, Clone, Deserialize, Default)] +pub struct WorkshopConfig { + /// Token threshold above which tool results are routed through the workshop + /// synthesis sub-agent. Default: [`DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS`]. + #[serde(default)] + pub large_output_threshold_tokens: Option<usize>, + + /// Per-tool threshold overrides (tool name → token limit). A tool whose + /// name appears here uses this limit instead of + /// `large_output_threshold_tokens`. + #[serde(default)] + pub per_tool_thresholds: Option<HashMap<String, usize>>, +} + +impl WorkshopConfig { + /// Resolve the effective threshold for the given tool name. + #[must_use] + pub fn threshold_for(&self, tool_name: &str) -> usize { + if let Some(per_tool) = self.per_tool_thresholds.as_ref() { + if let Some(&limit) = per_tool.get(tool_name) { + return limit; + } + } + self.large_output_threshold_tokens + .unwrap_or(DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS) + } +} + +// ── Token estimation ────────────────────────────────────────────────────────── + +/// Estimate the number of tokens in `text` using a character-count heuristic. +/// +/// This avoids a real tokeniser dependency; the estimate deliberately errs +/// high (a low chars-per-token divisor inflates the count) so we route +/// aggressively rather than letting a 5K-token blob slip through. +#[must_use] +pub fn estimate_tokens(text: &str) -> usize { + let chars = text.chars().count(); + // Round up: partial last token still costs a token. + chars.div_ceil(CHARS_PER_TOKEN_ESTIMATE) +} + +// ── Router ──────────────────────────────────────────────────────────────────── + +/// Decision returned by [`LargeOutputRouter::route`]. +#[derive(Debug, Clone, PartialEq)] +pub enum RouteDecision { + /// The output is small enough; pass it through unmodified. + PassThrough, + /// The output exceeded the threshold and was (or should be) synthesised. + Synthesise { + /// Estimated token count of the raw output. + estimated_tokens: usize, + /// The threshold that was breached.
+ threshold: usize, + }, +} + +/// Intercepts tool results and routes large ones through the workshop. +/// +/// This type is intentionally `Clone` and `Default` so it can be embedded +/// cheaply in [`ToolContext`](crate::tools::spec::ToolContext) without +/// requiring `Arc` wrappers. +#[derive(Debug, Clone, Default)] +pub struct LargeOutputRouter { + config: WorkshopConfig, +} + +impl LargeOutputRouter { + /// Construct a router from the resolved workshop config. + #[must_use] + pub fn new(config: WorkshopConfig) -> Self { + Self { config } + } + + /// Decide whether `result` for `tool_name` should be synthesised. + /// + /// Passes through when `raw_bypass` is set (`raw = true`) or `result` failed. + #[must_use] + pub fn route(&self, tool_name: &str, result: &ToolResult, raw_bypass: bool) -> RouteDecision { + if raw_bypass || !result.success { + return RouteDecision::PassThrough; + } + let threshold = self.config.threshold_for(tool_name); + let estimated_tokens = estimate_tokens(&result.content); + if estimated_tokens > threshold { + RouteDecision::Synthesise { + estimated_tokens, + threshold, + } + } else { + RouteDecision::PassThrough + } + } + + /// Build the synthesis prompt sent to the V4-Flash workshop sub-agent. + /// + /// The prompt is intentionally terse — Flash is a fast model and we just + /// want a faithful summary, not deep reasoning. + /// + /// This is the building block for the live LLM synthesis call wired in + /// the follow-up (once the async Flash client is safe to call from the + /// registry layer). The method is public so callers outside this crate + /// can unit-test the prompt shape. + #[must_use] + #[allow(dead_code)] // used by future Flash synthesis call; keep for API stability + pub fn synthesis_prompt(tool_name: &str, raw_output: &str, estimated_tokens: usize) -> String { + format!( + "You are a synthesis assistant.
The tool `{tool_name}` produced {estimated_tokens} tokens \ + of output that is too large to include directly in the parent context.\n\n\ + Summarise the output below into a concise, faithful synthesis of ≤ 800 words. \ + Preserve key facts, numbers, file paths, error messages, and any actionable \ + information. Do NOT add commentary or interpretation beyond what is in the source.\n\n\ + \n{raw_output}\n" + ) + } + + /// Wrap a synthesis result with a workshop provenance header and a hint + /// about the stored raw output. + #[must_use] + pub fn wrap_synthesis( + tool_name: &str, + synthesis: &str, + estimated_tokens: usize, + threshold: usize, + ) -> String { + format!( + "[workshop-synthesis: tool={tool_name}, raw_tokens≈{estimated_tokens}, \ + threshold={threshold}, raw_stored_in={WORKSHOP_LAST_TOOL_RESULT_VAR}]\n\n{synthesis}" + ) + } +} + +// ── Workshop variable store ─────────────────────────────────────────────────── + +/// In-process store for workshop variables that persist across tool calls +/// within a session. The only variable exposed today is `last_tool_result` +/// which holds the most recent raw large-tool output for `promote_to_context`. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct WorkshopVariables { + /// Raw content of the most recent large tool output that was routed + /// through the workshop. Empty string when no routing has occurred. + #[serde(default)] + pub last_tool_result: String, + + /// Name of the tool that produced `last_tool_result`. + #[serde(default)] + pub last_tool_name: String, +} + +impl WorkshopVariables { + /// Store the raw output from a large-tool routing event, replacing any earlier one. + pub fn store_raw(&mut self, tool_name: &str, raw: &str) { + self.last_tool_result = raw.to_string(); + self.last_tool_name = tool_name.to_string(); + } + + /// Retrieve and clear the stored raw output (consume semantics so the + /// variable is not accidentally promoted twice).
+ /// + /// Returns `(tool_name, raw_content)`; meant for the not-yet-wired `promote_to_context` tool. + #[must_use] + #[allow(dead_code)] // consumed by promote_to_context tool in follow-up + pub fn take_raw(&mut self) -> Option<(String, String)> { + if self.last_tool_result.is_empty() { + return None; + } + let content = std::mem::take(&mut self.last_tool_result); + let name = std::mem::take(&mut self.last_tool_name); + Some((name, content)) + } +} + +// ── Unit tests ──────────────────────────────────────────────────────────────── + +#[cfg(test)] +mod tests { + use super::*; + + fn make_result(content: &str) -> ToolResult { + ToolResult::success(content.to_string()) + } + + #[test] + fn pass_through_below_threshold() { + let router = LargeOutputRouter::default(); + let small = "x".repeat(100); + let result = make_result(&small); + assert_eq!( + router.route("read_file", &result, false), + RouteDecision::PassThrough + ); + } + + #[test] + fn synthesise_above_threshold() { + let router = LargeOutputRouter::default(); + // DEFAULT threshold = 4096 tokens; 3 chars/token → 4096*3 = 12288 chars + let big = "a".repeat(13_000); + let result = make_result(&big); + assert!(matches!( + router.route("read_file", &result, false), + RouteDecision::Synthesise { ..
} + )); + } + + #[test] + fn raw_bypass_skips_routing() { + let router = LargeOutputRouter::default(); + let big = "a".repeat(13_000); + let result = make_result(&big); + // raw=true → always pass through regardless of size + assert_eq!( + router.route("exec_shell", &result, true), + RouteDecision::PassThrough + ); + } + + #[test] + fn error_results_always_pass_through() { + let router = LargeOutputRouter::default(); + let big = "error: ".repeat(2_000); + let result = ToolResult::error(big); + assert_eq!( + router.route("exec_shell", &result, false), + RouteDecision::PassThrough + ); + } + + #[test] + fn per_tool_threshold_override() { + let mut per_tool = HashMap::new(); + per_tool.insert("grep_files".to_string(), 100); // very low + let config = WorkshopConfig { + large_output_threshold_tokens: Some(4096), + per_tool_thresholds: Some(per_tool), + }; + let router = LargeOutputRouter::new(config); + // 100 tokens * 3 = 300 chars → trigger with 400 chars + let medium = "b".repeat(400); + let result = make_result(&medium); + assert!(matches!( + router.route("grep_files", &result, false), + RouteDecision::Synthesise { .. 
} + )); + // Other tools still use the global threshold + assert_eq!( + router.route("read_file", &result, false), + RouteDecision::PassThrough + ); + } + + #[test] + fn estimate_tokens_conservative() { + // 9 chars → ceil(9/3) = 3 tokens + assert_eq!(estimate_tokens("123456789"), 3); + // 10 chars → ceil(10/3) = 4 tokens + assert_eq!(estimate_tokens("1234567890"), 4); + // Empty string + assert_eq!(estimate_tokens(""), 0); + } + + #[test] + fn workshop_variables_store_and_take() { + let mut vars = WorkshopVariables::default(); + assert!(vars.take_raw().is_none()); + + vars.store_raw("read_file", "raw content here"); + let taken = vars.take_raw().expect("should have content"); + assert_eq!(taken.0, "read_file"); + assert_eq!(taken.1, "raw content here"); + + // Second take is empty — consume semantics + assert!(vars.take_raw().is_none()); + } + + #[test] + fn wrap_synthesis_includes_provenance_header() { + let wrapped = + LargeOutputRouter::wrap_synthesis("web_search", "key facts here", 5000, 4096); + assert!(wrapped.contains("workshop-synthesis")); + assert!(wrapped.contains("web_search")); + assert!(wrapped.contains("5000")); + assert!(wrapped.contains("key facts here")); + } +} diff --git a/crates/tui/src/tools/mod.rs b/crates/tui/src/tools/mod.rs index cbb0911d..a8053968 100644 --- a/crates/tui/src/tools/mod.rs +++ b/crates/tui/src/tools/mod.rs @@ -11,6 +11,7 @@ pub mod finance; pub mod fetch_url; pub mod git; +pub mod large_output_router; pub mod git_history; pub mod github; pub mod parallel; diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs index d85031bd..03378537 100644 --- a/crates/tui/src/tools/registry.rs +++ b/crates/tui/src/tools/registry.rs @@ -119,6 +119,7 @@ impl ToolRegistry { /// Execute a tool with an optional context override. /// /// This is used for retrying tools with elevated sandbox policies. + /// After execution, large results are routed through the workshop (#548). 
pub async fn execute_full_with_context( &self, name: &str, @@ -130,7 +131,65 @@ impl ToolRegistry { .ok_or_else(|| ToolError::not_available(format!("tool '{name}' is not registered")))?; let ctx = context_override.unwrap_or(&self.context); - tool.execute(input, ctx).await + // Large-output routing (#548): read the per-call `raw` flag first so + // `input` can be moved into the tool instead of being cloned. + let raw_bypass = input + .get("raw") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + + let result = tool.execute(input, ctx).await?; + + if let Some(router) = ctx.large_output_router.as_ref() { + use crate::tools::large_output_router::{LargeOutputRouter, RouteDecision}; + match router.route(name, &result, raw_bypass) { + RouteDecision::PassThrough => {} + RouteDecision::Synthesise { + estimated_tokens, + threshold, + } => { + // Store the raw output in the workshop variable store. + if let Some(vars_arc) = ctx.workshop_vars.as_ref() { + let mut vars = vars_arc.lock().await; + vars.store_raw(name, &result.content); + } + + // No live synthesis call yet: return a provenance header plus + // a truncated preview of the raw output so the registry layer + // stays free of an LLM client dependency. A follow-up can + // wire in the Flash client when the async LLM call is safe + // here; `synthesis_prompt` is the intended prompt builder + // for that call.
+ let preview_chars = 1_200usize; + let preview: String = result + .content + .chars() + .take(preview_chars) + .collect(); + let ellipsis = if result.content.chars().nth(preview_chars).is_some() { + "\n… [output truncated — full text in workshop variable `last_tool_result`]" + } else { + "" + }; + let synthesis = format!("{preview}{ellipsis}"); + let wrapped = LargeOutputRouter::wrap_synthesis( + name, + &synthesis, + estimated_tokens, + threshold, + ); + tracing::debug!( + tool = name, + estimated_tokens, + threshold, + "large-output routed through workshop" + ); + return Ok(ToolResult::success(wrapped)); + } + } + } + + Ok(result) } /// Get the current tool context. diff --git a/crates/tui/src/tools/spec.rs b/crates/tui/src/tools/spec.rs index 55836a1e..2ce92df6 100644 --- a/crates/tui/src/tools/spec.rs +++ b/crates/tui/src/tools/spec.rs @@ -110,6 +110,17 @@ pub struct ToolContext { /// short-circuit on `None` rather than fall back to a workspace-local /// default. pub memory_path: Option, + + /// Large-output router (#548). When `Some`, tool results that exceed the + /// configured token threshold are routed through a V4-Flash synthesis + /// sub-agent before being returned to the parent context. `None` disables + /// routing (e.g. in sub-agents and test contexts to avoid recursion). + pub large_output_router: Option<crate::tools::large_output_router::LargeOutputRouter>, + + /// Per-session workshop variable store (#548). Holds the raw content of + /// the most recent large-tool routing event so the parent can call + /// `promote_to_context` later. `None` when the router is disabled.
+ pub workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>>, } impl ToolContext { @@ -136,6 +147,8 @@ impl ToolContext { runtime: RuntimeToolServices::default(), cancel_token: None, memory_path: None, + large_output_router: None, + workshop_vars: None, } } @@ -165,6 +178,8 @@ impl ToolContext { runtime: RuntimeToolServices::default(), cancel_token: None, memory_path: None, + large_output_router: None, + workshop_vars: None, } } @@ -194,6 +209,8 @@ impl ToolContext { runtime: RuntimeToolServices::default(), cancel_token: None, memory_path: None, + large_output_router: None, + workshop_vars: None, } } @@ -399,6 +416,20 @@ impl ToolContext { self.state_namespace = namespace.into(); self } + + /// Attach the large-output router (#548) and the workshop variable store + /// that receives raw output when a result exceeds the configured token + /// threshold; both fields are set to `Some`. + #[must_use] + pub fn with_large_output_router( + mut self, + router: crate::tools::large_output_router::LargeOutputRouter, + vars: std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>, + ) -> Self { + self.large_output_router = Some(router); + self.workshop_vars = Some(vars); + self + } } fn normalize_path(path: &Path) -> PathBuf { diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 62bc845f..97a7bd31 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -542,6 +542,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { memory_enabled: config.memory_enabled(), memory_path: config.memory_path(), goal_objective: app.goal.goal_objective.clone(), + workshop: config.workshop.clone(), } }