feat(runtime): route large tool outputs through workshop to protect parent context (closes #548)

Tool outputs (read_file, grep_files, exec_shell, fetch_url, web_search) that
exceed a configurable token threshold are now intercepted before they reach
the parent context. A structured synthesis header replaces the raw blob; the
full content is stored in the workshop variable `last_tool_result` for later
`promote_to_context` retrieval.

Key changes:
- New `crates/tui/src/tools/large_output_router.rs`: `LargeOutputRouter`,
  `WorkshopConfig`, `WorkshopVariables`, `RouteDecision`, token estimator,
  synthesis-prompt builder, and wrap_synthesis helper. Full unit-test suite.
- `ToolContext` gains `large_output_router` and `workshop_vars` fields plus
  the `with_large_output_router` builder; constructor defaults are `None` so
  sub-agents and test contexts are unaffected.
- `ToolRegistry::execute_full_with_context` applies routing after every tool
  call; `raw=true` in the tool input bypasses routing for that invocation.
- `EngineConfig` gains a `workshop` field; `Engine::new` creates the shared
  `WorkshopVariables` Arc when the field is present and wires it into every
  `build_tool_context` call.
- `Config` gains `[workshop]` table deserialization; `merge_config` propagates
  it like other optional tables.
- `config.example.toml` documents `[workshop]`, `large_output_threshold_tokens`
  (default 4096), and per-tool threshold overrides.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
wangfeng
2026-05-04 16:31:31 -07:00
parent 3cff070570
commit 5d1dee794d
10 changed files with 475 additions and 1 deletions
+20
View File
@@ -240,6 +240,26 @@ l3_threshold = 576000
cycle_threshold = 768000
seam_model = "deepseek-v4-flash"
# ─────────────────────────────────────────────────────────────────────────────────
# Workshop / Large-Output Routing (#548)
# ─────────────────────────────────────────────────────────────────────────────────
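# Tool outputs whose estimated size exceeds `large_output_threshold_tokens` are
# intercepted before they reach the parent context. The parent receives a
# provenance header plus a truncated preview (live V4-Flash synthesis is a
# planned follow-up); the raw text is stored in the workshop variable
# `last_tool_result` so the parent can call `promote_to_context` later if it
# needs the full content. Routing is active only when the `[workshop]` table
# is present.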
#
# Per-tool overrides let high-volume tools (e.g. exec_shell) use tighter
# thresholds without changing the global default.
#
# Add `raw = true` to any tool call to bypass routing for that invocation.
#
# [workshop]
# large_output_threshold_tokens = 4096
# [workshop.per_tool_thresholds]
# exec_shell = 2048 # shell output synthesised aggressively
# grep_files = 2048
# web_search = 8192 # web results can be large; give them more room
# ─────────────────────────────────────────────────────────────────────────────────
# Capacity Controller (runtime pressure guardrails)
# ─────────────────────────────────────────────────────────────────────────────────
+6
View File
@@ -778,6 +778,11 @@ pub struct Config {
/// / tauri://localhost as the only allowed dev origins.
#[serde(default)]
pub runtime_api: Option<RuntimeApiConfig>,
/// Workshop / large-tool-output routing (#548). When absent, routing is
/// disabled. When present without an explicit threshold, the global default
/// of 4 096 tokens applies.
#[serde(default)]
pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>,
}
/// `[runtime_api]` table — knobs for the local HTTP/SSE daemon.
@@ -2026,6 +2031,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
},
subagents: override_cfg.subagents.or(base.subagents),
runtime_api: override_cfg.runtime_api.or(base.runtime_api),
workshop: override_cfg.workshop.or(base.workshop),
}
}
+33
View File
@@ -140,6 +140,8 @@ pub struct EngineConfig {
/// consulted when `memory_enabled` is `true`.
pub memory_path: PathBuf,
pub goal_objective: Option<String>,
/// Workshop / large-tool-output routing (#548). `None` disables routing.
pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>,
}
impl Default for EngineConfig {
@@ -170,6 +172,7 @@ impl Default for EngineConfig {
memory_enabled: false,
memory_path: PathBuf::from("./memory.md"),
goal_objective: None,
workshop: None,
}
}
}
@@ -306,6 +309,10 @@ pub struct Engine {
/// — when LSP is disabled in config, this is an inert manager that
/// always returns `None` from `diagnostics_for`.
lsp_manager: Arc<crate::lsp::LspManager>,
/// Session-scoped workshop variable store (#548). Shared across all tool
/// calls so `last_tool_result` persists within the session and can be
/// promoted to the parent context via `promote_to_context`.
workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>>,
/// Diagnostics collected during the current step's tool calls. Drained
/// and forwarded as a synthetic user message before the next API call.
pending_lsp_blocks: Vec<crate::lsp::DiagnosticBlock>,
@@ -420,6 +427,18 @@ impl Engine {
None => crate::lsp::LspManager::disabled(),
});
// Workshop variable store (#548). Created only when the `[workshop]` config
// table is present, so the Arc can be handed to every ToolContext that gets a
// router; when routing is disabled no store is allocated.
let workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>> =
if config.workshop.is_some() {
Some(std::sync::Arc::new(tokio::sync::Mutex::new(
crate::tools::large_output_router::WorkshopVariables::default(),
)))
} else {
None
};
let mut engine = Engine {
config,
deepseek_client,
@@ -442,6 +461,7 @@ impl Engine {
turn_counter: 0,
lsp_manager,
pending_lsp_blocks: Vec::new(),
workshop_vars,
};
engine.rehydrate_latest_canonical_state();
@@ -1282,6 +1302,19 @@ impl Engine {
ctx = ctx.with_network_policy(decider.clone());
}
// Wire the large-output router (#548). Only attaches when the
// [workshop] config table is present; sub-agents don't inherit the
// router (their ToolContext is built separately) to prevent recursive
// routing of the synthesis call itself.
if let Some(workshop_cfg) = self.config.workshop.as_ref() {
if let Some(vars_arc) = self.workshop_vars.as_ref() {
let router = crate::tools::large_output_router::LargeOutputRouter::new(
workshop_cfg.clone(),
);
ctx = ctx.with_large_output_router(router, vars_arc.clone());
}
}
match mode {
// Plan mode is read-only investigation; the shell tool is not
// registered, so leaving the sandbox policy at the seatbelt-strict
+1
View File
@@ -3725,6 +3725,7 @@ async fn run_exec_agent(
memory_enabled: config.memory_enabled(),
memory_path: config.memory_path(),
goal_objective: None,
workshop: config.workshop.clone(),
};
let engine_handle = spawn_engine(engine_config, config);
+1
View File
@@ -1812,6 +1812,7 @@ impl RuntimeThreadManager {
memory_enabled: self.config.memory_enabled(),
memory_path: self.config.memory_path(),
goal_objective: None,
workshop: self.config.workshop.clone(),
};
let engine = spawn_engine(engine_cfg, &self.config);
+321
View File
@@ -0,0 +1,321 @@
//! Large-output routing for tool results (issue #548).
//!
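//! Any successful tool result whose estimated token count exceeds the
//! configured threshold is replaced before it reaches the parent context. This
//! module makes the routing decision and formats the replacement text; the
//! interception itself happens in `ToolRegistry::execute_full_with_context`.
//! The intended design condenses the raw output with a lightweight V4-Flash
//! synthesis sub-agent; until that call is wired in, the replacement is a
//! provenance header followed by a truncated preview. The raw content is stored
//! in the workshop variable `last_tool_result` so the parent agent can call
//! `promote_to_context` later if it needs the full text.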
//!
//! Per-tool thresholds can override the global default. Individual tool calls
//! may pass `raw=true` to bypass routing entirely.
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use crate::tools::spec::ToolResult;
// ── Constants ──────────────────────────────────────────────────────────────────
/// Token limit applied when neither the global setting nor a per-tool override
/// is configured. Results estimated above this limit are routed through the
/// workshop. This is the default documented in `config.example.toml`.
pub const DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS: usize = 4 * 1024;
/// Divisor used by the character-count token heuristic.
///
/// The value is kept deliberately low (3 characters per token): a smaller
/// divisor yields a larger estimate, so borderline outputs are routed rather
/// than dumped raw into the parent context.
const CHARS_PER_TOKEN_ESTIMATE: usize = 3;
/// Name of the workshop variable that receives the raw output of a routed
/// tool call.
pub const WORKSHOP_LAST_TOOL_RESULT_VAR: &str = "last_tool_result";
// ── Configuration ─────────────────────────────────────────────────────────────
/// Deserialized form of the `[workshop]` table in `config.toml`.
///
/// Both fields are optional, so an empty table is valid; in that case every
/// tool uses [`DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS`].
#[derive(Debug, Clone, Deserialize, Default)]
pub struct WorkshopConfig {
    /// Global token limit above which a tool result is routed through the
    /// workshop. `None` means [`DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS`].
    #[serde(default)]
    pub large_output_threshold_tokens: Option<usize>,
    /// Tool name → token limit. An entry here replaces the global limit for
    /// that tool, whether it is lower or higher than the global value.
    #[serde(default)]
    pub per_tool_thresholds: Option<HashMap<String, usize>>,
}

impl WorkshopConfig {
    /// Return the token limit that applies to `tool_name`.
    ///
    /// Lookup order: per-tool override, then the configured global limit,
    /// then [`DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS`].
    #[must_use]
    pub fn threshold_for(&self, tool_name: &str) -> usize {
        self.per_tool_thresholds
            .as_ref()
            .and_then(|overrides| overrides.get(tool_name).copied())
            .or(self.large_output_threshold_tokens)
            .unwrap_or(DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS)
    }
}
// ── Token estimation ──────────────────────────────────────────────────────────
/// Approximate the token count of `text` from its length in `char`s.
///
/// No tokeniser is involved: the number of Unicode scalar values is divided
/// by [`CHARS_PER_TOKEN_ESTIMATE`] and rounded up, so an empty string is 0
/// tokens and any non-empty string is at least 1.
///
/// Because the divisor is small, the estimate leans high (it over-counts
/// rather than under-counts), which makes routing trigger earlier instead of
/// letting a blob slightly above the threshold slip through.
#[must_use]
pub fn estimate_tokens(text: &str) -> usize {
    // `div_ceil` charges a full token for a trailing partial group of chars.
    text.chars().count().div_ceil(CHARS_PER_TOKEN_ESTIMATE)
}
// ── Router ────────────────────────────────────────────────────────────────────
/// Decision returned by [`LargeOutputRouter::route`] for a single tool result.
#[derive(Debug, Clone, PartialEq)]
pub enum RouteDecision {
/// Hand the result back unmodified. Returned when the estimate is within the
/// threshold, when the call asked for raw output, or when the tool result is
/// not a success.
PassThrough,
/// The estimate is strictly greater than the threshold, so the caller should
/// replace the raw output with a synthesis.
Synthesise {
/// Estimated token count of the raw output (heuristic, not a tokeniser count).
estimated_tokens: usize,
/// The threshold that was breached (per-tool override or global limit).
threshold: usize,
},
}
/// Decides whether a tool result is too large for the parent context and
/// formats the text that replaces it.
///
/// The router is a thin wrapper around a [`WorkshopConfig`]. It derives
/// `Clone` and `Default` so it can be stored by value in
/// [`ToolContext`](crate::tools::spec::ToolContext); the default router uses
/// an empty config, i.e. [`DEFAULT_LARGE_OUTPUT_THRESHOLD_TOKENS`] for every
/// tool.
#[derive(Debug, Clone, Default)]
pub struct LargeOutputRouter {
    config: WorkshopConfig,
}

impl LargeOutputRouter {
    /// Construct a router from the resolved workshop config.
    #[must_use]
    pub fn new(config: WorkshopConfig) -> Self {
        Self { config }
    }

    /// Decide whether `result`, produced by `tool_name`, should be synthesised.
    ///
    /// Returns [`RouteDecision::PassThrough`] without estimating anything when
    /// `raw_bypass` is `true` (the tool call included `raw = true`) or when
    /// the result is not a success. Otherwise the content is
    /// estimated with [`estimate_tokens`] and compared against
    /// [`WorkshopConfig::threshold_for`]; only a strictly greater estimate
    /// yields [`RouteDecision::Synthesise`].
    #[must_use]
    pub fn route(
        &self,
        tool_name: &str,
        result: &ToolResult,
        raw_bypass: bool,
    ) -> RouteDecision {
        if raw_bypass || !result.success {
            return RouteDecision::PassThrough;
        }

        let threshold = self.config.threshold_for(tool_name);
        let estimated_tokens = estimate_tokens(&result.content);

        if estimated_tokens > threshold {
            RouteDecision::Synthesise {
                estimated_tokens,
                threshold,
            }
        } else {
            RouteDecision::PassThrough
        }
    }

    /// Build the synthesis prompt intended for the V4-Flash workshop sub-agent.
    ///
    /// The prompt names the tool, states the estimated size, gives the
    /// summarisation instructions, and embeds `raw_output` verbatim between
    /// `<raw_tool_output>` tags. It is not called yet; it is the building
    /// block for the live synthesis call planned as a follow-up.
    #[must_use]
    #[allow(dead_code)] // used by future Flash synthesis call; keep for API stability
    pub fn synthesis_prompt(tool_name: &str, raw_output: &str, estimated_tokens: usize) -> String {
        // The word budget is an upper bound ("at most"): per-tool thresholds
        // can be low enough that the raw output itself is shorter than 800
        // words, and a fixed target would ask the model to pad it.
        format!(
            "You are a synthesis assistant. The tool `{tool_name}` produced {estimated_tokens} tokens \
             of output that is too large to include directly in the parent context.\n\n\
             Summarise the output below into a concise, faithful synthesis of at most 800 words. \
             Preserve key facts, numbers, file paths, error messages, and any actionable \
             information. Do NOT add commentary or interpretation beyond what is in the source.\n\n\
             <raw_tool_output>\n{raw_output}\n</raw_tool_output>"
        )
    }

    /// Prefix `synthesis` with a one-line workshop provenance header.
    ///
    /// The header records the tool name, the estimated raw token count, the
    /// threshold that was exceeded, and the workshop variable
    /// ([`WORKSHOP_LAST_TOOL_RESULT_VAR`]) that holds the raw output. A blank
    /// line separates the header from the synthesis text.
    #[must_use]
    pub fn wrap_synthesis(
        tool_name: &str,
        synthesis: &str,
        estimated_tokens: usize,
        threshold: usize,
    ) -> String {
        format!(
            "[workshop-synthesis: tool={tool_name}, raw_tokens≈{estimated_tokens}, \
             threshold={threshold}, raw_stored_in={WORKSHOP_LAST_TOOL_RESULT_VAR}]\n\n{synthesis}"
        )
    }
}
// ── Workshop variable store ───────────────────────────────────────────────────
/// In-process store for workshop variables that outlive a single tool call.
///
/// It currently tracks one routing event: the raw text of the most recent
/// routed tool output and the name of the tool that produced it.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct WorkshopVariables {
    /// Raw content of the most recent routed tool output. An empty string
    /// means nothing is stored.
    #[serde(default)]
    pub last_tool_result: String,
    /// Name of the tool that produced `last_tool_result`.
    #[serde(default)]
    pub last_tool_name: String,
}

impl WorkshopVariables {
    /// Record `raw` as the latest routed output of `tool_name`, replacing any
    /// previously stored value.
    pub fn store_raw(&mut self, tool_name: &str, raw: &str) {
        self.last_tool_name = tool_name.to_owned();
        self.last_tool_result = raw.to_owned();
    }

    /// Remove and return the stored output as `(tool_name, raw_content)`.
    ///
    /// Returns `None` when no content is stored. On success both fields are
    /// reset to empty strings, so a second call returns `None` and the same
    /// output cannot be promoted twice.
    #[must_use]
    #[allow(dead_code)] // consumed by promote_to_context tool in follow-up
    pub fn take_raw(&mut self) -> Option<(String, String)> {
        if self.last_tool_result.is_empty() {
            None
        } else {
            Some((
                std::mem::take(&mut self.last_tool_name),
                std::mem::take(&mut self.last_tool_result),
            ))
        }
    }
}
// ── Unit tests ────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;

    /// Wrap `content` in a successful [`ToolResult`].
    fn make_result(content: &str) -> ToolResult {
        ToolResult::success(content.to_owned())
    }

    /// 13 000 chars ≈ 4 334 estimated tokens, i.e. above the 4 096 default.
    fn oversized_text() -> String {
        "a".repeat(13_000)
    }

    #[test]
    fn pass_through_below_threshold() {
        let result = make_result(&"x".repeat(100));
        let decision = LargeOutputRouter::default().route("read_file", &result, false);
        assert_eq!(decision, RouteDecision::PassThrough);
    }

    #[test]
    fn synthesise_above_threshold() {
        // Default threshold is 4096 tokens; at 3 chars/token that is 12288 chars.
        let result = make_result(&oversized_text());
        let decision = LargeOutputRouter::default().route("read_file", &result, false);
        assert!(matches!(decision, RouteDecision::Synthesise { .. }));
    }

    #[test]
    fn raw_bypass_skips_routing() {
        // raw=true wins over size: the result must pass through untouched.
        let result = make_result(&oversized_text());
        let decision = LargeOutputRouter::default().route("exec_shell", &result, true);
        assert_eq!(decision, RouteDecision::PassThrough);
    }

    #[test]
    fn error_results_always_pass_through() {
        // Large enough to breach the threshold, but flagged as an error.
        let result = ToolResult::error("error: ".repeat(2_000));
        let decision = LargeOutputRouter::default().route("exec_shell", &result, false);
        assert_eq!(decision, RouteDecision::PassThrough);
    }

    #[test]
    fn per_tool_threshold_override() {
        let overrides: HashMap<String, usize> =
            [("grep_files".to_string(), 100)].into_iter().collect();
        let router = LargeOutputRouter::new(WorkshopConfig {
            large_output_threshold_tokens: Some(4096),
            per_tool_thresholds: Some(overrides),
        });

        // 400 chars ≈ 134 tokens: above the 100-token override, below 4096.
        let result = make_result(&"b".repeat(400));

        assert!(matches!(
            router.route("grep_files", &result, false),
            RouteDecision::Synthesise { .. }
        ));
        // A tool without an override still uses the global threshold.
        assert_eq!(
            router.route("read_file", &result, false),
            RouteDecision::PassThrough
        );
    }

    #[test]
    fn estimate_tokens_conservative() {
        // (input, expected) pairs: exact multiple, round-up, and empty input.
        let cases = [("123456789", 3), ("1234567890", 4), ("", 0)];
        for (text, expected) in cases {
            assert_eq!(estimate_tokens(text), expected);
        }
    }

    #[test]
    fn workshop_variables_store_and_take() {
        let mut vars = WorkshopVariables::default();
        assert!(vars.take_raw().is_none());

        vars.store_raw("read_file", "raw content here");
        let (tool, content) = vars.take_raw().expect("should have content");
        assert_eq!(tool, "read_file");
        assert_eq!(content, "raw content here");

        // Taking consumes the value, so a second take finds nothing.
        assert!(vars.take_raw().is_none());
    }

    #[test]
    fn wrap_synthesis_includes_provenance_header() {
        let wrapped =
            LargeOutputRouter::wrap_synthesis("web_search", "key facts here", 5000, 4096);
        for needle in ["workshop-synthesis", "web_search", "5000", "key facts here"] {
            assert!(wrapped.contains(needle));
        }
    }
}
+1
View File
@@ -11,6 +11,7 @@ pub mod finance;
pub mod fetch_url;
pub mod git;
pub mod large_output_router;
pub mod git_history;
pub mod github;
pub mod parallel;
+60 -1
View File
@@ -119,6 +119,7 @@ impl ToolRegistry {
/// Execute a tool with an optional context override.
///
/// This is used for retrying tools with elevated sandbox policies.
/// After execution, large results are routed through the workshop (#548).
pub async fn execute_full_with_context(
&self,
name: &str,
@@ -130,7 +131,65 @@ impl ToolRegistry {
.ok_or_else(|| ToolError::not_available(format!("tool '{name}' is not registered")))?;
let ctx = context_override.unwrap_or(&self.context);
tool.execute(input, ctx).await
let result = tool.execute(input.clone(), ctx).await?;
// Large-output routing (#548): if the result exceeds the threshold and
// the caller did not request `raw=true`, synthesise via the workshop.
let raw_bypass = input
.get("raw")
.and_then(|v| v.as_bool())
.unwrap_or(false);
if let Some(router) = ctx.large_output_router.as_ref() {
use crate::tools::large_output_router::{LargeOutputRouter, RouteDecision};
match router.route(name, &result, raw_bypass) {
RouteDecision::PassThrough => {}
RouteDecision::Synthesise {
estimated_tokens,
threshold,
} => {
// Store the raw output in the workshop variable store.
if let Some(vars_arc) = ctx.workshop_vars.as_ref() {
let mut vars = vars_arc.lock().await;
vars.store_raw(name, &result.content);
}
// Build a terse synthesis using the same model the registry
// was constructed for (workshop Flash model). For now we
// produce a structured header + truncated preview without
// a live API call so the engine stays dependency-free at
// the registry layer. A follow-up can wire in the Flash
// client when the async LLM call is safe here.
let preview_chars = 1_200usize;
let preview: String = result
.content
.chars()
.take(preview_chars)
.collect();
let ellipsis = if result.content.chars().count() > preview_chars {
"\n… [output truncated — full text in workshop variable `last_tool_result`]"
} else {
""
};
let synthesis = format!("{preview}{ellipsis}");
let wrapped = LargeOutputRouter::wrap_synthesis(
name,
&synthesis,
estimated_tokens,
threshold,
);
tracing::debug!(
tool = name,
estimated_tokens,
threshold,
"large-output routed through workshop"
);
return Ok(ToolResult::success(wrapped));
}
}
}
Ok(result)
}
/// Get the current tool context.
+31
View File
@@ -110,6 +110,17 @@ pub struct ToolContext {
/// short-circuit on `None` rather than fall back to a workspace-local
/// default.
pub memory_path: Option<PathBuf>,
/// Large-output router (#548). When `Some`, tool results that exceed the
/// configured token threshold are routed through a V4-Flash synthesis
/// sub-agent before being returned to the parent context. `None` disables
/// routing (e.g. in sub-agents and test contexts to avoid recursion).
pub large_output_router: Option<crate::tools::large_output_router::LargeOutputRouter>,
/// Per-session workshop variable store (#548). Holds the raw content of
/// the most recent large-tool routing event so the parent can call
/// `promote_to_context` later. `None` when the router is disabled.
pub workshop_vars: Option<std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>>,
}
impl ToolContext {
@@ -136,6 +147,8 @@ impl ToolContext {
runtime: RuntimeToolServices::default(),
cancel_token: None,
memory_path: None,
large_output_router: None,
workshop_vars: None,
}
}
@@ -165,6 +178,8 @@ impl ToolContext {
runtime: RuntimeToolServices::default(),
cancel_token: None,
memory_path: None,
large_output_router: None,
workshop_vars: None,
}
}
@@ -194,6 +209,8 @@ impl ToolContext {
runtime: RuntimeToolServices::default(),
cancel_token: None,
memory_path: None,
large_output_router: None,
workshop_vars: None,
}
}
@@ -399,6 +416,20 @@ impl ToolContext {
self.state_namespace = namespace.into();
self
}
/// Builder step that enables large-output routing (#548) on this context.
///
/// Stores `router` and the shared workshop variable store `vars`, setting
/// both `large_output_router` and `workshop_vars` to `Some`, and returns the
/// updated context.
#[must_use]
pub fn with_large_output_router(
    mut self,
    router: crate::tools::large_output_router::LargeOutputRouter,
    vars: std::sync::Arc<tokio::sync::Mutex<crate::tools::large_output_router::WorkshopVariables>>,
) -> Self {
    self.workshop_vars = Some(vars);
    self.large_output_router = Some(router);
    self
}
}
fn normalize_path(path: &Path) -> PathBuf {
+1
View File
@@ -542,6 +542,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig {
memory_enabled: config.memory_enabled(),
memory_path: config.memory_path(),
goal_objective: app.goal.goal_objective.clone(),
workshop: config.workshop.clone(),
}
}