From 74760a39d3ada952086745a1758bc6996470c035 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Sat, 9 May 2026 22:53:23 -0500 Subject: [PATCH] fix(mcp): capture stderr from spawned stdio servers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The stdio MCP spawn site discarded server stderr (`Stdio::null`), so a server that crashed after \`initialize\` left only "Stdio transport closed" — useless for debugging. Pipe stderr now and drain it into a bounded ring buffer (\`StderrTail\`, capped at 64 lines) via a tokio task. When the read side fails (EOF or IO error), the transport error includes the captured stderr tail so callers see why the server died. Adds a unix-only regression test that spawns \`sh -c 'echo >&2; exit 1'\` and asserts the stderr line propagates through the recv() error. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 5 ++ crates/tui/src/mcp.rs | 145 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47ff2da7..cb760efa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,11 @@ published. ### Fixed +- MCP stdio servers no longer discard stderr. The spawn site now pipes + stderr through a bounded ring buffer; when a server crashes + mid-session, the transport-closed error includes the captured stderr + tail instead of disappearing into `Stdio::null`. Useful for debugging + Node/Python MCP servers that fail well after `initialize`. - Mouse capture now defaults on inside Windows Terminal (#1169). When `WT_SESSION` is set, in-app text selection is enabled by default; legacy conhost stays opt-in via `--mouse-capture` or `[tui] diff --git a/crates/tui/src/mcp.rs b/crates/tui/src/mcp.rs index 2f811492..3131999c 100644 --- a/crates/tui/src/mcp.rs +++ b/crates/tui/src/mcp.rs @@ -8,6 +8,7 @@ use std::collections::{HashMap, VecDeque}; use std::fs; use std::path::{Component, Path}; +use std::sync::Arc; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::Duration; @@ -17,6 +18,7 @@ use reqwest::header::{ACCEPT, CONTENT_TYPE}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncBufReadExt, AsyncWriteExt}; use tokio::process::{Child, ChildStdin, ChildStdout}; +use tokio::sync::Mutex as TokioMutex; use crate::child_env; use crate::network_policy::{Decision, NetworkPolicyDecider, host_from_url}; @@ -288,6 +290,10 @@ pub struct StdioTransport { child: Child, stdin: ChildStdin, reader: tokio::io::BufReader, + /// Tail of stderr lines from the spawned MCP server. A background task + /// drains the child's stderr into this buffer so a mid-run crash leaves + /// some context behind instead of `Stdio::null` swallowing it. + stderr_tail: Arc, } /// How long `StdioTransport::shutdown` waits for the child to exit on SIGTERM @@ -296,6 +302,54 @@ pub struct StdioTransport { /// a few hundred ms. const STDIO_SHUTDOWN_GRACE: Duration = Duration::from_millis(2_000); +/// How many lines of MCP-server stderr to keep around for crash diagnostics. +/// Bounded so a chatty server can't grow this without limit; large enough to +/// catch typical Node/Python startup or panic output. +const STDERR_TAIL_CAPACITY: usize = 64; + +/// Bounded ring buffer for the most recent stderr lines from a spawned MCP +/// server. Used by `StdioTransport` to surface server-side context when the +/// transport read side fails (server crashed, exited early, etc). +#[derive(Default)] +pub struct StderrTail { + lines: TokioMutex>, +} + +impl StderrTail { + fn new() -> Arc { + Arc::new(Self { + lines: TokioMutex::new(VecDeque::with_capacity(STDERR_TAIL_CAPACITY)), + }) + } + + async fn push(&self, line: String) { + let mut buf = self.lines.lock().await; + if buf.len() >= STDERR_TAIL_CAPACITY { + buf.pop_front(); + } + buf.push_back(line); + } + + async fn snapshot(&self) -> Vec { + self.lines.lock().await.iter().cloned().collect() + } +} + +/// Format the captured stderr tail for inclusion in an error message. Empty +/// tails return `None` so the caller can fall back to its original message. +async fn format_stderr_context(tail: &StderrTail) -> Option { + let lines = tail.snapshot().await; + if lines.is_empty() { + return None; + } + Some(format!( + "MCP server stderr (last {} line{}):\n{}", + lines.len(), + if lines.len() == 1 { "" } else { "s" }, + lines.join("\n"), + )) +} + /// Best-effort SIGTERM. On Unix uses `libc::kill`; on Windows there's no /// equivalent so we let `kill_on_drop` (TerminateProcess) handle it via the /// subsequent Drop. Returns whether a signal was actually sent. @@ -334,8 +388,19 @@ impl McpTransport for StdioTransport { let mut line = String::new(); loop { line.clear(); - let bytes = self.reader.read_line(&mut line).await?; + let bytes = match self.reader.read_line(&mut line).await { + Ok(b) => b, + Err(err) => { + if let Some(stderr) = format_stderr_context(&self.stderr_tail).await { + anyhow::bail!("Stdio transport read error: {err}\n{stderr}"); + } + return Err(err.into()); + } + }; if bytes == 0 { + if let Some(stderr) = format_stderr_context(&self.stderr_tail).await { + anyhow::bail!("Stdio transport closed\n{stderr}"); + } anyhow::bail!("Stdio transport closed"); } @@ -883,7 +948,7 @@ impl McpConnection { cmd.args(&config.args) .stdin(std::process::Stdio::piped()) .stdout(std::process::Stdio::piped()) - .stderr(std::process::Stdio::null()) + .stderr(std::process::Stdio::piped()) .kill_on_drop(true); // MCP stdio servers are user-configured integrations. Use the @@ -903,11 +968,28 @@ impl McpConnection { let stdin = child.stdin.take().context("Failed to get MCP stdin")?; let stdout = child.stdout.take().context("Failed to get MCP stdout")?; + let stderr = child.stderr.take().context("Failed to get MCP stderr")?; + + // Drain stderr into a bounded ring buffer so a crash mid-run + // leaves diagnostic breadcrumbs instead of disappearing into + // `Stdio::null`. The task exits naturally when the child closes + // its stderr (kill_on_drop / exit / explicit shutdown). + let stderr_tail = StderrTail::new(); + { + let tail = Arc::clone(&stderr_tail); + tokio::spawn(async move { + let mut lines = tokio::io::BufReader::new(stderr).lines(); + while let Ok(Some(line)) = lines.next_line().await { + tail.push(line).await; + } + }); + } Box::new(StdioTransport { child, stdin, reader: tokio::io::BufReader::new(stdout), + stderr_tail, }) } else { anyhow::bail!( @@ -2782,6 +2864,7 @@ mod tests { child, stdin, reader: tokio::io::BufReader::new(stdout), + stderr_tail: StderrTail::new(), }; // shutdown() should send SIGTERM and complete within the grace window. @@ -2806,6 +2889,64 @@ mod tests { ); } + /// Mid-run MCP server crash: the v0.8.x spawn path used `Stdio::null` for + /// stderr, so a server that died with a useful stderr message left the + /// caller with only "Stdio transport closed". Now stderr is piped into a + /// bounded ring buffer and surfaced when the read side fails. + #[cfg(unix)] + #[tokio::test] + async fn stdio_transport_recv_error_includes_stderr_tail() { + use tokio::process::Command as TokioCommand; + + let mut cmd = TokioCommand::new("sh"); + cmd.arg("-c") + .arg("echo 'mcp-server: failed to load plugin' 1>&2; exit 1") + .stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .kill_on_drop(true); + + let mut child = cmd.spawn().expect("spawn sh"); + let stdin = child.stdin.take().expect("stdin"); + let stdout = child.stdout.take().expect("stdout"); + let stderr = child.stderr.take().expect("stderr"); + + let stderr_tail = StderrTail::new(); + { + let tail = Arc::clone(&stderr_tail); + tokio::spawn(async move { + let mut lines = tokio::io::BufReader::new(stderr).lines(); + while let Ok(Some(line)) = lines.next_line().await { + tail.push(line).await; + } + }); + } + + let mut transport = StdioTransport { + child, + stdin, + reader: tokio::io::BufReader::new(stdout), + stderr_tail, + }; + + // Give the subprocess time to write its stderr line and exit. + tokio::time::sleep(Duration::from_millis(300)).await; + + let err = transport + .recv() + .await + .expect_err("expected transport closed error"); + let err_str = format!("{err}"); + assert!( + err_str.contains("Stdio transport closed"), + "missing closed marker in: {err_str}" + ); + assert!( + err_str.contains("mcp-server: failed to load plugin"), + "stderr context missing from error: {err_str}" + ); + } + #[tokio::test] async fn sse_connect_waits_for_endpoint_before_first_send() { use std::sync::{