fix(mcp): capture stderr from spawned stdio servers

The stdio MCP spawn site discarded server stderr (`Stdio::null`), so a
server that crashed after \`initialize\` left only "Stdio transport
closed" — useless for debugging.

Pipe stderr now and drain it into a bounded ring buffer
(\`StderrTail\`, capped at 64 lines) via a tokio task. When the read
side fails (EOF or IO error), the transport error includes the
captured stderr tail so callers see why the server died.

Adds a unix-only regression test that spawns
\`sh -c 'echo >&2; exit 1'\` and asserts the stderr line propagates
through the recv() error.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter Bown
2026-05-09 22:53:23 -05:00
parent 29e418474e
commit 74760a39d3
2 changed files with 148 additions and 2 deletions
+5
View File
@@ -23,6 +23,11 @@ published.
### Fixed
- MCP stdio servers no longer discard stderr. The spawn site now pipes
stderr through a bounded ring buffer; when a server crashes
mid-session, the transport-closed error includes the captured stderr
tail instead of disappearing into `Stdio::null`. Useful for debugging
Node/Python MCP servers that fail well after `initialize`.
- Mouse capture now defaults on inside Windows Terminal (#1169). When
`WT_SESSION` is set, in-app text selection is enabled by default;
legacy conhost stays opt-in via `--mouse-capture` or `[tui]
+143 -2
View File
@@ -8,6 +8,7 @@
use std::collections::{HashMap, VecDeque};
use std::fs;
use std::path::{Component, Path};
use std::sync::Arc;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
@@ -17,6 +18,7 @@ use reqwest::header::{ACCEPT, CONTENT_TYPE};
use serde::{Deserialize, Serialize};
use tokio::io::{AsyncBufReadExt, AsyncWriteExt};
use tokio::process::{Child, ChildStdin, ChildStdout};
use tokio::sync::Mutex as TokioMutex;
use crate::child_env;
use crate::network_policy::{Decision, NetworkPolicyDecider, host_from_url};
@@ -288,6 +290,10 @@ pub struct StdioTransport {
child: Child,
stdin: ChildStdin,
reader: tokio::io::BufReader<ChildStdout>,
/// Tail of stderr lines from the spawned MCP server. A background task
/// drains the child's stderr into this buffer so a mid-run crash leaves
/// some context behind instead of `Stdio::null` swallowing it.
stderr_tail: Arc<StderrTail>,
}
/// How long `StdioTransport::shutdown` waits for the child to exit on SIGTERM
@@ -296,6 +302,54 @@ pub struct StdioTransport {
/// a few hundred ms.
const STDIO_SHUTDOWN_GRACE: Duration = Duration::from_millis(2_000);
/// How many lines of MCP-server stderr to keep around for crash diagnostics.
/// Bounded so a chatty server can't grow this without limit; large enough to
/// catch typical Node/Python startup or panic output.
const STDERR_TAIL_CAPACITY: usize = 64;
/// Bounded ring buffer for the most recent stderr lines from a spawned MCP
/// server. Used by `StdioTransport` to surface server-side context when the
/// transport read side fails (server crashed, exited early, etc).
#[derive(Default)]
pub struct StderrTail {
lines: TokioMutex<VecDeque<String>>,
}
impl StderrTail {
fn new() -> Arc<Self> {
Arc::new(Self {
lines: TokioMutex::new(VecDeque::with_capacity(STDERR_TAIL_CAPACITY)),
})
}
async fn push(&self, line: String) {
let mut buf = self.lines.lock().await;
if buf.len() >= STDERR_TAIL_CAPACITY {
buf.pop_front();
}
buf.push_back(line);
}
async fn snapshot(&self) -> Vec<String> {
self.lines.lock().await.iter().cloned().collect()
}
}
/// Format the captured stderr tail for inclusion in an error message. Empty
/// tails return `None` so the caller can fall back to its original message.
async fn format_stderr_context(tail: &StderrTail) -> Option<String> {
let lines = tail.snapshot().await;
if lines.is_empty() {
return None;
}
Some(format!(
"MCP server stderr (last {} line{}):\n{}",
lines.len(),
if lines.len() == 1 { "" } else { "s" },
lines.join("\n"),
))
}
/// Best-effort SIGTERM. On Unix uses `libc::kill`; on Windows there's no
/// equivalent so we let `kill_on_drop` (TerminateProcess) handle it via the
/// subsequent Drop. Returns whether a signal was actually sent.
@@ -334,8 +388,19 @@ impl McpTransport for StdioTransport {
let mut line = String::new();
loop {
line.clear();
let bytes = self.reader.read_line(&mut line).await?;
let bytes = match self.reader.read_line(&mut line).await {
Ok(b) => b,
Err(err) => {
if let Some(stderr) = format_stderr_context(&self.stderr_tail).await {
anyhow::bail!("Stdio transport read error: {err}\n{stderr}");
}
return Err(err.into());
}
};
if bytes == 0 {
if let Some(stderr) = format_stderr_context(&self.stderr_tail).await {
anyhow::bail!("Stdio transport closed\n{stderr}");
}
anyhow::bail!("Stdio transport closed");
}
@@ -883,7 +948,7 @@ impl McpConnection {
cmd.args(&config.args)
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::null())
.stderr(std::process::Stdio::piped())
.kill_on_drop(true);
// MCP stdio servers are user-configured integrations. Use the
@@ -903,11 +968,28 @@ impl McpConnection {
let stdin = child.stdin.take().context("Failed to get MCP stdin")?;
let stdout = child.stdout.take().context("Failed to get MCP stdout")?;
let stderr = child.stderr.take().context("Failed to get MCP stderr")?;
// Drain stderr into a bounded ring buffer so a crash mid-run
// leaves diagnostic breadcrumbs instead of disappearing into
// `Stdio::null`. The task exits naturally when the child closes
// its stderr (kill_on_drop / exit / explicit shutdown).
let stderr_tail = StderrTail::new();
{
let tail = Arc::clone(&stderr_tail);
tokio::spawn(async move {
let mut lines = tokio::io::BufReader::new(stderr).lines();
while let Ok(Some(line)) = lines.next_line().await {
tail.push(line).await;
}
});
}
Box::new(StdioTransport {
child,
stdin,
reader: tokio::io::BufReader::new(stdout),
stderr_tail,
})
} else {
anyhow::bail!(
@@ -2782,6 +2864,7 @@ mod tests {
child,
stdin,
reader: tokio::io::BufReader::new(stdout),
stderr_tail: StderrTail::new(),
};
// shutdown() should send SIGTERM and complete within the grace window.
@@ -2806,6 +2889,64 @@ mod tests {
);
}
/// Mid-run MCP server crash: the v0.8.x spawn path used `Stdio::null` for
/// stderr, so a server that died with a useful stderr message left the
/// caller with only "Stdio transport closed". Now stderr is piped into a
/// bounded ring buffer and surfaced when the read side fails.
#[cfg(unix)]
#[tokio::test]
async fn stdio_transport_recv_error_includes_stderr_tail() {
use tokio::process::Command as TokioCommand;
let mut cmd = TokioCommand::new("sh");
cmd.arg("-c")
.arg("echo 'mcp-server: failed to load plugin' 1>&2; exit 1")
.stdin(std::process::Stdio::piped())
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.kill_on_drop(true);
let mut child = cmd.spawn().expect("spawn sh");
let stdin = child.stdin.take().expect("stdin");
let stdout = child.stdout.take().expect("stdout");
let stderr = child.stderr.take().expect("stderr");
let stderr_tail = StderrTail::new();
{
let tail = Arc::clone(&stderr_tail);
tokio::spawn(async move {
let mut lines = tokio::io::BufReader::new(stderr).lines();
while let Ok(Some(line)) = lines.next_line().await {
tail.push(line).await;
}
});
}
let mut transport = StdioTransport {
child,
stdin,
reader: tokio::io::BufReader::new(stdout),
stderr_tail,
};
// Give the subprocess time to write its stderr line and exit.
tokio::time::sleep(Duration::from_millis(300)).await;
let err = transport
.recv()
.await
.expect_err("expected transport closed error");
let err_str = format!("{err}");
assert!(
err_str.contains("Stdio transport closed"),
"missing closed marker in: {err_str}"
);
assert!(
err_str.contains("mcp-server: failed to load plugin"),
"stderr context missing from error: {err_str}"
);
}
#[tokio::test]
async fn sse_connect_waits_for_endpoint_before_first_send() {
use std::sync::{