fix(tui): panic safety foundations — spawn_supervised wrapper + process panic hook (#346)
Add spawn_supervised(name, location, future) to utils.rs that wraps futures in AssertUnwindSafe + catch_unwind, logs panics via tracing::error!, and writes crash dumps to ~/.deepseek/crashes/. Add process-level panic hook to main.rs that writes crash dumps before the default hook fires. Convert persistence_actor::spawn_persistence_actor as the first spawn_supervised caller to prove the wiring. Remaining 34 tokio::spawn sites marked as follow-up for a focused PR. Also fix save_mcp_config in main.rs to use write_atomic (missed in #355).
This commit is contained in:
+33
-1
@@ -505,6 +505,38 @@ enum SandboxCommand {
|
|||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
|
// Set up process panic hook before anything else — writes crash dumps
|
||||||
|
// to ~/.deepseek/crashes/ even if the panic happens before tokio is up.
|
||||||
|
let orig_hook = std::panic::take_hook();
|
||||||
|
std::panic::set_hook(Box::new(move |panic_info| {
|
||||||
|
let msg = if let Some(s) = panic_info.payload().downcast_ref::<&str>() {
|
||||||
|
s.to_string()
|
||||||
|
} else if let Some(s) = panic_info.payload().downcast_ref::<String>() {
|
||||||
|
s.clone()
|
||||||
|
} else {
|
||||||
|
format!("{:?}", panic_info.payload())
|
||||||
|
};
|
||||||
|
let location = panic_info
|
||||||
|
.location()
|
||||||
|
.map(|loc| loc.to_string())
|
||||||
|
.unwrap_or_else(|| "unknown".to_string());
|
||||||
|
tracing::error!(target: "panic", "Process panicked at {location}: {msg}");
|
||||||
|
// Write crash dump best-effort
|
||||||
|
if let Some(home) = dirs::home_dir() {
|
||||||
|
let crash_dir = home.join(".deepseek").join("crashes");
|
||||||
|
let _ = std::fs::create_dir_all(&crash_dir);
|
||||||
|
use chrono::Utc;
|
||||||
|
let ts = Utc::now().format("%Y%m%dT%H%M%S%.3fZ");
|
||||||
|
let path = crash_dir.join(format!("{ts}-process-panic.log"));
|
||||||
|
let contents = format!(
|
||||||
|
"Process panicked\nLocation: {location}\nTimestamp: {ts}\nPanic: {msg}\n",
|
||||||
|
);
|
||||||
|
let _ = std::fs::write(&path, contents);
|
||||||
|
}
|
||||||
|
// Invoke the original hook (prints to stderr, etc.)
|
||||||
|
orig_hook(panic_info);
|
||||||
|
}));
|
||||||
|
|
||||||
dotenv().ok();
|
dotenv().ok();
|
||||||
let cli = Cli::parse();
|
let cli = Cli::parse();
|
||||||
logging::set_verbose(cli.verbose || logging::env_requests_verbose_logging());
|
logging::set_verbose(cli.verbose || logging::env_requests_verbose_logging());
|
||||||
@@ -2638,7 +2670,7 @@ fn save_mcp_config(path: &Path, cfg: &McpConfig) -> Result<()> {
|
|||||||
}
|
}
|
||||||
let rendered = serde_json::to_string_pretty(cfg)
|
let rendered = serde_json::to_string_pretty(cfg)
|
||||||
.map_err(|e| anyhow!("Failed to serialize MCP config: {e}"))?;
|
.map_err(|e| anyhow!("Failed to serialize MCP config: {e}"))?;
|
||||||
std::fs::write(path, rendered)
|
crate::utils::write_atomic(path, rendered.as_bytes())
|
||||||
.map_err(|e| anyhow!("Failed to write MCP config {}: {}", path.display(), e))?;
|
.map_err(|e| anyhow!("Failed to write MCP config {}: {}", path.display(), e))?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ use std::sync::OnceLock;
|
|||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
|
|
||||||
use crate::session_manager::{SavedSession, SessionManager};
|
use crate::session_manager::{SavedSession, SessionManager};
|
||||||
|
use crate::utils::spawn_supervised;
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Request type
|
// Request type
|
||||||
@@ -99,7 +100,7 @@ pub fn spawn_persistence_actor(manager: SessionManager) -> PersistActorHandle {
|
|||||||
let (tx, mut rx) = mpsc::unbounded_channel::<PersistRequest>();
|
let (tx, mut rx) = mpsc::unbounded_channel::<PersistRequest>();
|
||||||
let handle = PersistActorHandle { tx };
|
let handle = PersistActorHandle { tx };
|
||||||
|
|
||||||
tokio::spawn(async move {
|
spawn_supervised("persistence-actor", std::panic::Location::caller(), async move {
|
||||||
let mut latest_checkpoint: Option<SavedSession> = None;
|
let mut latest_checkpoint: Option<SavedSession> = None;
|
||||||
let mut latest_session: Option<SavedSession> = None;
|
let mut latest_session: Option<SavedSession> = None;
|
||||||
let mut should_clear: bool = false;
|
let mut should_clear: bool = false;
|
||||||
|
|||||||
@@ -202,6 +202,72 @@ pub fn flush_and_sync(writer: &mut std::io::BufWriter<std::fs::File>) -> std::io
|
|||||||
writer.get_ref().sync_all()
|
writer.get_ref().sync_all()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Spawn a tokio task with panic supervision.
|
||||||
|
///
|
||||||
|
/// Wraps the future in `AssertUnwindSafe` + `catch_unwind`. On panic:
|
||||||
|
/// 1. Logs the panic with the task name and caller location via `tracing::error!`.
|
||||||
|
/// 2. Writes a crash dump to `~/.deepseek/crashes/<timestamp>-<name>.log`.
|
||||||
|
///
|
||||||
|
/// The returned `JoinHandle` resolves to `()` — the panic is caught and
|
||||||
|
/// handled internally so the parent process stays alive.
|
||||||
|
pub fn spawn_supervised<F>(
|
||||||
|
name: &'static str,
|
||||||
|
location: &'static std::panic::Location<'static>,
|
||||||
|
future: F,
|
||||||
|
) -> tokio::task::JoinHandle<()>
|
||||||
|
where
|
||||||
|
F: std::future::Future<Output = ()> + Send + 'static,
|
||||||
|
{
|
||||||
|
tokio::spawn(async move {
|
||||||
|
use futures_util::FutureExt;
|
||||||
|
let result = std::panic::AssertUnwindSafe(future)
|
||||||
|
.catch_unwind()
|
||||||
|
.await;
|
||||||
|
if let Err(panic_info) = result {
|
||||||
|
let msg = if let Some(s) = panic_info.downcast_ref::<&str>() {
|
||||||
|
s.to_string()
|
||||||
|
} else if let Some(s) = panic_info.downcast_ref::<String>() {
|
||||||
|
s.clone()
|
||||||
|
} else {
|
||||||
|
"unknown panic".to_string()
|
||||||
|
};
|
||||||
|
tracing::error!(
|
||||||
|
target: "panic",
|
||||||
|
"Task '{name}' panicked at {}: {msg}",
|
||||||
|
location,
|
||||||
|
);
|
||||||
|
// Write crash dump (best-effort)
|
||||||
|
let _ = write_panic_dump(name, location, &msg);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write a panic dump file to `~/.deepseek/crashes/`.
|
||||||
|
///
|
||||||
|
/// Creates the directory if needed and writes a timestamped log
|
||||||
|
/// with the task name, caller location, and panic message.
|
||||||
|
/// Best-effort — failures are silently ignored.
|
||||||
|
fn write_panic_dump(
|
||||||
|
name: &str,
|
||||||
|
location: &std::panic::Location<'_>,
|
||||||
|
message: &str,
|
||||||
|
) -> std::io::Result<()> {
|
||||||
|
use chrono::Utc;
|
||||||
|
let home = dirs::home_dir().ok_or_else(|| {
|
||||||
|
std::io::Error::new(std::io::ErrorKind::NotFound, "home directory not found")
|
||||||
|
})?;
|
||||||
|
let crash_dir = home.join(".deepseek").join("crashes");
|
||||||
|
std::fs::create_dir_all(&crash_dir)?;
|
||||||
|
let timestamp = Utc::now().format("%Y%m%dT%H%M%S%.3fZ");
|
||||||
|
let filename = format!("{timestamp}-{name}.log");
|
||||||
|
let path = crash_dir.join(&filename);
|
||||||
|
let contents = format!(
|
||||||
|
"Task: {name}\nLocation: {location}\nTimestamp: {timestamp}\nPanic: {message}\n"
|
||||||
|
);
|
||||||
|
std::fs::write(&path, contents)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub fn ensure_dir(path: &Path) -> Result<()> {
|
pub fn ensure_dir(path: &Path) -> Result<()> {
|
||||||
fs::create_dir_all(path)
|
fs::create_dir_all(path)
|
||||||
|
|||||||
Reference in New Issue
Block a user