A panic inside `handle_deepseek_turn` unwound through `engine.run()` and was
caught by `spawn_supervised("engine-event-loop")`, which wrote a crash dump
and let the whole engine task exit. The UI never received `TurnComplete`, so
it sat on "working" forever and every subsequent turn was dead too — exactly
the "the engine have stopped" / stuck-on-working reports.
Wrap the turn call in `catch_unwind` so a panic now surfaces as a failed
`TurnComplete` (with a clear, actionable message) and the engine keeps
running. The crash dump is still written via a new `record_caught_panic`
helper so maintainers retain the `~/.codewhale/crashes/` diagnostics.
Also dedupes the panic-message extraction in `spawn_supervised` /
`spawn_blocking_supervised` into a shared `panic_message` helper.
https://claude.ai/code/session_01MQrnh6wHfrEYN5BBdMarC1
This commit is contained in:
@@ -1679,16 +1679,35 @@ impl Engine {
|
||||
.as_ref()
|
||||
.map(|client| client.base_url().to_string());
|
||||
|
||||
// Main turn loop
|
||||
let (status, error) = self
|
||||
.handle_deepseek_turn(
|
||||
&mut turn,
|
||||
tool_registry.as_ref(),
|
||||
tools,
|
||||
mode,
|
||||
force_update_plan_first,
|
||||
)
|
||||
.await;
|
||||
// Main turn loop. Catch panics here so an internal error surfaces as a
|
||||
// failed TurnComplete instead of unwinding through `engine.run()` and
|
||||
// killing the whole engine-event-loop task — which left the UI stuck
|
||||
// on "working" forever with the engine silently dead (#2583, #1269).
|
||||
use futures_util::FutureExt as _;
|
||||
let turn_result = std::panic::AssertUnwindSafe(self.handle_deepseek_turn(
|
||||
&mut turn,
|
||||
tool_registry.as_ref(),
|
||||
tools,
|
||||
mode,
|
||||
force_update_plan_first,
|
||||
))
|
||||
.catch_unwind()
|
||||
.await;
|
||||
let (status, error) = match turn_result {
|
||||
Ok(outcome) => outcome,
|
||||
Err(panic) => {
|
||||
let detail = crate::utils::panic_message(&*panic);
|
||||
crate::utils::record_caught_panic("engine-event-loop", &detail);
|
||||
(
|
||||
TurnOutcomeStatus::Failed,
|
||||
Some(format!(
|
||||
"The engine hit an internal error and stopped this turn: {detail}. \
|
||||
Your session is intact — send your message again to retry. \
|
||||
A crash report was saved to ~/.codewhale/crashes/."
|
||||
)),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
// Update session usage
|
||||
self.session.total_usage.add(&turn.usage);
|
||||
|
||||
+28
-14
@@ -282,13 +282,7 @@ where
|
||||
use futures_util::FutureExt;
|
||||
let result = std::panic::AssertUnwindSafe(future).catch_unwind().await;
|
||||
if let Err(panic_info) = result {
|
||||
let msg = if let Some(s) = panic_info.downcast_ref::<&str>() {
|
||||
s.to_string()
|
||||
} else if let Some(s) = panic_info.downcast_ref::<String>() {
|
||||
s.clone()
|
||||
} else {
|
||||
"unknown panic".to_string()
|
||||
};
|
||||
let msg = panic_message(&*panic_info);
|
||||
tracing::error!(
|
||||
target: "panic",
|
||||
"Task '{name}' panicked at {}: {msg}",
|
||||
@@ -300,6 +294,32 @@ where
|
||||
})
|
||||
}
|
||||
|
||||
/// Extract a human-readable message from a caught panic payload (the `Err`
/// value of `catch_unwind`). Mirrors how the panic hook formats `&str` and
/// `String` payloads so crash dumps stay consistent across call sites.
///
/// Payload handling:
/// - `&'static str` (e.g. `panic!("literal")`) and `String` (e.g. a formatted
///   `panic!`) are returned as owned text.
/// - A payload that is itself a `Box<dyn Any + Send>` is unwrapped and
///   inspected again. This covers the easy mistake of passing `&payload`
///   instead of `&*payload`: the reference then unsizes the *box* into
///   `dyn Any`, which would otherwise hide the real message.
/// - Any other payload type yields `"unknown panic"`.
#[must_use]
pub fn panic_message(panic: &(dyn std::any::Any + Send)) -> String {
    if let Some(s) = panic.downcast_ref::<&str>() {
        (*s).to_string()
    } else if let Some(s) = panic.downcast_ref::<String>() {
        s.clone()
    } else if let Some(inner) = panic.downcast_ref::<Box<dyn std::any::Any + Send>>() {
        // The caller handed us the box rather than its contents; look inside.
        panic_message(&**inner)
    } else {
        "unknown panic".to_string()
    }
}
|
||||
|
||||
/// Record a panic that was caught at a call site (via `catch_unwind`) rather
|
||||
/// than by a task supervisor. Logs it on the `panic` target and writes a
|
||||
/// best-effort crash dump to `~/.codewhale/crashes/`, so diagnostics land in
|
||||
/// the same place `spawn_supervised` writes them even when the caller recovers
|
||||
/// and keeps running.
///
/// `name` labels the task in the log line and is forwarded to
/// `write_panic_dump`; `message` is the already-extracted panic text.
///
/// Because of `#[track_caller]`, the location logged and passed to the dump
/// is where this function was called, not where the panic was raised.
|
||||
#[track_caller]
|
||||
pub fn record_caught_panic(name: &'static str, message: &str) {
|
||||
// Must be called directly in this body so `#[track_caller]` applies.
let location = std::panic::Location::caller();
|
||||
tracing::error!(target: "panic", "Task '{name}' panicked at {location}: {message}");
|
||||
// Best-effort: the result of `write_panic_dump` is deliberately discarded
// so a failure to write the dump never disturbs the recovering caller.
let _ = write_panic_dump(name, location, message);
|
||||
}
|
||||
|
||||
/// Write a panic dump file to `~/.codewhale/crashes/`.
|
||||
///
|
||||
/// Creates the directory if needed and writes a timestamped log
|
||||
@@ -362,13 +382,7 @@ where
|
||||
tokio::task::spawn_blocking(move || {
|
||||
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f));
|
||||
if let Err(panic_info) = result {
|
||||
let msg = if let Some(s) = panic_info.downcast_ref::<&str>() {
|
||||
s.to_string()
|
||||
} else if let Some(s) = panic_info.downcast_ref::<String>() {
|
||||
s.clone()
|
||||
} else {
|
||||
"unknown panic".to_string()
|
||||
};
|
||||
let msg = panic_message(&*panic_info);
|
||||
tracing::error!(
|
||||
target: "panic",
|
||||
"Blocking task '{name}' panicked at {location}: {msg}",
|
||||
|
||||
Reference in New Issue
Block a user