From a8fe5298a288449ca40357ddf5c37bf98e794ba5 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Sun, 26 Apr 2026 16:30:18 -0500 Subject: [PATCH] fix(tui): don't enter offline mode on recoverable engine errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TUI's `EngineEvent::Error` handler in `ui.rs` matched on `{ message, .. }` and unconditionally set `app.offline_mode = true`. This meant any transient stream-disconnect (e.g., the chunked-transfer connection getting closed during a long V4 thinking turn with no SSE keepalive) flipped the session offline, queued the user's next message, and forced them to recover manually — even though the engine had already classified the error as recoverable. The engine has been emitting `Event::error(message, recoverable)` with the correct boolean since the error-taxonomy work in #66. Stream stalls (engine.rs:2286), max-duration aborts (:2322), max-bytes aborts (:2334), and upstream stream errors (:2344) all set `recoverable = true`. Hard failures like sub-agent spawn failures (:1202) and post-recovery context overflows (:1378, :1559) set `recoverable = false`. The UI just wasn't reading it. Pull the body out into a `pub(crate) fn apply_engine_error_to_app` helper so the branch logic is unit-testable from `ui/tests.rs`, then split: - `recoverable = true` → status: "Connection interrupted: …"; stay online. - `recoverable = false` → status: "Engine error; queued messages stay pending: …"; flip into offline mode. Add two regression tests covering both branches. Fixes #86 Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/tui/ui.rs | 44 +++++++++++++++++------- crates/tui/src/tui/ui/tests.rs | 61 ++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 12 deletions(-) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 0fd29561..bd8a79af 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -639,18 +639,11 @@ async fn run_event_loop( queued_to_send = app.pop_queued_message(); } } - EngineEvent::Error { message, .. } => { - app.streaming_state.reset(); - app.streaming_message_index = None; - app.streaming_thinking_active_entry = None; - app.add_message(HistoryCell::System { - content: format!("Error: {message}"), - }); - app.offline_mode = true; - app.is_loading = false; - app.status_message = Some(format!( - "Engine error; queued messages stay pending: {message}" - )); + EngineEvent::Error { + message, + recoverable, + } => { + apply_engine_error_to_app(app, message, recoverable); } EngineEvent::Status { message } => { app.status_message = Some(message); @@ -1715,6 +1708,33 @@ fn queued_session_to_ui(msg: QueuedSessionMessage) -> QueuedMessage { } } +/// Translate an `EngineEvent::Error` into UI state updates. +/// +/// `recoverable` is the engine's own classification: stream stalls, chunk +/// timeouts, transient network errors, and rate-limit/server hiccups arrive +/// with `recoverable = true` and must NOT flip the session into offline mode +/// — the user can resend the turn and the underlying transport will retry. +/// Hard failures (auth, billing, invalid request) arrive with +/// `recoverable = false`; those flip offline mode so subsequent messages get +/// queued instead of silently lost mid-flight. +pub(crate) fn apply_engine_error_to_app(app: &mut App, message: String, recoverable: bool) { + app.streaming_state.reset(); + app.streaming_message_index = None; + app.streaming_thinking_active_entry = None; + app.add_message(HistoryCell::System { + content: format!("Error: {message}"), + }); + app.is_loading = false; + if recoverable { + app.status_message = Some(format!("Connection interrupted: {message}")); + } else { + app.offline_mode = true; + app.status_message = Some(format!( + "Engine error; queued messages stay pending: {message}" + )); + } +} + fn persist_offline_queue_state(app: &App) { if let Ok(manager) = SessionManager::default_location() { if app.queued_messages.is_empty() && app.queued_draft.is_none() { diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index a4cc7bf6..c8a4844d 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -1914,3 +1914,64 @@ fn truncate_line_to_width_respects_display_width_for_tiny_budgets() { assert!(trimmed_long.ends_with("...")); assert!(UnicodeWidthStr::width(trimmed_long.as_str()) <= 10); } + +/// Regression for #86. A recoverable engine error (stream stall, transient +/// disconnect, retryable server hiccup) must NOT flip the session into +/// offline mode. Until this fix the UI matched on `EngineEvent::Error { +/// message, .. }` and unconditionally set `app.offline_mode = true`, so a +/// long V4 thinking turn whose chunked stream got closed mid-flight ended +/// the session in offline mode with the next typed message queued. +#[test] +fn recoverable_engine_error_does_not_enter_offline_mode() { + let mut app = create_test_app(); + assert!(!app.offline_mode); + + apply_engine_error_to_app( + &mut app, + "Stream stalled: no data received for 60s, closing stream".to_string(), + true, + ); + + assert!( + !app.offline_mode, + "recoverable error must keep the session online so the user can retry" + ); + assert!(!app.is_loading); + let status = app + .status_message + .as_deref() + .expect("recoverable errors must set a status message"); + assert!( + status.starts_with("Connection interrupted"), + "expected interrupt-style status, got {status:?}" + ); +} + +/// Hard failures (auth, billing, malformed request) DO need to flip offline +/// mode so subsequent typed messages get queued instead of silently lost +/// against a broken upstream. +#[test] +fn non_recoverable_engine_error_enters_offline_mode() { + let mut app = create_test_app(); + assert!(!app.offline_mode); + + apply_engine_error_to_app( + &mut app, + "Authentication failed: invalid API key".to_string(), + false, + ); + + assert!( + app.offline_mode, + "non-recoverable error must enter offline mode" + ); + assert!(!app.is_loading); + let status = app + .status_message + .as_deref() + .expect("non-recoverable errors must set a status message"); + assert!( + status.starts_with("Engine error"), + "expected engine-error status, got {status:?}" + ); +}