feat(tui): visual retry/backoff countdown in footer (#499)

When the API client retries a 429 / 5xx / network failure, the TUI
previously went silent during the backoff sleep. The user saw "thinking"
or "ready" with no signal anything was wrong, until the request finally
either succeeded or raised. PR adds a foreground retry banner so the
user sees what's happening and how long until the next attempt.

### What ships

- New `crates/tui/src/retry_status.rs` module exposing a process-wide
  `RetryState` (`Idle | Active(banner) | Failed { reason }`) with
  `start`, `succeeded`, `failed`, and `clear` helpers. The state is
  process-global because the user-facing TUI is one engine per process;
  sub-agent retries deliberately don't light up the foreground banner.
- `client::send_with_retry` now flips the state in its retry callback
  (`start(attempt+1, delay, reason)`) and on the final outcome
  (`succeeded()` on Ok, `failed(reason)` on Err with retries-exhausted,
  `clear()` on Err with attempts==1 so non-retryable errors don't pin
  the failure row).
- `human_retry_reason` translates the structured `LlmError` into a
  short label: rate-limit reasons include the `Retry-After` header
  when the upstream provided one ("rate limited (Retry-After 30s)").
- Footer's `render` checks `retry_status::snapshot()` first; when
  `Active` it renders `⟳ retry N in Ms — <reason>` in the warning
  color; when `Failed` it renders `× failed: <reason>` in the error
  color. Banner takes precedence over the toast and the regular
  status line.
- `Engine::handle_user_message` calls `retry_status::clear()` right
  after emitting `TurnStarted` so the previous turn's failure row
  doesn't bleed into a new turn.

### Tests

- 4 unit tests in `retry_status::tests` covering idle default, the
  active → succeeded round-trip, the failed-state pin, and a
  past-deadline saturation.
- 2 footer rendering tests asserting the banner / failure-row text
  appears in the rendered buffer.
- All tests touching the global retry surface serialize through
  `retry_status::test_guard()` so cargo's parallel runner can't observe
  a torn read.

### Verification

cargo fmt --all -- --check                                          ✓
cargo clippy --workspace --all-targets --all-features --locked --   -D warnings   ✓
cargo test --workspace --all-features --locked                      ✓ 1815 + supporting (was 1809 on stabilization base)
cargo test -p deepseek-tui --bin deepseek-tui --locked retry        ✓ 28 passed

Closes #499

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter Bown
2026-05-03 03:08:53 -05:00
parent dd3882e1bf
commit 8680a43298
5 changed files with 337 additions and 10 deletions
+43 -9
View File
@@ -576,36 +576,70 @@ impl DeepSeekClient {
}
},
Some(Box::new(|err, attempt, delay| {
let reason = match err {
LlmError::RateLimited { .. } => "rate_limited",
LlmError::ServerError { .. } => "server_error",
LlmError::NetworkError(_) => "network_error",
LlmError::Timeout(_) => "timeout",
_ => "other",
};
logging::warn(format!(
"HTTP retry reason={} attempt={} delay={:.2}s",
match err {
LlmError::RateLimited { .. } => "rate_limited",
LlmError::ServerError { .. } => "server_error",
LlmError::NetworkError(_) => "network_error",
LlmError::Timeout(_) => "timeout",
_ => "other",
},
reason,
attempt + 1,
delay.as_secs_f64(),
));
// Light up the foreground retry banner (#499). `attempt`
// here is 0-indexed for the *failed* attempt; surface the
// 1-indexed *upcoming* attempt the user is waiting on.
crate::retry_status::start(attempt + 1, delay, human_retry_reason(err, reason));
})),
)
.await;
match request_result {
Ok(response) => {
crate::retry_status::succeeded();
self.mark_request_success().await;
Ok(response)
}
Err(err) => {
self.mark_request_failure(&err.to_string()).await;
let last = err.last_error.to_string();
// Only mark the retry banner failed if at least one
// retry actually fired — non-retryable errors should
// surface as turn errors, not as retry-banner failures.
if err.attempts > 1 {
crate::retry_status::failed(last.clone());
} else {
crate::retry_status::clear();
}
self.mark_request_failure(&last).await;
self.maybe_probe_recovery().await;
Err(anyhow::anyhow!(err.to_string()))
Err(anyhow::anyhow!(last))
}
}
}
}
/// Translate the structured `LlmError` into a short human reason string
/// for the retry banner. Falls back to the categorical label so even
/// unknown variants render something useful.
fn human_retry_reason(err: &LlmError, fallback: &'static str) -> String {
match err {
LlmError::RateLimited { retry_after, .. } => {
if let Some(after) = retry_after {
format!("rate limited (Retry-After {}s)", after.as_secs())
} else {
"rate limited".to_string()
}
}
LlmError::ServerError { status, .. } => format!("upstream {status}"),
LlmError::NetworkError(_) => "network error".to_string(),
LlmError::Timeout(_) => "timeout".to_string(),
_ => fallback.to_string(),
}
}
impl LlmClient for DeepSeekClient {
fn provider_name(&self) -> &'static str {
self.api_provider.as_str()
+6
View File
@@ -706,6 +706,12 @@ impl Engine {
})
.await;
// A new turn means any leftover retry banner (success cleared
// it, failure pinned it) is no longer relevant — reset to idle
// so the footer doesn't display a stale failure row across
// turns (#499).
crate::retry_status::clear();
// Check if we have the appropriate client
if self.deepseek_client.is_none() {
let message = self
+1
View File
@@ -44,6 +44,7 @@ mod project_doc;
mod prompts;
pub mod repl;
mod responses_api_proxy;
mod retry_status;
pub mod rlm;
mod runtime_api;
mod runtime_threads;
+201
View File
@@ -0,0 +1,201 @@
//! Process-wide retry-state surface (#499).
//!
//! The HTTP retry path in `client::send_with_retry` already times its
//! waits and knows the error category. This module gives the TUI a way
//! to observe that state — `start`, `succeeded`, and `failed` flip a
//! global `RetryState` that the footer / status panel reads each frame.
//!
//! Why a process-wide global: the user-facing TUI runs as one engine
//! per process, and the only retry state we want to surface is the one
//! the user is staring at. Sub-agent retries in background tasks
//! deliberately do **not** light up the foreground banner — they're
//! supposed to be invisible. If a future feature ever needs per-engine
//! retry surfaces, swap this for an `Arc<RwLock<...>>` carried on the
//! `EngineHandle`; the public API stays the same.
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant};
/// One in-flight retry attempt. `deadline` is the wall-clock time the
/// next request will fire — the UI subtracts `Instant::now()` from it
/// to render a live countdown.
#[derive(Debug, Clone)]
pub struct RetryBanner {
/// 1-indexed retry attempt number (the first retry is attempt 1).
pub attempt: u32,
/// Time at which the next request will be sent.
pub deadline: Instant,
/// Short human-readable reason ("rate limited", "server error", …).
pub reason: String,
}
/// Snapshot of the retry surface for the UI to render.
#[derive(Debug, Clone, Default)]
pub enum RetryState {
/// No retry in flight. Banner hidden.
#[default]
Idle,
/// A request is sleeping before retrying. Show countdown banner.
Active(RetryBanner),
/// All retries exhausted; show failure row until the next turn
/// starts. `since` records when the row was set so a future polish
/// pass can age it out automatically; today the engine clears it on
/// `TurnStarted`.
Failed {
reason: String,
#[allow(dead_code)]
since: Instant,
},
}
impl RetryState {
/// Wall-clock seconds remaining on the active banner, or `None` if
/// not active. Saturates at zero — the renderer should treat any
/// negative remaining as "firing now".
#[must_use]
pub fn seconds_remaining(&self) -> Option<u64> {
match self {
Self::Active(banner) => Some(
banner
.deadline
.saturating_duration_since(Instant::now())
.as_secs(),
),
_ => None,
}
}
/// Whether the failure row should still be shown. Mirrors the
/// "until next turn" rule in the issue spec; the engine clears it
/// explicitly via [`clear`] on `TurnStarted`.
#[cfg(test)]
#[must_use]
pub fn is_failed(&self) -> bool {
matches!(self, Self::Failed { .. })
}
}
/// Lazy-init the cell on first read so callers don't have to initialize
/// process-wide state at boot.
fn cell() -> &'static Mutex<RetryState> {
static STATE: OnceLock<Mutex<RetryState>> = OnceLock::new();
STATE.get_or_init(|| Mutex::new(RetryState::Idle))
}
/// Public read snapshot for renderers.
#[must_use]
pub fn snapshot() -> RetryState {
cell().lock().map(|s| s.clone()).unwrap_or(RetryState::Idle)
}
/// Mark an in-flight retry. `attempt` is the number of the *upcoming*
/// retry (1 for the first); `delay` is how long the client will sleep
/// before firing.
pub fn start(attempt: u32, delay: Duration, reason: impl Into<String>) {
let banner = RetryBanner {
attempt,
deadline: Instant::now() + delay,
reason: reason.into(),
};
if let Ok(mut s) = cell().lock() {
*s = RetryState::Active(banner);
}
}
/// Mark the retry chain as having succeeded. Hides the banner.
pub fn succeeded() {
if let Ok(mut s) = cell().lock() {
*s = RetryState::Idle;
}
}
/// Mark the retry chain as having exhausted retries. The renderer keeps
/// the failure row until [`clear`] (typically called on `TurnStarted`).
pub fn failed(reason: impl Into<String>) {
if let Ok(mut s) = cell().lock() {
*s = RetryState::Failed {
reason: reason.into(),
since: Instant::now(),
};
}
}
/// Reset to idle. Called on `TurnStarted` so the previous turn's
/// failure row doesn't bleed into the next turn.
pub fn clear() {
if let Ok(mut s) = cell().lock() {
*s = RetryState::Idle;
}
}
/// Test helper: serialize tests that touch the global state so cargo's
/// parallel runner can't observe a torn read. The guard is exported so
/// tests in *other* modules (e.g. footer rendering tests) can hold the
/// same lock as the ones in `retry_status::tests`.
#[cfg(test)]
pub fn test_guard() -> std::sync::MutexGuard<'static, ()> {
static GUARD: Mutex<()> = Mutex::new(());
GUARD.lock().unwrap_or_else(|e| e.into_inner())
}
#[cfg(test)]
mod tests {
use super::*;
/// Acquire the cross-module test guard from [`super::test_guard`] and
/// reset state to `Idle` before yielding to the test body.
fn setup() -> std::sync::MutexGuard<'static, ()> {
let g = test_guard();
clear();
g
}
#[test]
fn idle_by_default_after_clear() {
let _g = setup();
assert!(matches!(snapshot(), RetryState::Idle));
assert_eq!(snapshot().seconds_remaining(), None);
}
#[test]
fn start_then_succeeded_returns_to_idle() {
let _g = setup();
start(1, Duration::from_secs(5), "rate limited");
let s = snapshot();
assert!(matches!(s, RetryState::Active(_)));
let remaining = s.seconds_remaining().unwrap();
assert!(remaining <= 5, "{remaining}");
succeeded();
assert!(matches!(snapshot(), RetryState::Idle));
}
#[test]
fn failed_persists_until_clear() {
let _g = setup();
failed("upstream 500");
let s = snapshot();
assert!(s.is_failed());
if let RetryState::Failed { reason, .. } = s {
assert_eq!(reason, "upstream 500");
} else {
panic!("expected Failed");
}
clear();
assert!(matches!(snapshot(), RetryState::Idle));
}
#[test]
fn deadline_in_past_yields_zero_remaining() {
let _g = setup();
// Bypass `start` so we can plant a deadline already in the past.
if let Ok(mut s) = cell().lock() {
*s = RetryState::Active(RetryBanner {
attempt: 2,
deadline: Instant::now() - Duration::from_secs(1),
reason: "test".into(),
});
}
assert_eq!(snapshot().seconds_remaining(), Some(0));
clear();
}
}
+86 -1
View File
@@ -438,6 +438,33 @@ fn spans_text(spans: &[Span<'_>]) -> String {
spans.iter().map(|s| s.content.as_ref()).collect::<String>()
}
/// Render the retry banner (#499) when the global retry-status surface
/// reports an active retry or a final failure. Returns `None` when idle
/// so callers fall back to the regular status line / toast.
fn retry_banner_spans(max_width: usize, props: &FooterProps) -> Option<Vec<Span<'static>>> {
let snapshot = crate::retry_status::snapshot();
let label = match &snapshot {
crate::retry_status::RetryState::Active(banner) => {
let secs = snapshot.seconds_remaining().unwrap_or(0);
// Round to 1s — we redraw each frame anyway so the
// countdown ticks visually without us having to schedule
// anything extra.
format!("⟳ retry {} in {secs}s — {}", banner.attempt, banner.reason)
}
crate::retry_status::RetryState::Failed { reason, .. } => {
format!("× failed: {reason}")
}
crate::retry_status::RetryState::Idle => return None,
};
let color = match &snapshot {
crate::retry_status::RetryState::Failed { .. } => crate::palette::STATUS_ERROR,
_ => crate::palette::STATUS_WARNING,
};
let _ = props; // keeping signature stable for future theme wiring
let truncated = truncate_to_width(&label, max_width);
Some(vec![Span::styled(truncated, Style::default().fg(color))])
}
impl Renderable for FooterWidget {
fn render(&self, area: Rect, buf: &mut Buffer) {
if area.height == 0 || area.width == 0 {
@@ -456,7 +483,13 @@ impl Renderable for FooterWidget {
.saturating_sub(min_gap)
.max(1);
let left_spans = if let Some(toast) = self.props.toast.as_ref() {
let left_spans = if let Some(banner) = retry_banner_spans(max_left_width, &self.props) {
// Retry banner takes precedence over toast and the regular
// status line so the user sees it loud and clear (#499).
// The banner clears automatically on success or on the next
// `TurnStarted` (engine emits the clear).
banner
} else if let Some(toast) = self.props.toast.as_ref() {
Self::toast_spans(toast, max_left_width)
} else {
self.status_line_spans(max_left_width)
@@ -699,6 +732,58 @@ mod tests {
}
}
#[test]
fn render_shows_retry_banner_when_active() {
let _g = crate::retry_status::test_guard();
crate::retry_status::clear();
crate::retry_status::start(2, std::time::Duration::from_secs(7), "rate limited");
let app = make_app();
let props = idle_props_for(&app);
let widget = FooterWidget::new(props);
let area = ratatui::layout::Rect::new(0, 0, 80, 1);
let mut buf = ratatui::buffer::Buffer::empty(area);
widget.render(area, &mut buf);
let rendered: String = (0..area.width).map(|x| buf[(x, 0)].symbol()).collect();
assert!(
rendered.contains("retry 2"),
"expected retry banner in render: {rendered:?}",
);
assert!(
rendered.contains("rate limited"),
"expected reason in render: {rendered:?}",
);
crate::retry_status::clear();
}
#[test]
fn render_shows_failure_row_when_failed() {
let _g = crate::retry_status::test_guard();
crate::retry_status::clear();
crate::retry_status::failed("upstream 500");
let app = make_app();
let props = idle_props_for(&app);
let widget = FooterWidget::new(props);
let area = ratatui::layout::Rect::new(0, 0, 80, 1);
let mut buf = ratatui::buffer::Buffer::empty(area);
widget.render(area, &mut buf);
let rendered: String = (0..area.width).map(|x| buf[(x, 0)].symbol()).collect();
assert!(
rendered.contains("failed"),
"expected failure row: {rendered:?}",
);
assert!(
rendered.contains("upstream 500"),
"expected reason: {rendered:?}",
);
crate::retry_status::clear();
}
#[test]
fn render_emits_mode_and_model_when_idle() {
let app = make_app();