diff --git a/README.md b/README.md index 27518ae7..7250ea8d 100644 --- a/README.md +++ b/README.md @@ -334,6 +334,7 @@ Key environment variables: | `DEEPSEEK_BASE_URL` | API base URL | | `DEEPSEEK_HTTP_HEADERS` | Optional custom model request headers, e.g. `X-Model-Provider-Id=your-model-provider` | | `DEEPSEEK_MODEL` | Default model | +| `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` | Stream idle timeout in seconds, default `300`, clamped to `1..=3600` | | `DEEPSEEK_PROVIDER` | `deepseek` (default), `deepseek-cn`, `nvidia-nim`, `openai`, `openrouter`, `novita`, `fireworks`, `sglang`, `vllm`, `ollama` | | `DEEPSEEK_PROFILE` | Config profile name | | `DEEPSEEK_MEMORY` | Set to `on` to enable user memory | diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 88108163..b571ca03 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -2039,9 +2039,9 @@ use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths}; use self::streaming::TOOL_CALL_START_MARKERS; use self::streaming::{ ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL, - MAX_TRANSPARENT_STREAM_RETRIES, STREAM_CHUNK_TIMEOUT_SECS, STREAM_MAX_CONTENT_BYTES, - STREAM_MAX_DURATION_SECS, ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta, - should_transparently_retry_stream, + MAX_TRANSPARENT_STREAM_RETRIES, STREAM_MAX_CONTENT_BYTES, STREAM_MAX_DURATION_SECS, + ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta, + should_transparently_retry_stream, stream_chunk_timeout_secs, }; use self::tool_catalog::{ CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME, diff --git a/crates/tui/src/core/engine/streaming.rs b/crates/tui/src/core/engine/streaming.rs index 1855dc18..3c2a654c 100644 --- a/crates/tui/src/core/engine/streaming.rs +++ b/crates/tui/src/core/engine/streaming.rs @@ -22,15 +22,30 @@ pub(super) struct ToolUseState { pub(super) input_buffer: String, } -/// Maximum time to wait for a single 
stream chunk before assuming a stall. +/// Default maximum time to wait for a single stream chunk before assuming a stall. /// **This is the idle timeout** — it resets on every SSE chunk, so long /// thinking turns that ARE producing reasoning_content stay alive. Only a /// genuine `chunk_timeout` window of silence kills the stream. -pub(super) const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90; +const DEFAULT_STREAM_CHUNK_TIMEOUT_SECS: u64 = 300; +const MIN_STREAM_CHUNK_TIMEOUT_SECS: u64 = 1; +const MAX_STREAM_CHUNK_TIMEOUT_SECS: u64 = 3600; +const STREAM_IDLE_TIMEOUT_ENV: &str = "DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS"; + +/// Reads the shared stream idle-timeout override used by the SSE client. +pub(super) fn stream_chunk_timeout_secs() -> u64 { + stream_chunk_timeout_secs_from_env(std::env::var(STREAM_IDLE_TIMEOUT_ENV).ok().as_deref()) +} + +fn stream_chunk_timeout_secs_from_env(value: Option<&str>) -> u64 { + value + .and_then(|v| v.parse::<u64>().ok()) + .unwrap_or(DEFAULT_STREAM_CHUNK_TIMEOUT_SECS) + .clamp(MIN_STREAM_CHUNK_TIMEOUT_SECS, MAX_STREAM_CHUNK_TIMEOUT_SECS) +} /// Maximum total bytes of text/thinking content before aborting the stream. pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB /// Sanity backstop for total stream wall-clock duration. **Not** a routine -/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall +/// kill switch — the stream chunk idle timeout is the primary stall /// detector. The wall-clock cap is here only to bound pathological cases /// (e.g. a server that keeps sending heartbeats forever without progress). 
/// @@ -135,3 +150,20 @@ pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> St output } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn stream_chunk_timeout_defaults_and_clamps_env_values() { + assert_eq!(stream_chunk_timeout_secs_from_env(None), 300); + assert_eq!( + stream_chunk_timeout_secs_from_env(Some("not-a-number")), + 300 + ); + assert_eq!(stream_chunk_timeout_secs_from_env(Some("0")), 1); + assert_eq!(stream_chunk_timeout_secs_from_env(Some("90")), 90); + assert_eq!(stream_chunk_timeout_secs_from_env(Some("99999")), 3600); + } +} diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index b43fb8a4..7aa9bff7 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -327,7 +327,8 @@ impl Engine { // budget restarts with the fresh stream. let mut stream_start = Instant::now(); let mut stream_content_bytes: usize = 0; - let chunk_timeout = Duration::from_secs(STREAM_CHUNK_TIMEOUT_SECS); + let chunk_timeout_secs = stream_chunk_timeout_secs(); + let chunk_timeout = Duration::from_secs(chunk_timeout_secs); let max_duration = Duration::from_secs(STREAM_MAX_DURATION_SECS); // Process stream events @@ -340,7 +341,7 @@ impl Engine { Ok(None) => None, // stream ended normally Err(_) => { let envelope = StreamError::Stall { - timeout_secs: STREAM_CHUNK_TIMEOUT_SECS, + timeout_secs: chunk_timeout_secs, } .into_envelope(); crate::logging::warn(&envelope.message); diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f877a777..5ad3f27f 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -157,6 +157,7 @@ fallbacks after saved config and keyring credentials: - `DEEPSEEK_HTTP_HEADERS` (custom model request headers, comma-separated `name=value` pairs) - `DEEPSEEK_PROVIDER` (`deepseek|deepseek-cn|nvidia-nim|openai|openrouter|novita|fireworks|sglang|vllm|ollama`) - `DEEPSEEK_MODEL` or `DEEPSEEK_DEFAULT_TEXT_MODEL` +- 
`DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` (stream idle timeout in seconds; default `300`, clamped to `1..=3600`) - `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY` (preferred when provider is `nvidia-nim`; falls back to `DEEPSEEK_API_KEY`) - `NVIDIA_NIM_BASE_URL`, `NIM_BASE_URL`, or `NVIDIA_BASE_URL` - `NVIDIA_NIM_MODEL`