fix(engine): align stream idle timeout guard (#1012)

2026-05-07 18:42:24 +08:00
parent f97604c3f0
commit 9327167e1d
5 changed files with 43 additions and 8 deletions
@@ -334,6 +334,7 @@ Key environment variables:
 | `DEEPSEEK_BASE_URL` | API base URL |
 | `DEEPSEEK_HTTP_HEADERS` | Optional custom model request headers, e.g. `X-Model-Provider-Id=your-model-provider` |
 | `DEEPSEEK_MODEL` | Default model |
+| `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` | Stream idle timeout in seconds, default `300`, clamped to `1..=3600`; values that are not a non-negative integer fall back to the default |
 | `DEEPSEEK_PROVIDER` | `deepseek` (default), `deepseek-cn`, `nvidia-nim`, `openai`, `openrouter`, `novita`, `fireworks`, `sglang`, `vllm`, `ollama` |
 | `DEEPSEEK_PROFILE` | Config profile name |
 | `DEEPSEEK_MEMORY` | Set to `on` to enable user memory |
@@ -2039,9 +2039,9 @@ use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
 use self::streaming::TOOL_CALL_START_MARKERS;
 use self::streaming::{
    ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL,
-    MAX_TRANSPARENT_STREAM_RETRIES, STREAM_CHUNK_TIMEOUT_SECS, STREAM_MAX_CONTENT_BYTES,
-    STREAM_MAX_DURATION_SECS, ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
-    should_transparently_retry_stream,
+    MAX_TRANSPARENT_STREAM_RETRIES, STREAM_MAX_CONTENT_BYTES, STREAM_MAX_DURATION_SECS,
+    ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
+    should_transparently_retry_stream, stream_chunk_timeout_secs,
 };
 use self::tool_catalog::{
    CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
@@ -22,15 +22,30 @@ pub(super) struct ToolUseState {
    pub(super) input_buffer: String,
 }

-/// Maximum time to wait for a single stream chunk before assuming a stall.
+/// Default maximum time to wait for a single stream chunk before assuming a stall.
 /// **This is the idle timeout** — it resets on every SSE chunk, so long
 /// thinking turns that ARE producing reasoning_content stay alive. Only a
 /// genuine `chunk_timeout` window of silence kills the stream.
-pub(super) const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
+const DEFAULT_STREAM_CHUNK_TIMEOUT_SECS: u64 = 300; // fallback when no valid override
+const MIN_STREAM_CHUNK_TIMEOUT_SECS: u64 = 1; // lower clamp bound, in seconds
+const MAX_STREAM_CHUNK_TIMEOUT_SECS: u64 = 3600; // upper clamp bound, in seconds
+const STREAM_IDLE_TIMEOUT_ENV: &str = "DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS"; // override env var
+
+/// Resolves the stream idle timeout, in seconds, from the process environment.
+///
+/// Looks up the variable named by `STREAM_IDLE_TIMEOUT_ENV`; an unset or
+/// non-Unicode value is treated as absent. The raw text (if any) is then
+/// handed to `stream_chunk_timeout_secs_from_env` for parsing and clamping.
+pub(super) fn stream_chunk_timeout_secs() -> u64 {
+    let raw_override = std::env::var(STREAM_IDLE_TIMEOUT_ENV).ok();
+    stream_chunk_timeout_secs_from_env(raw_override.as_deref())
+}
+
+/// Converts a raw idle-timeout override into a timeout in seconds.
+///
+/// * `value` — the text of the environment override, or `None` when unset.
+///
+/// Surrounding whitespace is ignored, so values such as `"90 "` or `"90\n"`
+/// (easy to produce from shell scripts and `.env` files) are honoured instead
+/// of silently falling back to the default. `None`, or text that does not
+/// parse as a `u64` after trimming (including negative numbers), yields
+/// `DEFAULT_STREAM_CHUNK_TIMEOUT_SECS`. The result is always clamped to
+/// `MIN_STREAM_CHUNK_TIMEOUT_SECS..=MAX_STREAM_CHUNK_TIMEOUT_SECS`.
+fn stream_chunk_timeout_secs_from_env(value: Option<&str>) -> u64 {
+    value
+        // `str::parse::<u64>` rejects leading/trailing whitespace, so trim first.
+        .and_then(|raw| raw.trim().parse::<u64>().ok())
+        .unwrap_or(DEFAULT_STREAM_CHUNK_TIMEOUT_SECS)
+        .clamp(MIN_STREAM_CHUNK_TIMEOUT_SECS, MAX_STREAM_CHUNK_TIMEOUT_SECS)
+}
 /// Maximum total bytes of text/thinking content before aborting the stream.
 pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
 /// Sanity backstop for total stream wall-clock duration. **Not** a routine
-/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall
+/// kill switch — the stream chunk idle timeout is the primary stall
 /// detector. The wall-clock cap is here only to bound pathological cases
 /// (e.g. a server that keeps sending heartbeats forever without progress).
 ///
@@ -135,3 +150,20 @@ pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> St

    output
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Covers the default (unset and unparsable input), both clamp bounds,
+    /// and an in-range value that must pass through unchanged.
+    #[test]
+    fn stream_chunk_timeout_defaults_and_clamps_env_values() {
+        let cases: [(Option<&str>, u64); 5] = [
+            (None, 300),
+            (Some("not-a-number"), 300),
+            (Some("0"), 1),
+            (Some("90"), 90),
+            (Some("99999"), 3600),
+        ];
+
+        for (raw, expected_secs) in cases {
+            assert_eq!(stream_chunk_timeout_secs_from_env(raw), expected_secs);
+        }
+    }
+}
@@ -327,7 +327,8 @@ impl Engine {
            // budget restarts with the fresh stream.
            let mut stream_start = Instant::now();
            let mut stream_content_bytes: usize = 0;
-            let chunk_timeout = Duration::from_secs(STREAM_CHUNK_TIMEOUT_SECS);
+            let chunk_timeout_secs = stream_chunk_timeout_secs();
+            let chunk_timeout = Duration::from_secs(chunk_timeout_secs);
            let max_duration = Duration::from_secs(STREAM_MAX_DURATION_SECS);

            // Process stream events
@@ -340,7 +341,7 @@ impl Engine {
                            Ok(None) => None, // stream ended normally
                            Err(_) => {
                                let envelope = StreamError::Stall {
-                                    timeout_secs: STREAM_CHUNK_TIMEOUT_SECS,
+                                    timeout_secs: chunk_timeout_secs,
                                }
                                .into_envelope();
                                crate::logging::warn(&envelope.message);
@@ -157,6 +157,7 @@ fallbacks after saved config and keyring credentials:
 - `DEEPSEEK_HTTP_HEADERS` (custom model request headers, comma-separated `name=value` pairs)
 - `DEEPSEEK_PROVIDER` (`deepseek|deepseek-cn|nvidia-nim|openai|openrouter|novita|fireworks|sglang|vllm|ollama`)
 - `DEEPSEEK_MODEL` or `DEEPSEEK_DEFAULT_TEXT_MODEL`
+- `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` (stream idle timeout in seconds; default `300`, clamped to `1..=3600`; values that are not a non-negative integer fall back to the default)
 - `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY` (preferred when provider is `nvidia-nim`; falls back to `DEEPSEEK_API_KEY`)
 - `NVIDIA_NIM_BASE_URL`, `NIM_BASE_URL`, or `NVIDIA_BASE_URL`
 - `NVIDIA_NIM_MODEL`