fix(engine): align stream idle timeout guard (#1012)

This commit is contained in:
axobase001
2026-05-07 18:42:24 +08:00
committed by GitHub
parent f97604c3f0
commit 9327167e1d
5 changed files with 43 additions and 8 deletions
+1
View File
@@ -334,6 +334,7 @@ Key environment variables:
| `DEEPSEEK_BASE_URL` | API base URL |
| `DEEPSEEK_HTTP_HEADERS` | Optional custom model request headers, e.g. `X-Model-Provider-Id=your-model-provider` |
| `DEEPSEEK_MODEL` | Default model |
| `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` | Stream idle timeout in seconds; default `300`, clamped to `1..=3600` (non-numeric values fall back to the default) |
| `DEEPSEEK_PROVIDER` | `deepseek` (default), `deepseek-cn`, `nvidia-nim`, `openai`, `openrouter`, `novita`, `fireworks`, `sglang`, `vllm`, `ollama` |
| `DEEPSEEK_PROFILE` | Config profile name |
| `DEEPSEEK_MEMORY` | Set to `on` to enable user memory |
+3 -3
View File
@@ -2039,9 +2039,9 @@ use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
use self::streaming::TOOL_CALL_START_MARKERS;
use self::streaming::{
ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL,
MAX_TRANSPARENT_STREAM_RETRIES, STREAM_CHUNK_TIMEOUT_SECS, STREAM_MAX_CONTENT_BYTES,
STREAM_MAX_DURATION_SECS, ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
should_transparently_retry_stream,
MAX_TRANSPARENT_STREAM_RETRIES, STREAM_MAX_CONTENT_BYTES, STREAM_MAX_DURATION_SECS,
ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
should_transparently_retry_stream, stream_chunk_timeout_secs,
};
use self::tool_catalog::{
CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
+35 -3
View File
@@ -22,15 +22,30 @@ pub(super) struct ToolUseState {
pub(super) input_buffer: String,
}
/// Maximum time to wait for a single stream chunk before assuming a stall.
/// Default maximum time to wait for a single stream chunk before assuming a stall.
/// **This is the idle timeout** — it resets on every SSE chunk, so long
/// thinking turns that ARE producing reasoning_content stay alive. Only a
/// genuine `chunk_timeout` window of silence kills the stream.
pub(super) const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
const DEFAULT_STREAM_CHUNK_TIMEOUT_SECS: u64 = 5 * 60; // five minutes
/// Smallest idle timeout, in seconds, that an override may resolve to.
const MIN_STREAM_CHUNK_TIMEOUT_SECS: u64 = 1;
/// Largest idle timeout, in seconds, that an override may resolve to (one hour).
const MAX_STREAM_CHUNK_TIMEOUT_SECS: u64 = 60 * 60;
/// Name of the environment variable that overrides the default idle timeout.
const STREAM_IDLE_TIMEOUT_ENV: &str = "DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS";
/// Returns the stream idle timeout, in seconds, for the current process.
///
/// The environment variable named by `STREAM_IDLE_TIMEOUT_ENV` is looked up
/// on every call. A variable that is unset (or whose value is not valid
/// Unicode) is treated as absent. Interpreting the raw text is delegated to
/// `stream_chunk_timeout_secs_from_env`, which keeps that logic testable
/// without touching the process environment.
pub(super) fn stream_chunk_timeout_secs() -> u64 {
    let raw_override = std::env::var(STREAM_IDLE_TIMEOUT_ENV).ok();
    stream_chunk_timeout_secs_from_env(raw_override.as_deref())
}
/// Turns a raw idle-timeout override into a number of seconds.
///
/// * `None`, or text that is not an unsigned integer, yields
///   `DEFAULT_STREAM_CHUNK_TIMEOUT_SECS`.
/// * Leading and trailing whitespace is ignored, so values such as `"90\n"`
///   or `" 120 "` (common with `.env` files and shell quoting) are honoured
///   instead of silently falling back to the default.
/// * An all-digit value too large for `u64` saturates to the upper bound,
///   matching how every other oversized value is handled.
///
/// The result is always clamped to
/// `MIN_STREAM_CHUNK_TIMEOUT_SECS..=MAX_STREAM_CHUNK_TIMEOUT_SECS`.
fn stream_chunk_timeout_secs_from_env(value: Option<&str>) -> u64 {
    let requested = value.and_then(|raw| match raw.trim().parse::<u64>() {
        Ok(secs) => Some(secs),
        // "Too big to represent" still means "as long as allowed", not "invalid".
        Err(err) if matches!(err.kind(), std::num::IntErrorKind::PosOverflow) => Some(u64::MAX),
        Err(_) => None,
    });

    requested
        .unwrap_or(DEFAULT_STREAM_CHUNK_TIMEOUT_SECS)
        .clamp(MIN_STREAM_CHUNK_TIMEOUT_SECS, MAX_STREAM_CHUNK_TIMEOUT_SECS)
}
/// Maximum total bytes of text/thinking content before aborting the stream.
pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
/// Sanity backstop for total stream wall-clock duration. **Not** a routine
/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall
/// kill switch — the stream chunk idle timeout is the primary stall
/// detector. The wall-clock cap is here only to bound pathological cases
/// (e.g. a server that keeps sending heartbeats forever without progress).
///
@@ -135,3 +150,20 @@ pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> St
output
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of the override parser: an absent or unparsable
    /// value yields the default, and out-of-range values are pulled back
    /// into the allowed window.
    #[test]
    fn stream_chunk_timeout_defaults_and_clamps_env_values() {
        let cases: [(Option<&str>, u64); 5] = [
            (None, 300),
            (Some("not-a-number"), 300),
            (Some("0"), 1),
            (Some("90"), 90),
            (Some("99999"), 3600),
        ];

        for &(raw, expected) in cases.iter() {
            assert_eq!(
                stream_chunk_timeout_secs_from_env(raw),
                expected,
                "unexpected timeout for input {:?}",
                raw
            );
        }
    }
}
+3 -2
View File
@@ -327,7 +327,8 @@ impl Engine {
// budget restarts with the fresh stream.
let mut stream_start = Instant::now();
let mut stream_content_bytes: usize = 0;
let chunk_timeout = Duration::from_secs(STREAM_CHUNK_TIMEOUT_SECS);
let chunk_timeout_secs = stream_chunk_timeout_secs();
let chunk_timeout = Duration::from_secs(chunk_timeout_secs);
let max_duration = Duration::from_secs(STREAM_MAX_DURATION_SECS);
// Process stream events
@@ -340,7 +341,7 @@ impl Engine {
Ok(None) => None, // stream ended normally
Err(_) => {
let envelope = StreamError::Stall {
timeout_secs: STREAM_CHUNK_TIMEOUT_SECS,
timeout_secs: chunk_timeout_secs,
}
.into_envelope();
crate::logging::warn(&envelope.message);
+1
View File
@@ -157,6 +157,7 @@ fallbacks after saved config and keyring credentials:
- `DEEPSEEK_HTTP_HEADERS` (custom model request headers, comma-separated `name=value` pairs)
- `DEEPSEEK_PROVIDER` (`deepseek|deepseek-cn|nvidia-nim|openai|openrouter|novita|fireworks|sglang|vllm|ollama`)
- `DEEPSEEK_MODEL` or `DEEPSEEK_DEFAULT_TEXT_MODEL`
- `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` (stream idle timeout in seconds; default `300`, clamped to `1..=3600`; non-numeric values fall back to the default)
- `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY` (preferred when provider is `nvidia-nim`; falls back to `DEEPSEEK_API_KEY`)
- `NVIDIA_NIM_BASE_URL`, `NIM_BASE_URL`, or `NVIDIA_BASE_URL`
- `NVIDIA_NIM_MODEL`