fix(engine): align stream idle timeout guard (#1012)
This commit is contained in:
@@ -334,6 +334,7 @@ Key environment variables:
|
||||
| `DEEPSEEK_BASE_URL` | API base URL |
|
||||
| `DEEPSEEK_HTTP_HEADERS` | Optional custom model request headers, e.g. `X-Model-Provider-Id=your-model-provider` |
|
||||
| `DEEPSEEK_MODEL` | Default model |
|
||||
| `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` | Stream idle timeout in seconds, default `300`, clamped to `1..=3600` |
|
||||
| `DEEPSEEK_PROVIDER` | `deepseek` (default), `deepseek-cn`, `nvidia-nim`, `openai`, `openrouter`, `novita`, `fireworks`, `sglang`, `vllm`, `ollama` |
|
||||
| `DEEPSEEK_PROFILE` | Config profile name |
|
||||
| `DEEPSEEK_MEMORY` | Set to `on` to enable user memory |
|
||||
|
||||
@@ -2039,9 +2039,9 @@ use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
|
||||
use self::streaming::TOOL_CALL_START_MARKERS;
|
||||
use self::streaming::{
|
||||
ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL,
|
||||
MAX_TRANSPARENT_STREAM_RETRIES, STREAM_CHUNK_TIMEOUT_SECS, STREAM_MAX_CONTENT_BYTES,
|
||||
STREAM_MAX_DURATION_SECS, ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
|
||||
should_transparently_retry_stream,
|
||||
MAX_TRANSPARENT_STREAM_RETRIES, STREAM_MAX_CONTENT_BYTES, STREAM_MAX_DURATION_SECS,
|
||||
ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
|
||||
should_transparently_retry_stream, stream_chunk_timeout_secs,
|
||||
};
|
||||
use self::tool_catalog::{
|
||||
CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
|
||||
|
||||
@@ -22,15 +22,30 @@ pub(super) struct ToolUseState {
|
||||
pub(super) input_buffer: String,
|
||||
}
|
||||
|
||||
/// Maximum time to wait for a single stream chunk before assuming a stall.
|
||||
/// Default maximum time to wait for a single stream chunk before assuming a stall.
|
||||
/// **This is the idle timeout** — it resets on every SSE chunk, so long
|
||||
/// thinking turns that ARE producing reasoning_content stay alive. Only a
|
||||
/// genuine `chunk_timeout` window of silence kills the stream.
|
||||
pub(super) const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
|
||||
const DEFAULT_STREAM_CHUNK_TIMEOUT_SECS: u64 = 300;
|
||||
/// Lower bound, in seconds, that the resolved stream idle timeout is clamped to.
const MIN_STREAM_CHUNK_TIMEOUT_SECS: u64 = 1;
|
||||
/// Upper bound, in seconds, that the resolved stream idle timeout is clamped to.
const MAX_STREAM_CHUNK_TIMEOUT_SECS: u64 = 3600;
|
||||
/// Name of the environment variable that overrides the stream idle timeout.
const STREAM_IDLE_TIMEOUT_ENV: &str = "DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS";
|
||||
|
||||
/// Reads the shared stream idle-timeout override used by the SSE client.
|
||||
pub(super) fn stream_chunk_timeout_secs() -> u64 {
|
||||
stream_chunk_timeout_secs_from_env(std::env::var(STREAM_IDLE_TIMEOUT_ENV).ok().as_deref())
|
||||
}
|
||||
|
||||
fn stream_chunk_timeout_secs_from_env(value: Option<&str>) -> u64 {
|
||||
value
|
||||
.and_then(|v| v.parse::<u64>().ok())
|
||||
.unwrap_or(DEFAULT_STREAM_CHUNK_TIMEOUT_SECS)
|
||||
.clamp(MIN_STREAM_CHUNK_TIMEOUT_SECS, MAX_STREAM_CHUNK_TIMEOUT_SECS)
|
||||
}
|
||||
/// Maximum total bytes of text/thinking content before aborting the stream.
|
||||
pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
|
||||
/// Sanity backstop for total stream wall-clock duration. **Not** a routine
|
||||
/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall
|
||||
/// kill switch — the stream chunk idle timeout is the primary stall
|
||||
/// detector. The wall-clock cap is here only to bound pathological cases
|
||||
/// (e.g. a server that keeps sending heartbeats forever without progress).
|
||||
///
|
||||
@@ -135,3 +150,20 @@ pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> St
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of the fallback and clamping rules applied to the
    /// raw idle-timeout override.
    #[test]
    fn stream_chunk_timeout_defaults_and_clamps_env_values() {
        let cases: [(Option<&str>, u64); 5] = [
            // Absent or unparsable input falls back to the default.
            (None, 300),
            (Some("not-a-number"), 300),
            // Below the minimum is raised to it.
            (Some("0"), 1),
            // In-range values pass through untouched.
            (Some("90"), 90),
            // Above the maximum is lowered to it.
            (Some("99999"), 3600),
        ];

        for (raw, expected) in cases {
            assert_eq!(
                stream_chunk_timeout_secs_from_env(raw),
                expected,
                "input: {raw:?}"
            );
        }
    }
}
|
||||
|
||||
@@ -327,7 +327,8 @@ impl Engine {
|
||||
// budget restarts with the fresh stream.
|
||||
let mut stream_start = Instant::now();
|
||||
let mut stream_content_bytes: usize = 0;
|
||||
let chunk_timeout = Duration::from_secs(STREAM_CHUNK_TIMEOUT_SECS);
|
||||
let chunk_timeout_secs = stream_chunk_timeout_secs();
|
||||
let chunk_timeout = Duration::from_secs(chunk_timeout_secs);
|
||||
let max_duration = Duration::from_secs(STREAM_MAX_DURATION_SECS);
|
||||
|
||||
// Process stream events
|
||||
@@ -340,7 +341,7 @@ impl Engine {
|
||||
Ok(None) => None, // stream ended normally
|
||||
Err(_) => {
|
||||
let envelope = StreamError::Stall {
|
||||
timeout_secs: STREAM_CHUNK_TIMEOUT_SECS,
|
||||
timeout_secs: chunk_timeout_secs,
|
||||
}
|
||||
.into_envelope();
|
||||
crate::logging::warn(&envelope.message);
|
||||
|
||||
@@ -157,6 +157,7 @@ fallbacks after saved config and keyring credentials:
|
||||
- `DEEPSEEK_HTTP_HEADERS` (custom model request headers, comma-separated `name=value` pairs)
|
||||
- `DEEPSEEK_PROVIDER` (`deepseek|deepseek-cn|nvidia-nim|openai|openrouter|novita|fireworks|sglang|vllm|ollama`)
|
||||
- `DEEPSEEK_MODEL` or `DEEPSEEK_DEFAULT_TEXT_MODEL`
|
||||
- `DEEPSEEK_STREAM_IDLE_TIMEOUT_SECS` (stream idle timeout in seconds; default `300`, clamped to `1..=3600`)
|
||||
- `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY` (preferred when provider is `nvidia-nim`; falls back to `DEEPSEEK_API_KEY`)
|
||||
- `NVIDIA_NIM_BASE_URL`, `NIM_BASE_URL`, or `NVIDIA_BASE_URL`
|
||||
- `NVIDIA_NIM_MODEL`
|
||||
|
||||
Reference in New Issue
Block a user