refactor(engine): isolate streaming state helpers
This commit is contained in:
+17
-183
@@ -32,9 +32,11 @@ use crate::error_taxonomy::{ErrorCategory, ErrorEnvelope, StreamError};
|
||||
use crate::features::{Feature, Features};
|
||||
use crate::llm_client::LlmClient;
|
||||
use crate::mcp::McpPool;
|
||||
#[cfg(test)]
|
||||
use crate::models::ToolCaller;
|
||||
use crate::models::{
|
||||
ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest,
|
||||
StreamEvent, SystemPrompt, Tool, ToolCaller, Usage,
|
||||
StreamEvent, SystemPrompt, Tool, Usage,
|
||||
};
|
||||
use crate::prompts;
|
||||
use crate::seam_manager::{SeamConfig, SeamManager};
|
||||
@@ -291,185 +293,7 @@ pub struct Engine {
|
||||
pending_lsp_blocks: Vec<crate::lsp::DiagnosticBlock>,
|
||||
}
|
||||
|
||||
// === Internal stream helpers ===
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
enum ContentBlockKind {
|
||||
Text,
|
||||
Thinking,
|
||||
ToolUse,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ToolUseState {
|
||||
id: String,
|
||||
name: String,
|
||||
input: serde_json::Value,
|
||||
caller: Option<ToolCaller>,
|
||||
input_buffer: String,
|
||||
}
|
||||
|
||||
/// Maximum time to wait for a single stream chunk before assuming a stall.
|
||||
/// **This is the idle timeout** — it resets on every SSE chunk, so long
|
||||
/// thinking turns that ARE producing reasoning_content stay alive. Only a
|
||||
/// genuine `chunk_timeout` window of silence kills the stream.
|
||||
const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
|
||||
/// Maximum total bytes of text/thinking content before aborting the stream.
|
||||
const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
|
||||
/// Sanity backstop for total stream wall-clock duration. **Not** a routine
|
||||
/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall
|
||||
/// detector. The wall-clock cap is here only to bound pathological cases
|
||||
/// (e.g. a server that keeps sending heartbeats forever without progress).
|
||||
///
|
||||
/// History: this used to be 300s (5 min) which was too aggressive — V4
|
||||
/// thinking turns on hard prompts legitimately exceed 5 minutes wall-clock
|
||||
/// while still emitting reasoning_content chunks the whole way. Bumped to
|
||||
/// 30 min in v0.6.6 to address `TODO_FIXES.md` #1. Codex defaults to a
|
||||
/// per-chunk idle of 300s with no wall-clock cap; we keep both layers but
|
||||
/// give the wall-clock a generous window so it never fires in practice.
|
||||
const STREAM_MAX_DURATION_SECS: u64 = 1800; // 30 minutes (was 300s; #103/#1)
|
||||
/// Hard cap on consecutive recoverable stream errors before we surface a turn
|
||||
/// failure. Bumped 3 → 5 in v0.6.7 along with the HTTP/2 keepalive defaults
|
||||
/// (#103) — keepalive should make spurious decode errors rarer, so we can
|
||||
/// tolerate a longer streak before giving up on the turn.
|
||||
const MAX_STREAM_ERRORS_BEFORE_FAIL: u32 = 5;
|
||||
/// Cap on transparent stream-level retries — these only happen when the wire
|
||||
/// dies before any content was streamed, so DeepSeek hasn't billed us and
|
||||
/// the user hasn't seen anything. Two attempts is enough to ride out a
|
||||
/// flaky edge node without amplifying real outages (#103).
|
||||
const MAX_TRANSPARENT_STREAM_RETRIES: u32 = 2;
|
||||
|
||||
/// Decide whether a stream error is eligible for a transparent retry.
|
||||
///
|
||||
/// True only when ALL three conditions hold:
|
||||
/// 1. No content has been received on the current attempt — otherwise DeepSeek
|
||||
/// has already billed us for output tokens and the user has seen partial
|
||||
/// deltas; resending would double-bill and desync the UI.
|
||||
/// 2. We still have transparent-retry budget remaining.
|
||||
/// 3. The turn has not been cancelled.
|
||||
///
|
||||
/// Extracted as a pure function so the four #103 retry cases can be exercised
|
||||
/// in unit tests without booting the full engine state machine.
|
||||
fn should_transparently_retry_stream(
|
||||
any_content_received: bool,
|
||||
transparent_attempts: u32,
|
||||
cancelled: bool,
|
||||
) -> bool {
|
||||
!any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
|
||||
}
|
||||
pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
|
||||
"[TOOL_CALL]",
|
||||
"<deepseek:tool_call",
|
||||
"<tool_call",
|
||||
"<invoke ",
|
||||
"<function_calls>",
|
||||
];
|
||||
|
||||
pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [
|
||||
"[/TOOL_CALL]",
|
||||
"</deepseek:tool_call>",
|
||||
"</tool_call>",
|
||||
"</invoke>",
|
||||
"</function_calls>",
|
||||
];
|
||||
|
||||
/// Compact one-shot notice emitted when a model attempts to forge a tool-call
|
||||
/// wrapper in plain text instead of using the API tool channel. The visible
|
||||
/// content is still scrubbed; this exists so the user can see why their text
|
||||
/// shrank.
|
||||
pub(crate) const FAKE_WRAPPER_NOTICE: &str =
|
||||
"Stripped non-API tool-call wrapper from model output (use the API tool channel)";
|
||||
|
||||
/// True if `text` contains any of the known fake-wrapper start markers. Used by
|
||||
/// the streaming loop to decide whether to emit `FAKE_WRAPPER_NOTICE`.
|
||||
pub(crate) fn contains_fake_tool_wrapper(text: &str) -> bool {
|
||||
TOOL_CALL_START_MARKERS.iter().any(|m| text.contains(m))
|
||||
}
|
||||
|
||||
fn find_first_marker(text: &str, markers: &[&str]) -> Option<(usize, usize)> {
|
||||
markers
|
||||
.iter()
|
||||
.filter_map(|marker| text.find(marker).map(|idx| (idx, marker.len())))
|
||||
.min_by_key(|(idx, _)| *idx)
|
||||
}
|
||||
|
||||
pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> String {
|
||||
if delta.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let mut output = String::new();
|
||||
let mut rest = delta;
|
||||
|
||||
loop {
|
||||
if *in_tool_call {
|
||||
let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_END_MARKERS) else {
|
||||
break;
|
||||
};
|
||||
rest = &rest[idx + len..];
|
||||
*in_tool_call = false;
|
||||
} else {
|
||||
let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_START_MARKERS) else {
|
||||
output.push_str(rest);
|
||||
break;
|
||||
};
|
||||
output.push_str(&rest[..idx]);
|
||||
rest = &rest[idx + len..];
|
||||
*in_tool_call = true;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str {
|
||||
caller.map_or("direct", |c| c.caller_type.as_str())
|
||||
}
|
||||
|
||||
fn caller_allowed_for_tool(caller: Option<&ToolCaller>, tool_def: Option<&Tool>) -> bool {
|
||||
let requested = caller_type_for_tool_use(caller);
|
||||
if let Some(def) = tool_def
|
||||
&& let Some(allowed) = &def.allowed_callers
|
||||
{
|
||||
if allowed.is_empty() {
|
||||
return requested == "direct";
|
||||
}
|
||||
return allowed.iter().any(|item| item == requested);
|
||||
}
|
||||
requested == "direct"
|
||||
}
|
||||
|
||||
fn format_tool_error(err: &ToolError, tool_name: &str) -> String {
|
||||
match err {
|
||||
ToolError::InvalidInput { message } => {
|
||||
format!("Invalid input for tool '{tool_name}': {message}")
|
||||
}
|
||||
ToolError::MissingField { field } => {
|
||||
format!("Tool '{tool_name}' is missing required field '{field}'")
|
||||
}
|
||||
ToolError::PathEscape { path } => format!(
|
||||
"Path escapes workspace: {}. Use a workspace-relative path or enable trust mode.",
|
||||
path.display()
|
||||
),
|
||||
ToolError::ExecutionFailed { message } => message.clone(),
|
||||
ToolError::Timeout { seconds } => format!(
|
||||
"Tool '{tool_name}' timed out after {seconds}s. Try a narrower scope or a longer timeout."
|
||||
),
|
||||
ToolError::NotAvailable { message } => {
|
||||
let lower = message.to_ascii_lowercase();
|
||||
if lower.contains("current tool catalog") || lower.contains("did you mean:") {
|
||||
message.clone()
|
||||
} else {
|
||||
format!(
|
||||
"Tool '{tool_name}' is not available: {message}. Check mode, feature flags, or tool name."
|
||||
)
|
||||
}
|
||||
}
|
||||
ToolError::PermissionDenied { message } => format!(
|
||||
"Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission."
|
||||
),
|
||||
}
|
||||
}
|
||||
// === Internal tool helpers ===
|
||||
|
||||
impl Engine {
|
||||
fn reset_cancel_token(&mut self) {
|
||||
@@ -1850,6 +1674,7 @@ use context::{
|
||||
};
|
||||
mod dispatch;
|
||||
mod lsp_hooks;
|
||||
mod streaming;
|
||||
mod tool_catalog;
|
||||
mod tool_execution;
|
||||
mod tool_setup;
|
||||
@@ -1858,12 +1683,21 @@ mod turn_loop;
|
||||
use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision};
|
||||
use self::dispatch::{
|
||||
ParallelToolResult, ParallelToolResultEntry, ToolExecGuard, ToolExecOutcome, ToolExecutionPlan,
|
||||
final_tool_input, mcp_tool_approval_description, mcp_tool_is_parallel_safe,
|
||||
mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input,
|
||||
should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool,
|
||||
caller_allowed_for_tool, caller_type_for_tool_use, final_tool_input, format_tool_error,
|
||||
mcp_tool_approval_description, mcp_tool_is_parallel_safe, mcp_tool_is_read_only,
|
||||
parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first,
|
||||
should_parallelize_tool_batch, should_stop_after_plan_tool,
|
||||
};
|
||||
#[cfg(test)]
|
||||
use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
|
||||
#[cfg(test)]
|
||||
use self::streaming::TOOL_CALL_START_MARKERS;
|
||||
use self::streaming::{
|
||||
ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL,
|
||||
MAX_TRANSPARENT_STREAM_RETRIES, STREAM_CHUNK_TIMEOUT_SECS, STREAM_MAX_CONTENT_BYTES,
|
||||
STREAM_MAX_DURATION_SECS, ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
|
||||
should_transparently_retry_stream,
|
||||
};
|
||||
use self::tool_catalog::{
|
||||
CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
|
||||
active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling,
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
|
||||
use serde_json::json;
|
||||
|
||||
use crate::models::ToolCaller;
|
||||
use crate::models::{Tool, ToolCaller};
|
||||
use crate::tools::spec::{ToolError, ToolResult};
|
||||
use crate::tui::app::AppMode;
|
||||
|
||||
@@ -71,6 +71,60 @@ pub(super) enum ToolExecGuard<'a> {
|
||||
Write(#[allow(dead_code)] tokio::sync::RwLockWriteGuard<'a, ()>),
|
||||
}
|
||||
|
||||
// === Caller policy and errors ========================================
|
||||
|
||||
pub(super) fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str {
|
||||
caller.map_or("direct", |c| c.caller_type.as_str())
|
||||
}
|
||||
|
||||
pub(super) fn caller_allowed_for_tool(
|
||||
caller: Option<&ToolCaller>,
|
||||
tool_def: Option<&Tool>,
|
||||
) -> bool {
|
||||
let requested = caller_type_for_tool_use(caller);
|
||||
if let Some(def) = tool_def
|
||||
&& let Some(allowed) = &def.allowed_callers
|
||||
{
|
||||
if allowed.is_empty() {
|
||||
return requested == "direct";
|
||||
}
|
||||
return allowed.iter().any(|item| item == requested);
|
||||
}
|
||||
requested == "direct"
|
||||
}
|
||||
|
||||
pub(super) fn format_tool_error(err: &ToolError, tool_name: &str) -> String {
|
||||
match err {
|
||||
ToolError::InvalidInput { message } => {
|
||||
format!("Invalid input for tool '{tool_name}': {message}")
|
||||
}
|
||||
ToolError::MissingField { field } => {
|
||||
format!("Tool '{tool_name}' is missing required field '{field}'")
|
||||
}
|
||||
ToolError::PathEscape { path } => format!(
|
||||
"Path escapes workspace: {}. Use a workspace-relative path or enable trust mode.",
|
||||
path.display()
|
||||
),
|
||||
ToolError::ExecutionFailed { message } => message.clone(),
|
||||
ToolError::Timeout { seconds } => format!(
|
||||
"Tool '{tool_name}' timed out after {seconds}s. Try a narrower scope or a longer timeout."
|
||||
),
|
||||
ToolError::NotAvailable { message } => {
|
||||
let lower = message.to_ascii_lowercase();
|
||||
if lower.contains("current tool catalog") || lower.contains("did you mean:") {
|
||||
message.clone()
|
||||
} else {
|
||||
format!(
|
||||
"Tool '{tool_name}' is not available: {message}. Check mode, feature flags, or tool name."
|
||||
)
|
||||
}
|
||||
}
|
||||
ToolError::PermissionDenied { message } => format!(
|
||||
"Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission."
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
// === Streaming-buffer parsing =========================================
|
||||
|
||||
/// Promote a streaming `ToolUseState` to a finalized JSON input.
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
//! Streaming response state and guardrails.
|
||||
//!
|
||||
//! This module owns the local state used while decoding one model stream:
|
||||
//! content block kind tracking, streamed tool-use buffers, transparent retry
|
||||
//! policy, and scrubbers for text that looks like a forged tool-call wrapper.
|
||||
|
||||
use crate::models::ToolCaller;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(super) enum ContentBlockKind {
|
||||
Text,
|
||||
Thinking,
|
||||
ToolUse,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(super) struct ToolUseState {
|
||||
pub(super) id: String,
|
||||
pub(super) name: String,
|
||||
pub(super) input: serde_json::Value,
|
||||
pub(super) caller: Option<ToolCaller>,
|
||||
pub(super) input_buffer: String,
|
||||
}
|
||||
|
||||
/// Maximum time to wait for a single stream chunk before assuming a stall.
|
||||
/// **This is the idle timeout** — it resets on every SSE chunk, so long
|
||||
/// thinking turns that ARE producing reasoning_content stay alive. Only a
|
||||
/// genuine `chunk_timeout` window of silence kills the stream.
|
||||
pub(super) const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
|
||||
/// Maximum total bytes of text/thinking content before aborting the stream.
|
||||
pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
|
||||
/// Sanity backstop for total stream wall-clock duration. **Not** a routine
|
||||
/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall
|
||||
/// detector. The wall-clock cap is here only to bound pathological cases
|
||||
/// (e.g. a server that keeps sending heartbeats forever without progress).
|
||||
///
|
||||
/// History: this used to be 300s (5 min) which was too aggressive — V4
|
||||
/// thinking turns on hard prompts legitimately exceed 5 minutes wall-clock
|
||||
/// while still emitting reasoning_content chunks the whole way. Bumped to
|
||||
/// 30 min in v0.6.6 to address `TODO_FIXES.md` #1. Codex defaults to a
|
||||
/// per-chunk idle of 300s with no wall-clock cap; we keep both layers but
|
||||
/// give the wall-clock a generous window so it never fires in practice.
|
||||
pub(super) const STREAM_MAX_DURATION_SECS: u64 = 1800; // 30 minutes (was 300s; #103/#1)
|
||||
/// Hard cap on consecutive recoverable stream errors before we surface a turn
|
||||
/// failure. Bumped 3 → 5 in v0.6.7 along with the HTTP/2 keepalive defaults
|
||||
/// (#103) — keepalive should make spurious decode errors rarer, so we can
|
||||
/// tolerate a longer streak before giving up on the turn.
|
||||
pub(super) const MAX_STREAM_ERRORS_BEFORE_FAIL: u32 = 5;
|
||||
/// Cap on transparent stream-level retries — these only happen when the wire
|
||||
/// dies before any content was streamed, so DeepSeek hasn't billed us and
|
||||
/// the user hasn't seen anything. Two attempts is enough to ride out a
|
||||
/// flaky edge node without amplifying real outages (#103).
|
||||
pub(super) const MAX_TRANSPARENT_STREAM_RETRIES: u32 = 2;
|
||||
|
||||
/// Decide whether a stream error is eligible for a transparent retry.
|
||||
///
|
||||
/// True only when ALL three conditions hold:
|
||||
/// 1. No content has been received on the current attempt — otherwise DeepSeek
|
||||
/// has already billed us for output tokens and the user has seen partial
|
||||
/// deltas; resending would double-bill and desync the UI.
|
||||
/// 2. We still have transparent-retry budget remaining.
|
||||
/// 3. The turn has not been cancelled.
|
||||
///
|
||||
/// Extracted as a pure function so the four #103 retry cases can be exercised
|
||||
/// in unit tests without booting the full engine state machine.
|
||||
pub(super) fn should_transparently_retry_stream(
|
||||
any_content_received: bool,
|
||||
transparent_attempts: u32,
|
||||
cancelled: bool,
|
||||
) -> bool {
|
||||
!any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
|
||||
}
|
||||
|
||||
pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
|
||||
"[TOOL_CALL]",
|
||||
"<deepseek:tool_call",
|
||||
"<tool_call",
|
||||
"<invoke ",
|
||||
"<function_calls>",
|
||||
];
|
||||
|
||||
pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [
|
||||
"[/TOOL_CALL]",
|
||||
"</deepseek:tool_call>",
|
||||
"</tool_call>",
|
||||
"</invoke>",
|
||||
"</function_calls>",
|
||||
];
|
||||
|
||||
/// Compact one-shot notice emitted when a model attempts to forge a tool-call
|
||||
/// wrapper in plain text instead of using the API tool channel. The visible
|
||||
/// content is still scrubbed; this exists so the user can see why their text
|
||||
/// shrank.
|
||||
pub(crate) const FAKE_WRAPPER_NOTICE: &str =
|
||||
"Stripped non-API tool-call wrapper from model output (use the API tool channel)";
|
||||
|
||||
/// True if `text` contains any of the known fake-wrapper start markers. Used by
|
||||
/// the streaming loop to decide whether to emit `FAKE_WRAPPER_NOTICE`.
|
||||
pub(crate) fn contains_fake_tool_wrapper(text: &str) -> bool {
|
||||
TOOL_CALL_START_MARKERS.iter().any(|m| text.contains(m))
|
||||
}
|
||||
|
||||
fn find_first_marker(text: &str, markers: &[&str]) -> Option<(usize, usize)> {
|
||||
markers
|
||||
.iter()
|
||||
.filter_map(|marker| text.find(marker).map(|idx| (idx, marker.len())))
|
||||
.min_by_key(|(idx, _)| *idx)
|
||||
}
|
||||
|
||||
pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> String {
|
||||
if delta.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
|
||||
let mut output = String::new();
|
||||
let mut rest = delta;
|
||||
|
||||
loop {
|
||||
if *in_tool_call {
|
||||
let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_END_MARKERS) else {
|
||||
break;
|
||||
};
|
||||
rest = &rest[idx + len..];
|
||||
*in_tool_call = false;
|
||||
} else {
|
||||
let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_START_MARKERS) else {
|
||||
output.push_str(rest);
|
||||
break;
|
||||
};
|
||||
output.push_str(&rest[..idx]);
|
||||
rest = &rest[idx + len..];
|
||||
*in_tool_call = true;
|
||||
}
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
Reference in New Issue
Block a user