feat(subagents): make step API timeout configurable
This commit is contained in:
@@ -146,6 +146,7 @@ max_subagents = 10 # optional (1-20)
|
||||
# Optional sub-agent tuning. max_concurrent overrides top-level max_subagents.
|
||||
# [subagents]
|
||||
# max_concurrent = 10
|
||||
# api_timeout_secs = 120 # per-step API timeout, clamped to 1..=1800
|
||||
|
||||
# Optional managed policy paths (defaults to /etc/deepseek/*.toml on unix):
|
||||
# managed_config_path = "/etc/deepseek/managed_config.toml"
|
||||
|
||||
@@ -19,6 +19,18 @@ use crate::hooks::HooksConfig;
|
||||
|
||||
pub const DEFAULT_MAX_SUBAGENTS: usize = 10;
|
||||
pub const MAX_SUBAGENTS: usize = 20;
|
||||
/// Default per-step DeepSeek API timeout for sub-agent requests, in seconds.
|
||||
/// Matches the legacy hardcoded value so existing configs keep their old
|
||||
/// behavior when `[subagents] api_timeout_secs` is unset (#1806, #1808).
|
||||
pub const DEFAULT_SUBAGENT_API_TIMEOUT_SECS: u64 = 120;
|
||||
/// Minimum accepted `[subagents] api_timeout_secs`. Anything lower (including
|
||||
/// `0`, which would otherwise produce an immediate timeout footgun) clamps
|
||||
/// up to this value before the runtime sees it.
|
||||
pub const MIN_SUBAGENT_API_TIMEOUT_SECS: u64 = 1;
|
||||
/// Maximum accepted `[subagents] api_timeout_secs` (30 minutes). The cap
|
||||
/// keeps a misconfigured per-step timeout from masking real model/network
|
||||
/// hangs forever.
|
||||
pub const MAX_SUBAGENT_API_TIMEOUT_SECS: u64 = 1800;
|
||||
pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro";
|
||||
pub const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta";
|
||||
pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro";
|
||||
@@ -879,6 +891,14 @@ pub struct SubagentsConfig {
|
||||
/// setting. Clamped to [1, MAX_SUBAGENTS].
|
||||
#[serde(default)]
|
||||
pub max_concurrent: Option<usize>,
|
||||
/// Per-step DeepSeek API timeout for sub-agent requests, in seconds. The
|
||||
/// timeout wraps `client.create_message` so a stuck single step cannot
|
||||
/// pin the parent's parent-completion wakeup channel indefinitely.
|
||||
/// Defaults to `DEFAULT_SUBAGENT_API_TIMEOUT_SECS` (120) and is clamped
|
||||
/// to `MIN_SUBAGENT_API_TIMEOUT_SECS..=MAX_SUBAGENT_API_TIMEOUT_SECS`
|
||||
/// (1..=1800). Zero or unset uses the legacy 120s default (#1806, #1808).
|
||||
#[serde(default)]
|
||||
pub api_timeout_secs: Option<u64>,
|
||||
}
|
||||
|
||||
/// `[auto]` table — knobs for the `--model auto` / `/model auto` router.
|
||||
@@ -1815,6 +1835,27 @@ impl Config {
|
||||
.clamp(1, MAX_SUBAGENTS)
|
||||
}
|
||||
|
||||
/// Resolved per-step DeepSeek API timeout for sub-agents, in seconds.
|
||||
///
|
||||
/// Reads `[subagents] api_timeout_secs` and clamps to
|
||||
/// `[MIN_SUBAGENT_API_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS]`
|
||||
/// (1..=1800). `None` or `0` resolve to the legacy
|
||||
/// `DEFAULT_SUBAGENT_API_TIMEOUT_SECS` (120) so existing configs keep
|
||||
/// their old behavior; explicit `1` is honored, useful only in fast
|
||||
/// fail-fast tests, not production (#1806, #1808).
|
||||
#[must_use]
|
||||
pub fn subagent_api_timeout_secs(&self) -> u64 {
|
||||
let raw = self
|
||||
.subagents
|
||||
.as_ref()
|
||||
.and_then(|cfg| cfg.api_timeout_secs)
|
||||
.unwrap_or(DEFAULT_SUBAGENT_API_TIMEOUT_SECS);
|
||||
if raw == 0 {
|
||||
return DEFAULT_SUBAGENT_API_TIMEOUT_SECS;
|
||||
}
|
||||
raw.clamp(MIN_SUBAGENT_API_TIMEOUT_SECS, MAX_SUBAGENT_API_TIMEOUT_SECS)
|
||||
}
|
||||
|
||||
/// Raw sub-agent model override map. Values are validated at spawn time
|
||||
/// so an invalid role/type model fails before any partial agent spawn.
|
||||
#[must_use]
|
||||
@@ -3970,6 +4011,47 @@ mod tests {
|
||||
assert_eq!(high.max_subagents(), MAX_SUBAGENTS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subagent_api_timeout_defaults_and_clamps() {
|
||||
assert_eq!(
|
||||
Config::default().subagent_api_timeout_secs(),
|
||||
DEFAULT_SUBAGENT_API_TIMEOUT_SECS
|
||||
);
|
||||
|
||||
let zero = Config {
|
||||
subagents: Some(SubagentsConfig {
|
||||
api_timeout_secs: Some(0),
|
||||
..SubagentsConfig::default()
|
||||
}),
|
||||
..Config::default()
|
||||
};
|
||||
assert_eq!(
|
||||
zero.subagent_api_timeout_secs(),
|
||||
DEFAULT_SUBAGENT_API_TIMEOUT_SECS
|
||||
);
|
||||
|
||||
let explicit_min = Config {
|
||||
subagents: Some(SubagentsConfig {
|
||||
api_timeout_secs: Some(MIN_SUBAGENT_API_TIMEOUT_SECS),
|
||||
..SubagentsConfig::default()
|
||||
}),
|
||||
..Config::default()
|
||||
};
|
||||
assert_eq!(explicit_min.subagent_api_timeout_secs(), 1);
|
||||
|
||||
let high = Config {
|
||||
subagents: Some(SubagentsConfig {
|
||||
api_timeout_secs: Some(MAX_SUBAGENT_API_TIMEOUT_SECS + 60),
|
||||
..SubagentsConfig::default()
|
||||
}),
|
||||
..Config::default()
|
||||
};
|
||||
assert_eq!(
|
||||
high.subagent_api_timeout_secs(),
|
||||
MAX_SUBAGENT_API_TIMEOUT_SECS
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn save_api_key_writes_config_file_under_cfg_test() -> Result<()> {
|
||||
// `save_api_key` writes to the shared user config file. This
|
||||
|
||||
@@ -166,6 +166,11 @@ pub struct EngineConfig {
|
||||
pub search_provider: crate::config::SearchProvider,
|
||||
/// API key for Tavily or Bocha. `None` for Bing or DuckDuckGo.
|
||||
pub search_api_key: Option<String>,
|
||||
/// Per-step DeepSeek API timeout for sub-agent `create_message` requests.
|
||||
/// Resolved from `[subagents] api_timeout_secs` (clamped to 1..=1800)
|
||||
/// once at engine construction, then threaded onto every
|
||||
/// `SubAgentRuntime` the engine builds (#1806, #1808).
|
||||
pub subagent_api_timeout: Duration,
|
||||
}
|
||||
|
||||
impl Default for EngineConfig {
|
||||
@@ -206,6 +211,9 @@ impl Default for EngineConfig {
|
||||
workshop: None,
|
||||
search_provider: crate::config::SearchProvider::default(),
|
||||
search_api_key: None,
|
||||
subagent_api_timeout: Duration::from_secs(
|
||||
crate::config::DEFAULT_SUBAGENT_API_TIMEOUT_SECS,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -656,6 +664,7 @@ impl Engine {
|
||||
self.session.reasoning_effort_auto,
|
||||
)
|
||||
.with_max_spawn_depth(self.config.max_spawn_depth)
|
||||
.with_step_api_timeout(self.config.subagent_api_timeout)
|
||||
.background_runtime();
|
||||
let route = resolve_subagent_assignment_route(&runtime, None, &prompt).await;
|
||||
runtime.model = route.model;
|
||||
@@ -1063,6 +1072,7 @@ impl Engine {
|
||||
self.session.reasoning_effort_auto,
|
||||
)
|
||||
.with_max_spawn_depth(self.config.max_spawn_depth)
|
||||
.with_step_api_timeout(self.config.subagent_api_timeout)
|
||||
.with_parent_completion_tx(self.tx_subagent_completion.clone());
|
||||
if let Some(context) = fork_context_for_runtime.clone() {
|
||||
rt = rt.with_fork_context(context);
|
||||
|
||||
@@ -4679,6 +4679,7 @@ async fn run_exec_agent(
|
||||
lsp_config,
|
||||
runtime_services: crate::tools::spec::RuntimeToolServices::default(),
|
||||
subagent_model_overrides: config.subagent_model_overrides(),
|
||||
subagent_api_timeout: std::time::Duration::from_secs(config.subagent_api_timeout_secs()),
|
||||
memory_enabled: config.memory_enabled(),
|
||||
memory_path: config.memory_path(),
|
||||
vision_config: config.vision_model_config(),
|
||||
|
||||
@@ -1964,6 +1964,9 @@ impl RuntimeThreadManager {
|
||||
rlm_sessions: crate::rlm::session::new_shared_rlm_session_store(),
|
||||
},
|
||||
subagent_model_overrides: self.config.subagent_model_overrides(),
|
||||
subagent_api_timeout: std::time::Duration::from_secs(
|
||||
self.config.subagent_api_timeout_secs(),
|
||||
),
|
||||
memory_enabled: self.config.memory_enabled(),
|
||||
memory_path: self.config.memory_path(),
|
||||
vision_config: self.config.vision_model_config(),
|
||||
|
||||
@@ -67,7 +67,13 @@ const TOOL_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
/// Per-step LLM API call timeout. Each `create_message` request must complete
|
||||
/// within this window or the step is treated as timed out. Prevents a single
|
||||
/// stuck API call from blocking the sub-agent indefinitely.
|
||||
const STEP_API_TIMEOUT: Duration = Duration::from_secs(120);
|
||||
/// Legacy fallback for the per-step DeepSeek API timeout. The active timeout
|
||||
/// now travels on `SubAgentRuntime::step_api_timeout` so users can override
|
||||
/// it via `[subagents] api_timeout_secs` in `~/.deepseek/config.toml`. The
|
||||
/// constant only exists for tests/stub runtimes that need a hard-coded
|
||||
/// default; production runtimes set the field explicitly (#1806, #1808).
|
||||
const DEFAULT_STEP_API_TIMEOUT: Duration =
|
||||
Duration::from_secs(crate::config::DEFAULT_SUBAGENT_API_TIMEOUT_SECS);
|
||||
const RESULT_POLL_INTERVAL: Duration = Duration::from_millis(250);
|
||||
const DEFAULT_RESULT_TIMEOUT_MS: u64 = 30_000;
|
||||
#[allow(dead_code)] // Legacy agent_wait clamp; new agent_eval uses DEFAULT/MAX.
|
||||
@@ -643,6 +649,12 @@ pub struct SubAgentRuntime {
|
||||
pub parent_completion_tx: Option<mpsc::UnboundedSender<SubAgentCompletion>>,
|
||||
/// Snapshot of the request prefix visible to an opt-in forked child.
|
||||
pub fork_context: Option<SubAgentForkContext>,
|
||||
/// Per-step DeepSeek API timeout for the child's `create_message` call.
|
||||
/// Resolved from `[subagents] api_timeout_secs` (clamped to 1..=1800) at
|
||||
/// engine construction so a slow but legitimate model turn does not
|
||||
/// false-timeout the child mid-thinking. `child_runtime()` and
|
||||
/// `background_runtime()` preserve the parent's value (#1806, #1808).
|
||||
pub step_api_timeout: Duration,
|
||||
}
|
||||
|
||||
impl SubAgentRuntime {
|
||||
@@ -676,9 +688,20 @@ impl SubAgentRuntime {
|
||||
mailbox: None,
|
||||
parent_completion_tx: None,
|
||||
fork_context: None,
|
||||
step_api_timeout: DEFAULT_STEP_API_TIMEOUT,
|
||||
}
|
||||
}
|
||||
|
||||
/// Override the per-step DeepSeek API timeout (default
|
||||
/// `DEFAULT_STEP_API_TIMEOUT`). Called by the engine after reading
|
||||
/// `[subagents] api_timeout_secs`. Tests may use this to fail fast
|
||||
/// without waiting the legacy 120 seconds (#1806, #1808).
|
||||
#[must_use]
|
||||
pub fn with_step_api_timeout(mut self, timeout: Duration) -> Self {
|
||||
self.step_api_timeout = timeout;
|
||||
self
|
||||
}
|
||||
|
||||
/// Attach the wakeup channel so the engine's parent turn loop can resume
|
||||
/// when this runtime's direct children finish (issue #756). The channel
|
||||
/// is propagated to descendants via clone, but only `spawn_depth == 1`
|
||||
@@ -799,6 +822,7 @@ impl SubAgentRuntime {
|
||||
mailbox: self.mailbox.clone(),
|
||||
parent_completion_tx: self.parent_completion_tx.clone(),
|
||||
fork_context: self.fork_context.clone(),
|
||||
step_api_timeout: self.step_api_timeout,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3534,8 +3558,8 @@ async fn run_subagent(
|
||||
from_prior_session: false,
|
||||
});
|
||||
}
|
||||
api = tokio::time::timeout(STEP_API_TIMEOUT, runtime.client.create_message(request)) => {
|
||||
api.map_err(|_| anyhow!("API call timed out after {}s", STEP_API_TIMEOUT.as_secs()))??
|
||||
api = tokio::time::timeout(runtime.step_api_timeout, runtime.client.create_message(request)) => {
|
||||
api.map_err(|_| anyhow!("API call timed out after {}s", runtime.step_api_timeout.as_secs()))??
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -1164,6 +1164,7 @@ fn child_runtime_increments_depth_and_preserves_auto_approve() {
|
||||
parent.context.auto_approve = false; // parent in suggest mode
|
||||
let child = parent.child_runtime();
|
||||
assert_eq!(child.spawn_depth, 2, "child depth = parent + 1");
|
||||
assert_eq!(child.step_api_timeout, DEFAULT_STEP_API_TIMEOUT);
|
||||
assert!(
|
||||
!child.context.auto_approve,
|
||||
"child must inherit parent approval state"
|
||||
@@ -1178,6 +1179,18 @@ fn child_runtime_increments_depth_and_preserves_auto_approve() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn child_and_background_runtimes_preserve_step_api_timeout() {
|
||||
let timeout = Duration::from_secs(7);
|
||||
let parent = stub_runtime().with_step_api_timeout(timeout);
|
||||
|
||||
let child = parent.child_runtime();
|
||||
assert_eq!(child.step_api_timeout, timeout);
|
||||
|
||||
let background = parent.background_runtime();
|
||||
assert_eq!(background.step_api_timeout, timeout);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn subagent_registry_blocks_approval_tools_without_parent_auto_approve() {
|
||||
let mut runtime = stub_runtime();
|
||||
@@ -1434,6 +1447,7 @@ fn stub_runtime() -> SubAgentRuntime {
|
||||
mailbox: None,
|
||||
parent_completion_tx: None,
|
||||
fork_context: None,
|
||||
step_api_timeout: DEFAULT_STEP_API_TIMEOUT,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -707,6 +707,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig {
|
||||
.map(crate::config::LspConfigToml::into_runtime),
|
||||
runtime_services: app.runtime_services.clone(),
|
||||
subagent_model_overrides: config.subagent_model_overrides(),
|
||||
subagent_api_timeout: Duration::from_secs(config.subagent_api_timeout_secs()),
|
||||
memory_enabled: config.memory_enabled(),
|
||||
memory_path: config.memory_path(),
|
||||
vision_config: config.vision_model_config(),
|
||||
|
||||
@@ -447,12 +447,14 @@ If you are upgrading from older releases:
|
||||
related persistent sub-agent sessions. Explicit tool `model` values win, then role/type
|
||||
overrides, then the parent runtime model. Supported convenience keys are
|
||||
`default_model`, `worker_model`, `explorer_model`, `awaiter_model`,
|
||||
`review_model`, `custom_model`, and `max_concurrent`. The
|
||||
`review_model`, `custom_model`, `max_concurrent`, and `api_timeout_secs`. The
|
||||
`[subagents] max_concurrent` value overrides top-level `max_subagents` and is
|
||||
also clamped to `1..=20`. `[subagents.models]` accepts lower-case role or type
|
||||
keys such as `worker`, `explorer`, `general`, `explore`, `plan`, and
|
||||
`review`. Values must normalize to a supported DeepSeek model id before an
|
||||
agent is spawned.
|
||||
also clamped to `1..=20`; `[subagents] api_timeout_secs` controls the
|
||||
per-step API timeout for sub-agent model calls and is clamped to `1..=1800`,
|
||||
with `0` or unset preserving the legacy 120 second default.
|
||||
`[subagents.models]` accepts lower-case role or type keys such as `worker`,
|
||||
`explorer`, `general`, `explore`, `plan`, and `review`. Values must normalize
|
||||
to a supported DeepSeek model id before an agent is spawned.
|
||||
- `skills_dir` (string, optional): defaults to `~/.deepseek/skills` (each skill is
|
||||
a directory containing `SKILL.md`). Workspace-local `.agents/skills` or
|
||||
`./skills` are preferred when present; the runtime also discovers global
|
||||
|
||||
Reference in New Issue
Block a user