From a06bbe57a6def8e7d80b5c95d642dfe8fa48ec7e Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Tue, 26 May 2026 13:09:48 -0500 Subject: [PATCH] fix(tools): replace cross-await RwLock with Semaphore to prevent deadlock Replace `Arc>` in ToolCallRuntime with `Arc` to eliminate the risk of a tool re-entering and deadlocking on the same lock. Parallel tools now acquire then immediately drop the permit, allowing concurrent execution after any in-flight serial tool finishes. Serial tools hold the permit for the full duration. Fixes #2157. Harvested from #1856. Co-authored-by: Fire-dtx <58944505+Fire-dtx@users.noreply.github.com> --- crates/tools/src/lib.rs | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/crates/tools/src/lib.rs b/crates/tools/src/lib.rs index a7179410..050b840f 100644 --- a/crates/tools/src/lib.rs +++ b/crates/tools/src/lib.rs @@ -8,7 +8,7 @@ use async_trait::async_trait; use codewhale_protocol::{ToolKind, ToolOutput, ToolPayload}; use serde::{Deserialize, Serialize}; use serde_json::Value; -use tokio::sync::RwLock; +use tokio::sync::Semaphore; /// Capabilities that a tool may have or require. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -309,9 +309,19 @@ pub trait ToolHandler: Send + Sync { ) -> std::result::Result; } -#[derive(Debug, Default)] +#[derive(Debug)] pub struct ToolCallRuntime { - pub parallel_execution: Arc>, + /// Serialise non-parallel tool executions. Capacity 1 ensures at most one + /// serial tool runs at a time, and blocks parallel tools while it runs. + serial_semaphore: Arc, +} + +impl Default for ToolCallRuntime { + fn default() -> Self { + Self { + serial_semaphore: Arc::new(Semaphore::new(1)), + } + } } #[derive(Default)] @@ -380,13 +390,20 @@ impl ToolRegistry { }; if configured.supports_parallel_tool_calls { - let _guard = self.runtime.parallel_execution.read().await; + // Parallel tools wait for any in-flight serial tool to finish, + // but do not hold the permit so other parallel tools may run concurrently. + drop(self.runtime.serial_semaphore.acquire().await + .map_err(|_| FunctionCallError::Cancelled { name: call.name })?); self.execute_with_timeout(handler, configured.spec.timeout_ms, invocation) .await } else { - let _guard = self.runtime.parallel_execution.write().await; + // Serial tools hold the semaphore for the full execution duration, + // preventing other serial AND parallel tools from starting. + let _permit = self.runtime.serial_semaphore.acquire().await + .map_err(|_| FunctionCallError::Cancelled { name: call.name })?; self.execute_with_timeout(handler, configured.spec.timeout_ms, invocation) .await + // _permit dropped here, releasing the semaphore. } }