From 55024a16d8118403b4999e565fd486b48a394d3d Mon Sep 17 00:00:00 2001
From: Hunter B <hmbown@gmail.com>
Date: Wed, 3 Jun 2026 21:43:18 -0700
Subject: [PATCH] fix(subagent): inherit tool-agent model route

Harvested from PR #2736 by @h3c-hexin.

Co-authored-by: h3c-hexin <13790929+h3c-hexin@users.noreply.github.com>
---
 CHANGELOG.md                           | 12 ++++++++----
 crates/tui/src/client.rs               | 23 +++++++++++++++++++++++
 crates/tui/src/tools/subagent/mod.rs   |  9 ++++++---
 crates/tui/src/tools/subagent/tests.rs | 25 ++++++++++++++++++++++---
 docs/V0_9_0_EXECUTION_MAP.md           |  5 +++--
 5 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cb088d30..f8695693 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -57,18 +57,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Explicit `skills_dir` configuration is now unioned with workspace skill
   discovery instead of being shadowed by workspace-local skills, and configured
   skills take precedence over global defaults when prompt space is constrained.
+- Tool-agent sub-agent routing no longer hard-codes `deepseek-v4-flash`; it uses
+  an explicit tool-agent override when set, otherwise the parent session model.
+  The fast lane still disables thinking through provider-aware request shaping.
 
 ### Community
 
-Thanks to **@cyq1017** for the restore-listing implementation (#2513) and
-**@wywsoor** for the broader macOS/iTerm rollback UX report (#2494), and
+Thanks to **@cyq1017** for the restore-listing implementation (#2513),
+**@wywsoor** for the broader macOS/iTerm rollback UX report (#2494),
 **@HUQIANTAO** for the `web_run` lock-splitting work (#2502) and turn-metadata
 prefix-cache stability work (#2517), **@xyuai** for canonical CodeWhale
 settings-path migration work (#2730), **@gaord** for the runtime thread
 workspace update API (#2640), **@shenjackyuanjie** for the
-HarmonyOS/OpenHarmony port and MatePad Edge validation trail (#2634), and
+HarmonyOS/OpenHarmony port and MatePad Edge validation trail (#2634),
 **@idling11** for the PlanArtifact direction in Plan mode (#2733), and
-**@h3c-hexin** for the configured `skills_dir` merge fix (#2737).
+**@h3c-hexin** for the tool-agent model inheritance and configured
+`skills_dir` fixes (#2736, #2737).
 
 ## [0.8.53] - 2026-06-03
 
diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs
index 4f727de8..e53f1877 100644
--- a/crates/tui/src/client.rs
+++ b/crates/tui/src/client.rs
@@ -2437,6 +2437,29 @@ mod tests {
         assert!(body.get("extra_body").is_none());
     }
 
+    #[test]
+    fn reasoning_effort_off_is_omitted_for_strict_openai_like_providers() {
+        for provider in [
+            ApiProvider::Openai,
+            ApiProvider::Atlascloud,
+            ApiProvider::WanjieArk,
+            ApiProvider::Arcee,
+            ApiProvider::Huggingface,
+            ApiProvider::Moonshot,
+            ApiProvider::Ollama,
+            ApiProvider::Fireworks,
+        ] {
+            let mut body = json!({}); // start empty so any injected field is visible
+            apply_reasoning_effort(&mut body, Some("off"), provider); // must add nothing
+
+            assert_eq!(
+                body,
+                json!({}),
+                "provider {provider:?} should not receive unsupported reasoning-off fields"
+            );
+        }
+    }
+
     #[test]
     fn reasoning_effort_uses_nvidia_nim_chat_template_kwargs() {
         let mut body = json!({});
diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs
index 9713a156..80bdf3e3 100644
--- a/crates/tui/src/tools/subagent/mod.rs
+++ b/crates/tui/src/tools/subagent/mod.rs
@@ -4697,7 +4697,7 @@ pub(crate) async fn resolve_subagent_assignment_route(
     agent_type: &SubAgentType,
 ) -> SubAgentResolvedRoute {
     if matches!(agent_type, SubAgentType::ToolAgent) {
-        return tool_agent_route();
+        return tool_agent_route(&runtime.model, configured_model);
     }
 
     let explicit_model = configured_model.is_some();
@@ -4720,9 +4720,12 @@ pub(crate) async fn resolve_subagent_assignment_route(
     route
 }
 
-fn tool_agent_route() -> SubAgentResolvedRoute {
+fn tool_agent_route(parent_model: &str, configured_model: Option<String>) -> SubAgentResolvedRoute {
     SubAgentResolvedRoute {
-        model: "deepseek-v4-flash".to_string(),
+        // The tool-agent fast lane is defined by disabling thinking, not by a
+        // DeepSeek-specific model id. An explicit role/spawn override wins when
+        // present; otherwise inherit the already provider-resolved parent model.
+        model: configured_model.unwrap_or_else(|| parent_model.to_string()),
         reasoning_effort: Some("off".to_string()),
     }
 }
diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs
index 097b2323..0ed21884 100644
--- a/crates/tui/src/tools/subagent/tests.rs
+++ b/crates/tui/src/tools/subagent/tests.rs
@@ -845,10 +845,28 @@ fn subagent_auto_route_respects_explicit_or_role_model() {
 }
 
 #[tokio::test]
-async fn tool_agent_route_forces_flash_with_thinking_off() {
-    let runtime = stub_runtime()
+async fn tool_agent_route_inherits_parent_model_with_thinking_off() {
+    let mut runtime = stub_runtime()
         .with_auto_model(false)
         .with_reasoning_effort(Some("max".to_string()), false);
+    runtime.model = "local-provider/tool-fast".to_string(); // parent model to inherit
+
+    let route = resolve_subagent_assignment_route(
+        &runtime,
+        None, // no explicit model override
+        "run OCR on this screenshot",
+        &SubAgentType::ToolAgent,
+    )
+    .await;
+
+    assert_eq!(route.model, "local-provider/tool-fast");
+    assert_eq!(route.reasoning_effort.as_deref(), Some("off")); // parent asked for "max"
+}
+
+#[tokio::test]
+async fn tool_agent_route_respects_explicit_model_with_thinking_off() {
+    let mut runtime = stub_runtime().with_auto_model(false);
+    runtime.model = "local-provider/tool-fast".to_string();
 
     let route = resolve_subagent_assignment_route(
         &runtime,
@@ -858,7 +876,7 @@ async fn tool_agent_route_forces_flash_with_thinking_off() {
     )
     .await;
 
-    assert_eq!(route.model, "deepseek-v4-flash");
+    assert_eq!(route.model, "deepseek-v4-pro");
     assert_eq!(route.reasoning_effort.as_deref(), Some("off"));
 }
 
@@ -2210,6 +2228,7 @@ fn stub_runtime() -> SubAgentRuntime {
 /// `Option<...>`. `Config::default()` is enough — `DeepSeekClient::new`
 /// only validates that an API key field exists, not that the key works.
 fn stub_client() -> DeepSeekClient {
+    let _ = rustls::crypto::ring::default_provider().install_default();
     let config = crate::config::Config {
         api_key: Some("test-key".to_string()),
         ..crate::config::Config::default()
diff --git a/docs/V0_9_0_EXECUTION_MAP.md b/docs/V0_9_0_EXECUTION_MAP.md
index 04f4b96c..af246522 100644
--- a/docs/V0_9_0_EXECUTION_MAP.md
+++ b/docs/V0_9_0_EXECUTION_MAP.md
@@ -46,6 +46,7 @@ harvest/stewardship commits:
 | Contributor credit plumbing | Added locally after the co-author audit. | Normalized unpushed harvest author/trailer emails to numeric GitHub noreply identities, added `.github/AUTHOR_MAP`, and wired `scripts/check-coauthor-trailers.py` into CI so future `Harvested from PR #N by @handle` commits require machine-readable credit. |
 | #2640 workspace field on UpdateThreadRequest | Harvested with the stale-engine fix restored. | Added `workspace` to `PATCH /v1/threads/{id}`, rejects empty paths, rejects workspace changes during active turns, and evicts idle cached engines so the next turn uses the new workspace. `cargo test -p codewhale-tui --bin codewhale-tui --locked update_thread_workspace -- --nocapture` and `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
 | #2733 PlanArtifact for Plan mode | Locally harvested as a broader continuity-artifact slice. | Added rich `update_plan` fields for objective, context, sources, files, constraints, verification, risks, and handoff notes; renders them in the transcript card and Plan confirmation prompt; preserves them through `/relay`, fork-state, and saved-session replay. `cargo test -p codewhale-tui --bin codewhale-tui --locked plan_ -- --nocapture`, `cargo test -p codewhale-tui --bin codewhale-tui --locked relay_slash_command_routes_to_session_relay_instruction -- --nocapture`, and `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. |
+| #2736 sub-agent model inheritance | Locally harvested with explicit-override and provider-shaping tests. | Tool-agent routing now inherits the parent runtime model instead of hard-coding `deepseek-v4-flash`, while explicit DeepSeek-style tool-agent overrides still win. The `reasoning_effort = off` fast lane is covered by strict OpenAI-like provider request-shaping tests. Credit @h3c-hexin; comment/close the original after the integration branch is public. |
 | #2737 configured `skills_dir` discovery | Locally harvested with explicit-config precedence. | The system prompt now unions workspace-discovered skills and configured `skills_dir` skills instead of treating the configured directory as a fallback. Explicit configured skills are inserted before global defaults so they are not lost behind a large global skill library. Credit @h3c-hexin; comment/close the original after the integration branch is public. |
 | #2636 project-context mtime cache | Defer direct merge; harvest only after cache key/signature is widened. | Must include constitution changes, auto-generated context deletion, canonical path equivalence, and overwrite detection before landing. |
 | #2634 HarmonyOS port | Locally harvested with additional Nix-chain clearance; keep credited and do not close until the integration branch is public. | User-supplied MatePad Edge demo (`https://bilibili.com/video/av116689597368905`) confirms real-device interest. Added env-driven OpenHarmony SDK setup, OHOS platform guards/fallbacks, self-update disablement, and OHOS target gating for Starlark execpolicy parsing plus PTY support so published OHOS builds do not pull `nix` 0.28 through `rustyline` or `portable-pty`. `cargo check --workspace --all-features --locked`, focused PTY/clipboard tests, and `cargo tree --locked -p codewhale-tui --target aarch64-unknown-linux-ohos -i nix@0.28.0` passed; full OHOS target check is blocked on this host because `OHOS_NATIVE_SDK`/target CC/sysroot are not configured and `ring` cannot find `assert.h`. |
@@ -116,7 +117,7 @@ harvest/stewardship commits:
 | #2730 canonical codewhale settings path | Mergeable | Already harvested as `9e15805f6`; follow-up reviewer assertion added locally. Comment/close original after integration branch is public, crediting @xyuai and issue #2664. |
 | #2732 pausable command lifecycle | Draft/mergeable | Defer; review flagged behavior changes. |
 | #2733 PlanArtifact UI | Mergeable | Locally harvested with richer schema, rendering, relay/fork-state propagation, and replay tests. Comment/close original after integration branch is public, crediting @idling11 and issue #2691; keep #2691 open only if additional PlanReview product work remains. |
-| #2736 sub-agent model inheritance | Mergeable | Harvest next with tighter tests: `ToolAgent` should inherit the parent runtime model instead of hard-coding `deepseek-v4-flash`, while preserving the current reasoning-effort behavior unless provider-specific request shaping proves unsafe. Credit @h3c-hexin. |
+| #2736 sub-agent model inheritance | Mergeable | Locally harvested with parent-model inheritance, explicit override coverage, and strict OpenAI-like `reasoning_effort = off` shaping coverage. Comment/close original after the integration branch is public, crediting @h3c-hexin. |
 | #2737 configured `skills_dir` discovery | Mergeable | Locally harvested with extra configured-before-global precedence tests. Comment/close original after the integration branch is public, crediting @h3c-hexin. |
 | #2738 dense tool-call transcript collapse | Mergeable | Do not merge as-is. The compaction idea matches the `/relay` direction, but the PR currently bypasses normal rendering, lacks expansion wiring, defaults to expanded mode, and has cache-key/index maintenance risks. Harvest only after completing those behaviors. |
 
@@ -139,6 +140,6 @@ Issue count should drop through evidence-backed consolidation, not bulk closing.
 ## Immediate Next Actions
 
 1. Prepare public comments for #2708, #2502, #2513, #2530, #2576, #2581, #2627,
-   #2634, #2636, #2687, #2737, and already-harvested performance PRs.
+   #2634, #2636, #2687, #2736, #2737, and already-harvested performance PRs.
 2. Start file decomposition Phase 1 only after the PR harvest table has no
    unknown high-priority provider/prompt/cache branches.