From ec527b6a2bb94e24de109010298f163e402ed514 Mon Sep 17 00:00:00 2001
From: hexin <372726039@qq.com>
Date: Wed, 13 May 2026 04:37:04 +0800
Subject: [PATCH] fix(cli): honor config.toml reasoning_effort on non-auto exec
 routes (#1511)

`resolve_cli_auto_route` was hard-coding `reasoning_effort: None` when
`--model` is not `auto`, which silently dropped the value the user had
set in `~/.deepseek/config.toml` on every non-auto-route exec/one-shot
call.

For vllm + Qwen3 users with `reasoning_effort = "off"`, thinking was
therefore never disabled. The model emitted a long reasoning trace for
every prompt and SSE idle timeouts (`did not receive response headers
after 45s`) fired on any non-trivial prompt. After this fix, the same
prompts return in ~1.5s.

Route the configured value through `ReasoningEffort::from_setting`, the
same parser the TUI uses elsewhere for this field. Auto-route behaviour
(`--model auto`) is unchanged.

Verified by capturing the outgoing request body with `nc` before and
after; `chat_template_kwargs.enable_thinking=false` now appears in the
body on vllm exec runs.

Co-authored-by: hexin <he.xin@h3c.com>
---
 crates/tui/src/main.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index 02ba2e19..7542ecf1 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -4269,9 +4269,17 @@ async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> C
             auto_model: true,
         }
     } else {
+        // When --model is not `auto`, fall back to the reasoning_effort
+        // declared in the user's config.toml. The previous hard-coded `None`
+        // silently dropped the user's setting on every non-auto-route exec
+        // call, which (for example) prevented vllm + Qwen3 users from
+        // disabling thinking via `reasoning_effort = "off"` and caused
+        // 45s SSE idle timeouts on any non-trivial prompt.
         CliAutoRoute {
             model: model.to_string(),
-            reasoning_effort: None,
+            reasoning_effort: config
+                .reasoning_effort()
+                .map(crate::tui::app::ReasoningEffort::from_setting),
             auto_model: false,
         }
     }