From ec527b6a2bb94e24de109010298f163e402ed514 Mon Sep 17 00:00:00 2001 From: hexin <372726039@qq.com> Date: Wed, 13 May 2026 04:37:04 +0800 Subject: [PATCH] fix(cli): honor config.toml reasoning_effort on non-auto exec routes (#1511) `resolve_cli_auto_route` was hard-coding `reasoning_effort: None` when `--model` is not `auto`, which silently dropped the value the user had set in `~/.deepseek/config.toml` on every non-auto-route exec/one-shot call. For vllm + Qwen3 users with `reasoning_effort = "off"`, thinking was therefore never disabled. The model emitted a long reasoning trace for every prompt and SSE idle timeouts (`did not receive response headers after 45s`) fired on any non-trivial prompt. After this fix, the same prompts return in ~1.5s. Route the configured value through `ReasoningEffort::from_setting`, the same parser the TUI uses elsewhere for this field. Auto-route behaviour (`--model auto`) is unchanged. Verified by capturing the outgoing request body with `nc` before and after; chat_template_kwargs.enable_thinking=false now appears in the body on vllm exec runs. Co-authored-by: hexin --- crates/tui/src/main.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 02ba2e19..7542ecf1 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -4269,9 +4269,17 @@ async fn resolve_cli_auto_route(config: &Config, model: &str, prompt: &str) -> C auto_model: true, } } else { + // When --model is not `auto`, fall back to the reasoning_effort + // declared in the user's config.toml. The previous hard-coded `None` + // silently dropped the user's setting on every non-auto-route exec + // call, which (for example) prevented vllm + Qwen3 users from + // disabling thinking via `reasoning_effort = "off"` and caused + // 45s SSE idle timeouts on non-trivial prompts. 
CliAutoRoute { model: model.to_string(), - reasoning_effort: None, + reasoning_effort: config + .reasoning_effort() + .map(crate::tui::app::ReasoningEffort::from_setting), auto_model: false, } }