diff --git a/CHANGELOG.md b/CHANGELOG.md
index d6e6e999..e5f4e348 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,24 @@ real world uses."
 
 ### Fixed
 
+- **vLLM provider: `reasoning_effort = "off"` now actually
+  disables thinking on Qwen3 / DeepSeek-R1 servers, cutting
+  TTFT from ~13s to ~270ms** (harvested from PR #1480 by
+  **@h3c-hexin**). The vLLM branch of `apply_reasoning_effort`
+  was injecting `thinking: {type: "disabled"}` at the top level of
+  the request body — but vLLM speaks OpenAI's chat-completions
+  protocol and silently ignored this Anthropic-native field, dropping
+  the directive. The model then emitted a full hidden reasoning
+  trace into the non-standard `reasoning` field (which this
+  client doesn't surface), so users saw a multi-second freeze
+  before any content token arrived. The vLLM branch now emits
+  vLLM's OpenAI-API extension `chat_template_kwargs.enable_thinking`
+  (which vLLM forwards into the model's chat template — the
+  canonical way to toggle Qwen3's `<think>...</think>` mode).
+  Measurement against vLLM + Qwen3.6-35B-A3B-FP8: TTFT
+  13039ms → 274ms, total LLM call 13s → 5.7s. The `high` / `max`
+  levels likewise send `enable_thinking: true` (with `reasoning_effort`)
+  instead of `thinking`. No change for non-vLLM providers.
 - **`/sessions` picker no longer shows `<turn_meta>` as the
   session title** (harvested from PR #1498 by **@wdw8276**).
   `session_manager::create_saved_session_with_id_and_mode`
diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs
index b7ec2145..3b5875ff 100644
--- a/crates/tui/src/client.rs
+++ b/crates/tui/src/client.rs
@@ -888,10 +888,23 @@ pub(super) fn apply_reasoning_effort(
             | ApiProvider::Openrouter
             | ApiProvider::Novita
             | ApiProvider::Fireworks
-            | ApiProvider::Sglang
-            | ApiProvider::Vllm => {
+            | ApiProvider::Sglang => {
                 body["thinking"] = json!({ "type": "disabled" });
             }
+            // vLLM is an OpenAI-protocol server, not an Anthropic-protocol one.
+            // For Qwen3 / DeepSeek-R1 / other reasoning models hosted via vLLM,
+            // the canonical vLLM request extension to disable thinking is
+            // `chat_template_kwargs.enable_thinking`. The old
+            // `thinking: {"type": "disabled"}` field is Anthropic-native and
+            // silently ignored by vLLM — the model still emits a full
+            // reasoning trace into the `reasoning` field (which this client
+            // doesn't surface), causing 10+ seconds of perceived "freeze"
+            // before the first content token (PR #1480 by @h3c-hexin).
+            ApiProvider::Vllm => {
+                body["chat_template_kwargs"] = json!({
+                    "enable_thinking": false,
+                });
+            }
             ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Ollama => {}
             ApiProvider::NvidiaNim => {
                 body["chat_template_kwargs"] = json!({
@@ -905,11 +918,16 @@ pub(super) fn apply_reasoning_effort(
             | ApiProvider::Openrouter
             | ApiProvider::Novita
             | ApiProvider::Fireworks
-            | ApiProvider::Sglang
-            | ApiProvider::Vllm => {
+            | ApiProvider::Sglang => {
                 body["reasoning_effort"] = json!("high");
                 body["thinking"] = json!({ "type": "enabled" });
             }
+            ApiProvider::Vllm => {
+                body["chat_template_kwargs"] = json!({
+                    "enable_thinking": true,
+                });
+                body["reasoning_effort"] = json!("high");
+            }
             ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Ollama => {}
             ApiProvider::NvidiaNim => {
                 body["chat_template_kwargs"] = json!({
@@ -924,11 +942,16 @@ pub(super) fn apply_reasoning_effort(
             | ApiProvider::Openrouter
             | ApiProvider::Novita
             | ApiProvider::Fireworks
-            | ApiProvider::Sglang
-            | ApiProvider::Vllm => {
+            | ApiProvider::Sglang => {
                 body["reasoning_effort"] = json!("max");
                 body["thinking"] = json!({ "type": "enabled" });
             }
+            ApiProvider::Vllm => {
+                body["chat_template_kwargs"] = json!({
+                    "enable_thinking": true,
+                });
+                body["reasoning_effort"] = json!("max");
+            }
             ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Ollama => {}
             ApiProvider::NvidiaNim => {
                 body["chat_template_kwargs"] = json!({