diff --git a/crates/tui/src/core/capacity.rs b/crates/tui/src/core/capacity.rs
index 437075fb..55547c9b 100644
--- a/crates/tui/src/core/capacity.rs
+++ b/crates/tui/src/core/capacity.rs
@@ -28,17 +28,18 @@ impl Default for CapacityControllerConfig {
         model_priors.insert("deepseek_v4_flash".to_string(), 4.2);
 
         Self {
-            enabled: true,
-            // Tuning history (#63 follow-up): the previous defaults
-            // (low_risk_max=0.34, refresh_cooldown_turns=2, min_turns=2)
-            // fired `TargetedContextRefresh` every couple of turns whenever
-            // p_fail crept above 0.34. Each refresh runs `compact_messages_safe`
-            // which rewrites the conversation history — visually that looked
-            // like the agent "restarting" mid-session. Bumping the floor to
-            // 0.50 (still well below the medium ceiling of 0.62) and
-            // lengthening the cooldown to 6 turns reduces interventions
-            // ~3-4x without disabling the controller; it keeps firing on
-            // genuine risk while ignoring routine noise.
+            // OFF BY DEFAULT. The capacity controller's main intervention,
+            // `TargetedContextRefresh`, runs `compact_messages_safe` which
+            // rewrites the live conversation — visually identical to the
+            // agent "restarting" mid-turn. Power users running V4 on a 1M
+            // context window simply don't need this guardrail; the failure
+            // mode it protects against (context overflow) is rare in
+            // practice and fails loudly (the provider returns a clear
+            // error). Users who do want the controller back can enable it
+            // via `capacity.enabled = true` in `~/.deepseek/config.toml`.
+            enabled: false,
+            // Thresholds retained for the opt-in path; tuning notes live
+            // in git history (#63 follow-up).
             low_risk_max: 0.50,
             medium_risk_max: 0.62,
             severe_min_slack: -0.25,
@@ -693,7 +694,13 @@ mod tests {
 
     #[test]
     fn cooldown_blocks_repeated_action() {
-        let mut controller = CapacityController::new(CapacityControllerConfig::default());
+        // Capacity controller is opt-in (off by default since v0.6.2). This
+        // test exercises the cooldown logic, so explicitly enable it.
+        let config = CapacityControllerConfig {
+            enabled: true,
+            ..CapacityControllerConfig::default()
+        };
+        let mut controller = CapacityController::new(config);
         let turn_index = 5;
         controller.mark_turn_start(turn_index);
         controller.mark_intervention_applied(turn_index, GuardrailAction::TargetedContextRefresh);
diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index 5a2c053c..ce9765eb 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -263,12 +263,13 @@ const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
 const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
 /// Maximum wall-clock duration for a single streaming response.
 const STREAM_MAX_DURATION_SECS: u64 = 300; // 5 minutes
-/// Max output tokens requested for normal agent turns. Bumped from 4096 to
-/// 32768: V4 thinking models can consume 8-15K reasoning tokens on hard
-/// prompts; the old 4K ceiling exhausted the budget, the API closed the
-/// SSE stream with `finish_reason: "length"`, and the visible reply ended
-/// up empty (surfaced as the assistant "stopping mid-response").
-const TURN_MAX_OUTPUT_TOKENS: u32 = 32768;
+/// Max output tokens requested for normal agent turns (256K = 256 * 1024).
+/// Generous on purpose: V4 thinking models can produce tens of thousands of
+/// reasoning tokens on hard prompts before the visible reply, and a tighter
+/// per-turn cap surfaced as the assistant "stopping mid-response" once
+/// reasoning consumed the budget. DeepSeek V4 ships with a 1M context
+/// window, so this reservation still leaves ample room for input.
+const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144;
 /// Keep this many most recent messages when emergency trimming is required.
 const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4;
 /// Allow a few emergency recovery attempts before failing the turn.
diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 80c446af..3f030d97 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -260,9 +260,12 @@ fn detects_context_length_errors_from_provider_payloads() {
 
 #[test]
 fn context_budget_reserves_output_and_headroom() {
-    let budget = context_input_budget("deepseek-v3.2-128k", TURN_MAX_OUTPUT_TOKENS)
-        .expect("deepseek models should have known context window");
-    let expected = 128_000usize - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
+    // V4 has a 1M context window — the only family that comfortably hosts
+    // a 256K output reservation without saturating the input budget to 0.
+    let budget = context_input_budget("deepseek-v4-pro", TURN_MAX_OUTPUT_TOKENS)
+        .expect("deepseek-v4-pro should have a known context window");
+    let v4_window: usize = 1_000_000;
+    let expected = v4_window - (TURN_MAX_OUTPUT_TOKENS as usize) - 1_024usize;
     assert_eq!(budget, expected);
 }