diff --git a/README.md b/README.md
index 39b3b582..dec3b51d 100644
--- a/README.md
+++ b/README.md
@@ -329,7 +329,9 @@ UI locale is separate from model language — set `locale` in `settings.toml`, u
 | `deepseek-v4-pro` | 1M | $0.003625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
 | `deepseek-v4-flash` | 1M | $0.0028 / 1M | $0.14 / 1M | $0.28 / 1M |
 
-Legacy aliases `deepseek-chat` / `deepseek-reasoner` map to `deepseek-v4-flash`. NVIDIA NIM variants use your NVIDIA account terms.
+DeepSeek Platform defaults to `https://api.deepseek.com/beta` in v0.8.16 so beta-gated API features can be tested without extra setup. Set `base_url = "https://api.deepseek.com"` to opt out.
+
+Legacy aliases `deepseek-chat` / `deepseek-reasoner` map to `deepseek-v4-flash` and retire after July 24, 2026. NVIDIA NIM variants use your NVIDIA account terms.
 
 *DeepSeek Pro rates currently reflect a limited-time 75% discount, which remains valid until 15:59 UTC on 31 May 2026. After that time, the TUI cost estimator will revert to the base Pro rates.*
 
diff --git a/config.example.toml b/config.example.toml
index cb88f524..5fe9492f 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -17,9 +17,9 @@
 # defaults when `[providers.deepseek]` is absent (backward compatibility).
 provider = "deepseek" # deepseek | nvidia-nim | openrouter | novita | fireworks | sglang | vllm | ollama
 api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty
-base_url = "https://api.deepseek.com"
+base_url = "https://api.deepseek.com/beta"
 # base_url = "https://api.deepseeki.com"         # China users
-# base_url = "https://api.deepseek.com/beta"     # DeepSeek beta features such as strict tool mode
+# base_url = "https://api.deepseek.com"          # opt out of DeepSeek beta features
 # Optional custom model request headers for OpenAI-compatible gateways.
 # Authorization and Content-Type are managed by the client and cannot be overridden here.
 # http_headers = { "X-Model-Provider-Id" = "your-model-provider" }
@@ -163,7 +163,7 @@ max_subagents = 10 # optional (1-20)
 # DeepSeek Platform (https://platform.deepseek.com)
 [providers.deepseek]
 # api_key = "YOUR_DEEPSEEK_API_KEY"
-# base_url = "https://api.deepseek.com"
+# base_url = "https://api.deepseek.com/beta"
 # model = "deepseek-v4-pro"
 # http_headers = { "X-Model-Provider-Id" = "your-model-provider" } # optional custom request headers
 
@@ -296,8 +296,8 @@ verbatim_window_turns = 16
 l1_threshold = 192000
 l2_threshold = 384000
 l3_threshold = 576000
-# Hard cycle also reserves the normal 262144-token output budget plus 1024
-# safety tokens against the model window.
+# Hard cycle reserves the normal 262144-token internal turn budget plus 1024
+# safety tokens, separate from V4's official 384000 max-output metadata.
 cycle_threshold = 768000
 seam_model = "deepseek-v4-flash"
 
@@ -347,7 +347,7 @@ fallback_default_prior = 3.8
 # Select a profile with `deepseek --profile <name>` or `DEEPSEEK_PROFILE=<name>`.
 [profiles.work]
 api_key = "WORK_DEEPSEEK_API_KEY"
-base_url = "https://api.deepseek.com"
+base_url = "https://api.deepseek.com/beta"
 
 [profiles.dev]
 api_key = "DEV_DEEPSEEK_API_KEY"
diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs
index 349c9400..b0bb82e9 100644
--- a/crates/config/src/lib.rs
+++ b/crates/config/src/lib.rs
@@ -18,7 +18,7 @@ const DEFAULT_DEEPSEEK_MODEL: &str = "deepseek-v4-pro";
 const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro";
 const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash";
 const DEFAULT_OPENAI_MODEL: &str = "gpt-4.1";
-const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com";
+const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta";
 const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
 const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
 const DEFAULT_OPENROUTER_MODEL: &str = "deepseek/deepseek-v4-pro";
@@ -1492,6 +1492,19 @@ mod tests {
         assert_eq!(resolved.model, "deepseek-v4-pro");
     }
 
+    #[test]
+    fn deepseek_runtime_defaults_to_beta_endpoint() {
+        let _lock = env_lock();
+        let _env = EnvGuard::without_deepseek_runtime_overrides();
+        let config = ConfigToml::default();
+        let resolved = config.resolve_runtime_options(&CliRuntimeOverrides::default());
+        // Pin the literal URL so this test fails if the default ever leaves the
+        // beta endpoint; comparing only against the constant could not catch that.
+        assert_eq!(resolved.provider, ProviderKind::Deepseek);
+        assert_eq!(resolved.base_url, "https://api.deepseek.com/beta");
+        assert_eq!(resolved.model, DEFAULT_DEEPSEEK_MODEL);
+    }
+
     #[test]
     fn provider_specific_deepseek_fields_override_tui_compat_fields() {
         let _lock = env_lock();
diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs
index fbe29c5c..e71cc8ae 100644
--- a/crates/tui/src/client.rs
+++ b/crates/tui/src/client.rs
@@ -367,11 +367,24 @@ pub(super) fn versioned_base_url(base_url: &str) -> String {
     }
 }
 
+/// Returns `base_url` with trailing slashes and one trailing `/v1` or `/beta`
+/// segment removed; any other base is returned with only the slashes trimmed.
+fn unversioned_base_url(base_url: &str) -> String {
+    let root = base_url.trim_end_matches('/');
+    let without_v1 = root.strip_suffix("/v1");
+    let stem = without_v1.or_else(|| root.strip_suffix("/beta"));
+    stem.unwrap_or(root).to_string()
+}
+
 pub(super) fn api_url(base_url: &str, path: &str) -> String {
+    let path = path.trim_start_matches('/'); // normalized once, reused by both branches
+    if path.starts_with("beta/") {
+        return format!("{}/{}", unversioned_base_url(base_url), path); // version suffix dropped
+    }
     format!(
         "{}/{}",
         versioned_base_url(base_url).trim_end_matches('/'),
-        path.trim_start_matches('/')
+        path
     )
 }
 
@@ -1021,6 +1034,22 @@ mod tests {
         );
     }
 
+    #[test]
+    fn api_url_routes_beta_paths_from_any_deepseek_base() {
+        // Bare, `/v1`, and `/beta` bases must all yield a single `/beta/` prefix.
+        let bases = [
+            "https://api.deepseek.com",
+            "https://api.deepseek.com/v1",
+            "https://api.deepseek.com/beta",
+        ];
+        for base in bases {
+            assert_eq!(
+                api_url(base, "beta/completions"),
+                "https://api.deepseek.com/beta/completions"
+            );
+        }
+    }
+
     #[test]
     fn default_headers_include_custom_headers_when_configured() {
         let mut extra = HashMap::new();
diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs
index 4de50503..28542788 100644
--- a/crates/tui/src/config.rs
+++ b/crates/tui/src/config.rs
@@ -20,6 +20,7 @@ use crate::hooks::HooksConfig;
 pub const DEFAULT_MAX_SUBAGENTS: usize = 10;
 pub const MAX_SUBAGENTS: usize = 20;
 pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro";
+pub const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta";
 pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro";
 pub const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash";
 pub const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
@@ -148,7 +149,10 @@ pub struct ProviderCapability {
     pub resolved_model: String,
     /// Context window in tokens (the maximum input the model can accept).
     pub context_window: u32,
-    /// Recommended maximum output tokens (`max_tokens`) for this combo.
+    /// Official maximum output tokens for this combo.
+    ///
+    /// This is model metadata for diagnostics and CI policy. Normal turns use
+    /// a separate, more conservative request cap in the engine.
     pub max_output: u32,
     /// Whether the provider+model supports thinking/reasoning mode.
     pub thinking_supported: bool,
@@ -199,9 +203,10 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
             .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
     };
 
-    // Max output tokens: DeepSeek V4 models allow 262K; others get 4096.
+    // Max output tokens: official DeepSeek V4 API metadata lists 384K;
+    // runtime request caps remain separate and more conservative.
     let max_output = if is_v4_pro || is_v4_flash {
-        262_144
+        384_000
     } else {
         4096
     };
@@ -1231,7 +1236,7 @@ impl Config {
         };
         let base = provider_base.or(root_base).unwrap_or_else(|| {
             match provider {
-                ApiProvider::Deepseek => "https://api.deepseek.com",
+                ApiProvider::Deepseek => DEFAULT_DEEPSEEK_BASE_URL,
                 ApiProvider::DeepseekCN => DEFAULT_DEEPSEEKCN_BASE_URL,
                 ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL,
                 ApiProvider::Openrouter => DEFAULT_OPENROUTER_BASE_URL,
@@ -1704,8 +1709,9 @@ pub fn ensure_config_file_exists(path: Option<PathBuf>) -> Result<Option<PathBuf
 # Get your API key from https://platform.deepseek.com
 # Save it with: deepseek auth set --provider deepseek
 
-# Base URL (default: https://api.deepseek.com)
-# base_url = "https://api.deepseek.com"
+# Base URL (default: https://api.deepseek.com/beta)
+# Set https://api.deepseek.com to opt out of beta features.
+# base_url = "https://api.deepseek.com/beta"
 
 # Default model
 default_text_model = "{default_model}"
@@ -2651,8 +2657,9 @@ fn save_api_key_to_config_file(api_key: &str) -> Result<PathBuf> {
 
 api_key = "{key_to_write}"
 
-# Base URL (default: https://api.deepseek.com)
-# base_url = "https://api.deepseek.com"
+# Base URL (default: https://api.deepseek.com/beta)
+# Set https://api.deepseek.com to opt out of beta features.
+# base_url = "https://api.deepseek.com/beta"
 
 # Default model
 default_text_model = "{default_model}"
@@ -3916,6 +3923,25 @@ api_key = "old-openrouter-key"
         Ok(())
     }
 
+    #[test]
+    fn deepseek_provider_defaults_to_beta_endpoint() {
+        let config = Config::default();
+        assert_eq!(config.api_provider(), ApiProvider::Deepseek);
+        // Literal on purpose: a change to the default constant must not pass unnoticed.
+        assert_eq!(config.deepseek_base_url(), "https://api.deepseek.com/beta");
+    }
+
+    #[test]
+    fn explicit_deepseek_base_url_overrides_beta_default() {
+        // Opt-out path: an explicitly configured base_url replaces the beta default.
+        let config = Config {
+            base_url: Some("https://api.deepseek.com".to_string()),
+            ..Default::default()
+        };
+        assert_eq!(config.api_provider(), ApiProvider::Deepseek);
+        assert_eq!(config.deepseek_base_url(), "https://api.deepseek.com");
+    }
+
     #[test]
     fn deepseek_model_env_overrides_default_text_model() -> Result<()> {
         let _lock = lock_test_env();
@@ -4752,7 +4778,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         assert!(cap.cache_telemetry_supported);
         assert_eq!(
@@ -4768,7 +4794,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         assert!(cap.cache_telemetry_supported);
     }
@@ -4780,7 +4806,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         assert!(cap.cache_telemetry_supported);
         assert_eq!(
@@ -4796,7 +4822,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         assert!(cap.cache_telemetry_supported);
     }
@@ -4808,7 +4834,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         // OpenRouter does not return DeepSeek prompt-cache telemetry.
         assert!(!cap.cache_telemetry_supported);
@@ -4825,7 +4851,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         assert!(!cap.cache_telemetry_supported);
     }
@@ -4837,7 +4863,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         assert!(!cap.cache_telemetry_supported);
     }
@@ -4849,7 +4875,7 @@ model = "deepseek-v4-pro"
             cap.context_window,
             crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
         );
-        assert_eq!(cap.max_output, 262_144);
+        assert_eq!(cap.max_output, 384_000);
         assert!(cap.thinking_supported);
         assert!(!cap.cache_telemetry_supported);
     }
diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index 8f55bfdd..10720dbb 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -1379,13 +1379,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
             );
         }
     }
-    println!(
-        "  · base_url: {}",
-        config
-            .base_url
-            .as_deref()
-            .unwrap_or("https://api.deepseek.com")
-    );
+    println!("  · base_url: {}", config.deepseek_base_url());
     let model = config
         .default_text_model
         .clone()
@@ -4278,7 +4272,7 @@ mod doctor_endpoint_tests {
         let target = doctor_api_target(&config);
 
         assert_eq!(target.provider, "deepseek");
-        assert_eq!(target.base_url, "https://api.deepseek.com");
+        assert_eq!(target.base_url, crate::config::DEFAULT_DEEPSEEK_BASE_URL);
         assert_eq!(target.model, crate::config::DEFAULT_TEXT_MODEL);
     }
 
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 6d938108..c31a979a 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -109,11 +109,11 @@ Current boundary note (v0.8.6):
 #### DeepSeek API Endpoints
 
 DeepSeek exposes OpenAI-compatible endpoints. The CLI uses:
-- `https://api.deepseek.com/v1/chat/completions` - normal and streaming model turns
-- `https://api.deepseek.com/v1/models` - live model discovery and health checks
+- `https://api.deepseek.com/beta/chat/completions` - normal and streaming model turns (v0.8.16 default)
+- `https://api.deepseek.com/beta/models` - live model discovery and health checks (v0.8.16 default)
 
 `https://api.deepseek.com/v1` is accepted for OpenAI SDK compatibility, and
-`https://api.deepseek.com/beta` can be configured for beta-only features such as
+can still be configured explicitly (as can plain `https://api.deepseek.com`) to opt out of beta-only features such as
 strict tool mode, chat prefix completion, and FIM completion. The public
 DeepSeek docs do not document a Responses API path for this workflow; the engine
 drives turns through Chat Completions.
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index c78fbd3e..e64529ca 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -94,7 +94,7 @@ default_text_model = "deepseek-v4-pro"
 
 [profiles.work]
 api_key = "WORK_KEY"
-base_url = "https://api.deepseek.com"
+base_url = "https://api.deepseek.com/beta"
 
 [profiles.nvidia-nim]
 provider = "nvidia-nim"
@@ -296,7 +296,7 @@ separate:
 | Quantity | Meaning | Allowed to drive |
 |---|---|---|
 | Active request input estimate | Conservative estimate of the next request's live system prompt and transcript payload. | Header/footer context percent, hard-cycle trigger, opt-in Flash seam trigger, and emergency overflow preflight. |
-| Reserved response headroom | The requested `max_tokens` budget plus safety headroom. v0.7.5 keeps normal turns at `262144` output tokens and adds `1024` safety tokens for context-window checks. | Hard-cycle and emergency overflow budget checks only. |
+| Reserved response headroom | The internal turn budget plus safety headroom. v0.8.16 keeps normal turns at `262144` reserved output tokens and adds `1024` safety tokens for context-window checks, even though V4 capability metadata reports the official `384000` max output. | Hard-cycle and emergency overflow budget checks only. |
 | Cumulative API usage | Provider-reported input plus output tokens summed across completed API calls; multi-tool turns may count the same stable prefix more than once. | Session usage and approximate cost telemetry only. |
 | Prompt cache hit/miss | Provider cache telemetry for the most recent call when available. | Cache-hit display and cost estimation only; never compaction, seam, or cycle triggers. |
 | Context percent | Active request input estimate divided by the model context window. | Display only; it mirrors the active-input basis used by context safeguards. |
@@ -327,8 +327,8 @@ If you are upgrading from older releases:
 
 - `provider` (string, optional): `deepseek` (default), `deepseek-cn`, `nvidia-nim`, `openrouter`, `novita`, `fireworks`, `sglang`, `vllm`, or `ollama`. `deepseek-cn` uses DeepSeek's mainland China endpoint (`https://api.deepseeki.com`); `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`.
 - `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it.
-- `base_url` (string, optional): defaults to `https://api.deepseek.com` for DeepSeek's OpenAI-compatible Chat Completions API, `https://api.deepseeki.com` for `provider = "deepseek-cn"`, or the provider-specific endpoint for hosted/self-hosted providers. `https://api.deepseek.com/v1` is also accepted for SDK compatibility; use `https://api.deepseek.com/beta` only for DeepSeek beta features such as strict tool mode, chat prefix completion, and FIM completion.
-- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash`. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Ollama model tags are passed through unchanged. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process.
+- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API in v0.8.16, `https://api.deepseeki.com` for `provider = "deepseek-cn"`, or the provider-specific endpoint for hosted/self-hosted providers. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
+- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Ollama model tags are passed through unchanged. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process.
 - `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, or `max`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`.
 - `allow_shell` (bool, optional): defaults to `true` (sandboxed).
 - `approval_policy` (string, optional): `on-request`, `untrusted`, or `never`. Runtime `approval_mode` editing in `/config` also accepts `on-request` and `untrusted` aliases.
@@ -550,14 +550,12 @@ The `capability` key contains per-provider capability info derived from
 static knowledge (release docs, API guides) rather than live API probes.
 Top-level sub-keys: `resolved_provider`, `resolved_model`, `context_window`,
 `max_output`, `thinking_supported`, `cache_telemetry_supported`,
-`request_payload_mode`, and `deprecation`. When the resolved model is a known
-legacy alias (e.g. `deepseek-chat`, `deepseek-reasoner`), the `deprecation`
-sub-object carries `alias`, `replacement`, and `notice` fields.
+and `request_payload_mode`.
 
-Use `capability.context_window` and `capability.max_output` for context-window
-budgeting in CI scripts. Use `capability.thinking_supported` to decide whether
-to configure reasoning effort. Use `capability.deprecation` to warn users about
-legacy model aliases.
+Use `capability.context_window` and `capability.max_output` for model-limit
+checks in CI scripts; do not treat `capability.max_output` as the per-turn
+request budget. Use `capability.thinking_supported` to decide whether to
+configure reasoning effort.
 
 ## Setup status, clean, and extension dirs
 
diff --git a/docs/OPERATIONS_RUNBOOK.md b/docs/OPERATIONS_RUNBOOK.md
index d0966343..f1085c44 100644
--- a/docs/OPERATIONS_RUNBOOK.md
+++ b/docs/OPERATIONS_RUNBOOK.md
@@ -24,7 +24,7 @@ Symptoms:
 Checks:
 1. Inspect retry/health logs (`deepseek_cli::client`)
 2. Verify endpoint connectivity:
-   - `curl -sS https://api.deepseek.com/v1/models -H "Authorization: Bearer $DEEPSEEK_API_KEY"`
+   - `curl -sS https://api.deepseek.com/beta/models -H "Authorization: Bearer $DEEPSEEK_API_KEY"`
 3. Confirm no local sandbox/permission deadlock in tool output
 
 Actions:
diff --git a/docs/RUNTIME_API.md b/docs/RUNTIME_API.md
index 44e5eb0b..c4d0cdd2 100644
--- a/docs/RUNTIME_API.md
+++ b/docs/RUNTIME_API.md
@@ -92,7 +92,7 @@ deepseek doctor --json
   "api_key": {
     "source": "env"
   },
-  "base_url": "https://api.deepseek.com",
+  "base_url": "https://api.deepseek.com/beta",
   "default_text_model": "deepseek-v4-pro",
   "memory": {
     "enabled": false,