diff --git a/README.md b/README.md index 39b3b582..dec3b51d 100644 --- a/README.md +++ b/README.md @@ -329,7 +329,9 @@ UI locale is separate from model language — set `locale` in `settings.toml`, u | `deepseek-v4-pro` | 1M | $0.003625 / 1M* | $0.435 / 1M* | $0.87 / 1M* | | `deepseek-v4-flash` | 1M | $0.0028 / 1M | $0.14 / 1M | $0.28 / 1M | -Legacy aliases `deepseek-chat` / `deepseek-reasoner` map to `deepseek-v4-flash`. NVIDIA NIM variants use your NVIDIA account terms. +DeepSeek Platform defaults to `https://api.deepseek.com/beta` in v0.8.16 so beta-gated API features can be tested without extra setup. Set `base_url = "https://api.deepseek.com"` to opt out. + +Legacy aliases `deepseek-chat` / `deepseek-reasoner` map to `deepseek-v4-flash` and retire after July 24, 2026. NVIDIA NIM variants use your NVIDIA account terms. *DeepSeek Pro rates currently reflect a limited-time 75% discount, which remains valid until 15:59 UTC on 31 May 2026. After that time, the TUI cost estimator will revert to the base Pro rates.* diff --git a/config.example.toml b/config.example.toml index cb88f524..5fe9492f 100644 --- a/config.example.toml +++ b/config.example.toml @@ -17,9 +17,9 @@ # defaults when `[providers.deepseek]` is absent (backward compatibility). provider = "deepseek" # deepseek | nvidia-nim | openrouter | novita | fireworks | sglang | vllm | ollama api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty -base_url = "https://api.deepseek.com" +base_url = "https://api.deepseek.com/beta" # base_url = "https://api.deepseeki.com" # China users -# base_url = "https://api.deepseek.com/beta" # DeepSeek beta features such as strict tool mode +# base_url = "https://api.deepseek.com" # opt out of DeepSeek beta features # Optional custom model request headers for OpenAI-compatible gateways. # Authorization and Content-Type are managed by the client and cannot be overridden here. # http_headers = { "X-Model-Provider-Id" = "your-model-provider" } @@ -163,7 +163,7 @@ max_subagents = 10 # optional (1-20) # DeepSeek Platform (https://platform.deepseek.com) [providers.deepseek] # api_key = "YOUR_DEEPSEEK_API_KEY" -# base_url = "https://api.deepseek.com" +# base_url = "https://api.deepseek.com/beta" # model = "deepseek-v4-pro" # http_headers = { "X-Model-Provider-Id" = "your-model-provider" } # optional custom request headers @@ -296,8 +296,8 @@ verbatim_window_turns = 16 l1_threshold = 192000 l2_threshold = 384000 l3_threshold = 576000 -# Hard cycle also reserves the normal 262144-token output budget plus 1024 -# safety tokens against the model window. +# Hard cycle reserves the normal 262144-token internal turn budget plus 1024 +# safety tokens, separate from V4's official 384000 max-output metadata. cycle_threshold = 768000 seam_model = "deepseek-v4-flash" @@ -347,7 +347,7 @@ fallback_default_prior = 3.8 # Select a profile with `deepseek --profile ` or `DEEPSEEK_PROFILE=`. [profiles.work] api_key = "WORK_DEEPSEEK_API_KEY" -base_url = "https://api.deepseek.com" +base_url = "https://api.deepseek.com/beta" [profiles.dev] api_key = "DEV_DEEPSEEK_API_KEY" diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 349c9400..b0bb82e9 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -18,7 +18,7 @@ const DEFAULT_DEEPSEEK_MODEL: &str = "deepseek-v4-pro"; const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; const DEFAULT_OPENAI_MODEL: &str = "gpt-4.1"; -const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com"; +const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta"; const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1"; const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; const DEFAULT_OPENROUTER_MODEL: &str = "deepseek/deepseek-v4-pro"; @@ -1492,6 +1492,19 @@ mod tests { assert_eq!(resolved.model, "deepseek-v4-pro"); } + #[test] + fn deepseek_runtime_defaults_to_beta_endpoint() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + let config = ConfigToml::default(); + + let resolved = config.resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, ProviderKind::Deepseek); + assert_eq!(resolved.base_url, DEFAULT_DEEPSEEK_BASE_URL); + assert_eq!(resolved.model, DEFAULT_DEEPSEEK_MODEL); + } + #[test] fn provider_specific_deepseek_fields_override_tui_compat_fields() { let _lock = env_lock(); diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index fbe29c5c..e71cc8ae 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -367,11 +367,24 @@ pub(super) fn versioned_base_url(base_url: &str) -> String { } } +fn unversioned_base_url(base_url: &str) -> String { + let trimmed = base_url.trim_end_matches('/'); + trimmed + .strip_suffix("/v1") + .or_else(|| trimmed.strip_suffix("/beta")) + .unwrap_or(trimmed) + .to_string() +} + pub(super) fn api_url(base_url: &str, path: &str) -> String { + let path = path.trim_start_matches('/'); + if path.starts_with("beta/") { + return format!("{}/{}", unversioned_base_url(base_url), path); + } format!( "{}/{}", versioned_base_url(base_url).trim_end_matches('/'), - path.trim_start_matches('/') + path ) } @@ -1021,6 +1034,22 @@ mod tests { ); } + #[test] + fn api_url_routes_beta_paths_from_any_deepseek_base() { + assert_eq!( + api_url("https://api.deepseek.com", "beta/completions"), + "https://api.deepseek.com/beta/completions" + ); + assert_eq!( + api_url("https://api.deepseek.com/v1", "beta/completions"), + "https://api.deepseek.com/beta/completions" + ); + assert_eq!( + api_url("https://api.deepseek.com/beta", "beta/completions"), + "https://api.deepseek.com/beta/completions" + ); + } + #[test] fn default_headers_include_custom_headers_when_configured() { let mut extra = HashMap::new(); diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 4de50503..28542788 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -20,6 +20,7 @@ use crate::hooks::HooksConfig; pub const DEFAULT_MAX_SUBAGENTS: usize = 10; pub const MAX_SUBAGENTS: usize = 20; pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro"; +pub const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta"; pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; pub const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; pub const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1"; @@ -148,7 +149,10 @@ pub struct ProviderCapability { pub resolved_model: String, /// Context window in tokens (the maximum input the model can accept). pub context_window: u32, - /// Recommended maximum output tokens (`max_tokens`) for this combo. + /// Official maximum output tokens for this combo. + /// + /// This is model metadata for diagnostics and CI policy. Normal turns use + /// a separate, more conservative request cap in the engine. pub max_output: u32, /// Whether the provider+model supports thinking/reasoning mode. pub thinking_supported: bool, @@ -199,9 +203,10 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi .unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS) }; - // Max output tokens: DeepSeek V4 models allow 262K; others get 4096. + // Max output tokens: official DeepSeek V4 API metadata lists 384K; + // runtime request caps remain separate and more conservative. let max_output = if is_v4_pro || is_v4_flash { - 262_144 + 384_000 } else { 4096 }; @@ -1231,7 +1236,7 @@ impl Config { }; let base = provider_base.or(root_base).unwrap_or_else(|| { match provider { - ApiProvider::Deepseek => "https://api.deepseek.com", + ApiProvider::Deepseek => DEFAULT_DEEPSEEK_BASE_URL, ApiProvider::DeepseekCN => DEFAULT_DEEPSEEKCN_BASE_URL, ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL, ApiProvider::Openrouter => DEFAULT_OPENROUTER_BASE_URL, @@ -1704,8 +1709,9 @@ pub fn ensure_config_file_exists(path: Option) -> Result Result { api_key = "{key_to_write}" -# Base URL (default: https://api.deepseek.com) -# base_url = "https://api.deepseek.com" +# Base URL (default: https://api.deepseek.com/beta) +# Set https://api.deepseek.com to opt out of beta features. +# base_url = "https://api.deepseek.com/beta" # Default model default_text_model = "{default_model}" @@ -3916,6 +3923,25 @@ api_key = "old-openrouter-key" Ok(()) } + #[test] + fn deepseek_provider_defaults_to_beta_endpoint() { + let config = Config::default(); + + assert_eq!(config.api_provider(), ApiProvider::Deepseek); + assert_eq!(config.deepseek_base_url(), DEFAULT_DEEPSEEK_BASE_URL); + } + + #[test] + fn explicit_deepseek_base_url_overrides_beta_default() { + let config = Config { + base_url: Some("https://api.deepseek.com".to_string()), + ..Default::default() + }; + + assert_eq!(config.api_provider(), ApiProvider::Deepseek); + assert_eq!(config.deepseek_base_url(), "https://api.deepseek.com"); + } + #[test] fn deepseek_model_env_overrides_default_text_model() -> Result<()> { let _lock = lock_test_env(); @@ -4752,7 +4778,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); assert!(cap.cache_telemetry_supported); assert_eq!( @@ -4768,7 +4794,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); assert!(cap.cache_telemetry_supported); } @@ -4780,7 +4806,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); assert!(cap.cache_telemetry_supported); assert_eq!( @@ -4796,7 +4822,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); assert!(cap.cache_telemetry_supported); } @@ -4808,7 +4834,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); // OpenRouter does not return DeepSeek prompt-cache telemetry. assert!(!cap.cache_telemetry_supported); @@ -4825,7 +4851,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); assert!(!cap.cache_telemetry_supported); } @@ -4837,7 +4863,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); assert!(!cap.cache_telemetry_supported); } @@ -4849,7 +4875,7 @@ model = "deepseek-v4-pro" cap.context_window, crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS ); - assert_eq!(cap.max_output, 262_144); + assert_eq!(cap.max_output, 384_000); assert!(cap.thinking_supported); assert!(!cap.cache_telemetry_supported); } diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 8f55bfdd..10720dbb 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -1379,13 +1379,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { ); } } - println!( - " · base_url: {}", - config - .base_url - .as_deref() - .unwrap_or("https://api.deepseek.com") - ); + println!(" · base_url: {}", config.deepseek_base_url()); let model = config .default_text_model .clone() @@ -4278,7 +4272,7 @@ mod doctor_endpoint_tests { let target = doctor_api_target(&config); assert_eq!(target.provider, "deepseek"); - assert_eq!(target.base_url, "https://api.deepseek.com"); + assert_eq!(target.base_url, crate::config::DEFAULT_DEEPSEEK_BASE_URL); assert_eq!(target.model, crate::config::DEFAULT_TEXT_MODEL); } diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 6d938108..c31a979a 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -109,11 +109,11 @@ Current boundary note (v0.8.6): #### DeepSeek API Endpoints DeepSeek exposes OpenAI-compatible endpoints. The CLI uses: -- `https://api.deepseek.com/v1/chat/completions` - normal and streaming model turns -- `https://api.deepseek.com/v1/models` - live model discovery and health checks +- `https://api.deepseek.com/beta/chat/completions` - default v0.8.16 DeepSeek model turns +- `https://api.deepseek.com/beta/models` - default v0.8.16 live model discovery and health checks `https://api.deepseek.com/v1` is accepted for OpenAI SDK compatibility, and -`https://api.deepseek.com/beta` can be configured for beta-only features such as +can still be configured explicitly to opt out of beta-only features such as strict tool mode, chat prefix completion, and FIM completion. The public DeepSeek docs do not document a Responses API path for this workflow; the engine drives turns through Chat Completions. diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index c78fbd3e..e64529ca 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -94,7 +94,7 @@ default_text_model = "deepseek-v4-pro" [profiles.work] api_key = "WORK_KEY" -base_url = "https://api.deepseek.com" +base_url = "https://api.deepseek.com/beta" [profiles.nvidia-nim] provider = "nvidia-nim" @@ -296,7 +296,7 @@ separate: | Quantity | Meaning | Allowed to drive | |---|---|---| | Active request input estimate | Conservative estimate of the next request's live system prompt and transcript payload. | Header/footer context percent, hard-cycle trigger, opt-in Flash seam trigger, and emergency overflow preflight. | -| Reserved response headroom | The requested `max_tokens` budget plus safety headroom. v0.7.5 keeps normal turns at `262144` output tokens and adds `1024` safety tokens for context-window checks. | Hard-cycle and emergency overflow budget checks only. | +| Reserved response headroom | The internal turn budget plus safety headroom. v0.8.16 keeps normal turns at `262144` reserved output tokens and adds `1024` safety tokens for context-window checks, even though V4 capability metadata reports the official `384000` max output. | Hard-cycle and emergency overflow budget checks only. | | Cumulative API usage | Provider-reported input plus output tokens summed across completed API calls; multi-tool turns may count the same stable prefix more than once. | Session usage and approximate cost telemetry only. | | Prompt cache hit/miss | Provider cache telemetry for the most recent call when available. | Cache-hit display and cost estimation only; never compaction, seam, or cycle triggers. | | Context percent | Active request input estimate divided by the model context window. | Display only; it mirrors the active-input basis used by context safeguards. | @@ -327,8 +327,8 @@ If you are upgrading from older releases: - `provider` (string, optional): `deepseek` (default), `deepseek-cn`, `nvidia-nim`, `openrouter`, `novita`, `fireworks`, `sglang`, `vllm`, or `ollama`. `deepseek-cn` uses DeepSeek's mainland China endpoint (`https://api.deepseeki.com`); `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`. - `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it. -- `base_url` (string, optional): defaults to `https://api.deepseek.com` for DeepSeek's OpenAI-compatible Chat Completions API, `https://api.deepseeki.com` for `provider = "deepseek-cn"`, or the provider-specific endpoint for hosted/self-hosted providers. `https://api.deepseek.com/v1` is also accepted for SDK compatibility; use `https://api.deepseek.com/beta` only for DeepSeek beta features such as strict tool mode, chat prefix completion, and FIM completion. -- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash`. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Ollama model tags are passed through unchanged. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process. +- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API in v0.8.16, `https://api.deepseeki.com` for `provider = "deepseek-cn"`, or the provider-specific endpoint for hosted/self-hosted providers. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features. +- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Ollama model tags are passed through unchanged. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process. - `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, or `max`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`. - `allow_shell` (bool, optional): defaults to `true` (sandboxed). - `approval_policy` (string, optional): `on-request`, `untrusted`, or `never`. Runtime `approval_mode` editing in `/config` also accepts `on-request` and `untrusted` aliases. @@ -550,14 +550,12 @@ The `capability` key contains per-provider capability info derived from static knowledge (release docs, API guides) rather than live API probes. Top-level sub-keys: `resolved_provider`, `resolved_model`, `context_window`, `max_output`, `thinking_supported`, `cache_telemetry_supported`, -`request_payload_mode`, and `deprecation`. When the resolved model is a known -legacy alias (e.g. `deepseek-chat`, `deepseek-reasoner`), the `deprecation` -sub-object carries `alias`, `replacement`, and `notice` fields. +and `request_payload_mode`. -Use `capability.context_window` and `capability.max_output` for context-window -budgeting in CI scripts. Use `capability.thinking_supported` to decide whether -to configure reasoning effort. Use `capability.deprecation` to warn users about -legacy model aliases. +Use `capability.context_window` and `capability.max_output` for model-limit +checks in CI scripts; do not treat `capability.max_output` as the per-turn +request budget. Use `capability.thinking_supported` to decide whether to +configure reasoning effort. ## Setup status, clean, and extension dirs diff --git a/docs/OPERATIONS_RUNBOOK.md b/docs/OPERATIONS_RUNBOOK.md index d0966343..f1085c44 100644 --- a/docs/OPERATIONS_RUNBOOK.md +++ b/docs/OPERATIONS_RUNBOOK.md @@ -24,7 +24,7 @@ Symptoms: Checks: 1. Inspect retry/health logs (`deepseek_cli::client`) 2. Verify endpoint connectivity: - - `curl -sS https://api.deepseek.com/v1/models -H "Authorization: Bearer $DEEPSEEK_API_KEY"` + - `curl -sS https://api.deepseek.com/beta/models -H "Authorization: Bearer $DEEPSEEK_API_KEY"` 3. Confirm no local sandbox/permission deadlock in tool output Actions: diff --git a/docs/RUNTIME_API.md b/docs/RUNTIME_API.md index 44e5eb0b..c4d0cdd2 100644 --- a/docs/RUNTIME_API.md +++ b/docs/RUNTIME_API.md @@ -92,7 +92,7 @@ deepseek doctor --json "api_key": { "source": "env" }, - "base_url": "https://api.deepseek.com", + "base_url": "https://api.deepseek.com/beta", "default_text_model": "deepseek-v4-pro", "memory": { "enabled": false,