feat(api): default DeepSeek to beta endpoint

Closes #941.

Refs #938, #939, #940.
This commit is contained in:
Hunter Bown
2026-05-06 21:24:59 -05:00
committed by GitHub
parent 0ee298bd77
commit c7ed05a07c
10 changed files with 111 additions and 49 deletions
+3 -1
View File
@@ -329,7 +329,9 @@ UI locale is separate from model language — set `locale` in `settings.toml`, u
| `deepseek-v4-pro` | 1M | $0.003625 / 1M* | $0.435 / 1M* | $0.87 / 1M* |
| `deepseek-v4-flash` | 1M | $0.0028 / 1M | $0.14 / 1M | $0.28 / 1M |
Legacy aliases `deepseek-chat` / `deepseek-reasoner` map to `deepseek-v4-flash`. NVIDIA NIM variants use your NVIDIA account terms.
DeepSeek Platform defaults to `https://api.deepseek.com/beta` in v0.8.16 so beta-gated API features can be tested without extra setup. Set `base_url = "https://api.deepseek.com"` to opt out.
Legacy aliases `deepseek-chat` / `deepseek-reasoner` map to `deepseek-v4-flash` and retire after July 24, 2026. NVIDIA NIM variants use your NVIDIA account terms.
*DeepSeek Pro rates currently reflect a limited-time 75% discount, which remains valid until 15:59 UTC on 31 May 2026. After that time, the TUI cost estimator will revert to the base Pro rates.*
+6 -6
View File
@@ -17,9 +17,9 @@
# defaults when `[providers.deepseek]` is absent (backward compatibility).
provider = "deepseek" # deepseek | nvidia-nim | openrouter | novita | fireworks | sglang | vllm | ollama
api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty
base_url = "https://api.deepseek.com"
base_url = "https://api.deepseek.com/beta"
# base_url = "https://api.deepseeki.com" # China users
# base_url = "https://api.deepseek.com/beta" # DeepSeek beta features such as strict tool mode
# base_url = "https://api.deepseek.com" # opt out of DeepSeek beta features
# Optional custom model request headers for OpenAI-compatible gateways.
# Authorization and Content-Type are managed by the client and cannot be overridden here.
# http_headers = { "X-Model-Provider-Id" = "your-model-provider" }
@@ -163,7 +163,7 @@ max_subagents = 10 # optional (1-20)
# DeepSeek Platform (https://platform.deepseek.com)
[providers.deepseek]
# api_key = "YOUR_DEEPSEEK_API_KEY"
# base_url = "https://api.deepseek.com"
# base_url = "https://api.deepseek.com/beta"
# model = "deepseek-v4-pro"
# http_headers = { "X-Model-Provider-Id" = "your-model-provider" } # optional custom request headers
@@ -296,8 +296,8 @@ verbatim_window_turns = 16
l1_threshold = 192000
l2_threshold = 384000
l3_threshold = 576000
# Hard cycle also reserves the normal 262144-token output budget plus 1024
# safety tokens against the model window.
# Hard cycle reserves the normal 262144-token internal turn budget plus 1024
# safety tokens, separate from V4's official 384000 max-output metadata.
cycle_threshold = 768000
seam_model = "deepseek-v4-flash"
@@ -347,7 +347,7 @@ fallback_default_prior = 3.8
# Select a profile with `deepseek --profile <name>` or `DEEPSEEK_PROFILE=<name>`.
[profiles.work]
api_key = "WORK_DEEPSEEK_API_KEY"
base_url = "https://api.deepseek.com"
base_url = "https://api.deepseek.com/beta"
[profiles.dev]
api_key = "DEV_DEEPSEEK_API_KEY"
+14 -1
View File
@@ -18,7 +18,7 @@ const DEFAULT_DEEPSEEK_MODEL: &str = "deepseek-v4-pro";
const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro";
const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash";
const DEFAULT_OPENAI_MODEL: &str = "gpt-4.1";
const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com";
const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta";
const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
const DEFAULT_OPENROUTER_MODEL: &str = "deepseek/deepseek-v4-pro";
@@ -1492,6 +1492,19 @@ mod tests {
assert_eq!(resolved.model, "deepseek-v4-pro");
}
/// With an empty `ConfigToml` and default CLI overrides, the resolved runtime
/// options must name the DeepSeek provider, `DEFAULT_DEEPSEEK_BASE_URL`, and
/// `DEFAULT_DEEPSEEK_MODEL`.
#[test]
fn deepseek_runtime_defaults_to_beta_endpoint() {
    // Both guards stay alive until the end of the test. Presumably they
    // serialize environment access and hide DeepSeek env overrides -- confirm
    // against `env_lock` / `EnvGuard`.
    let _env_lock = env_lock();
    let _env_guard = EnvGuard::without_deepseek_runtime_overrides();

    let defaults = ConfigToml::default();
    let cli_overrides = CliRuntimeOverrides::default();
    let resolved = defaults.resolve_runtime_options(&cli_overrides);

    assert_eq!(resolved.provider, ProviderKind::Deepseek);
    assert_eq!(resolved.base_url, DEFAULT_DEEPSEEK_BASE_URL);
    assert_eq!(resolved.model, DEFAULT_DEEPSEEK_MODEL);
}
#[test]
fn provider_specific_deepseek_fields_override_tui_compat_fields() {
let _lock = env_lock();
+30 -1
View File
@@ -367,11 +367,24 @@ pub(super) fn versioned_base_url(base_url: &str) -> String {
}
}
/// Returns `base_url` with trailing slashes removed and, if present, one
/// final `/v1` or `/beta` path segment dropped.
///
/// `/v1` is checked first; `/beta` is only considered when `/v1` does not
/// match, so at most one segment is removed. Input without either suffix is
/// returned with only its trailing slashes trimmed.
fn unversioned_base_url(base_url: &str) -> String {
    let root = base_url.trim_end_matches('/');
    let stripped = match root.strip_suffix("/v1") {
        Some(prefix) => prefix,
        None => root.strip_suffix("/beta").unwrap_or(root),
    };
    stripped.to_owned()
}
/// Builds a request URL from `base_url` and `path`.
///
/// Leading slashes are trimmed from `path` first. A path that starts with
/// `beta/` is joined onto `unversioned_base_url(base_url)`; every other path
/// is joined onto `versioned_base_url(base_url)` with its trailing slashes
/// trimmed.
pub(super) fn api_url(base_url: &str, path: &str) -> String {
let path = path.trim_start_matches('/');
// Beta paths already carry their own `beta/` prefix, so they bypass the
// versioned base and attach to the unversioned one.
if path.starts_with("beta/") {
return format!("{}/{}", unversioned_base_url(base_url), path);
}
format!(
"{}/{}",
versioned_base_url(base_url).trim_end_matches('/'),
// NOTE(review): the next two lines look like the pre-change and post-change
// forms of the same argument; presumably only `path` should remain -- confirm.
path.trim_start_matches('/')
path
)
}
@@ -1021,6 +1034,22 @@ mod tests {
);
}
/// A `beta/...` path must resolve to the same URL whether the configured base
/// is the bare host, the `/v1` form, or the `/beta` form.
#[test]
fn api_url_routes_beta_paths_from_any_deepseek_base() {
    let expected = "https://api.deepseek.com/beta/completions";
    for base in [
        "https://api.deepseek.com",
        "https://api.deepseek.com/v1",
        "https://api.deepseek.com/beta",
    ] {
        assert_eq!(api_url(base, "beta/completions"), expected);
    }
}
#[test]
fn default_headers_include_custom_headers_when_configured() {
let mut extra = HashMap::new();
+42 -16
View File
@@ -20,6 +20,7 @@ use crate::hooks::HooksConfig;
pub const DEFAULT_MAX_SUBAGENTS: usize = 10;
pub const MAX_SUBAGENTS: usize = 20;
pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro";
pub const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta";
pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro";
pub const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash";
pub const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
@@ -148,7 +149,10 @@ pub struct ProviderCapability {
pub resolved_model: String,
/// Context window in tokens (the maximum input the model can accept).
pub context_window: u32,
/// Recommended maximum output tokens (`max_tokens`) for this combo.
/// Official maximum output tokens for this combo.
///
/// This is model metadata for diagnostics and CI policy. Normal turns use
/// a separate, more conservative request cap in the engine.
pub max_output: u32,
/// Whether the provider+model supports thinking/reasoning mode.
pub thinking_supported: bool,
@@ -199,9 +203,10 @@ pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> Provi
.unwrap_or(crate::models::LEGACY_DEEPSEEK_CONTEXT_WINDOW_TOKENS)
};
// Max output tokens: DeepSeek V4 models allow 262K; others get 4096.
// Max output tokens: official DeepSeek V4 API metadata lists 384K;
// runtime request caps remain separate and more conservative.
let max_output = if is_v4_pro || is_v4_flash {
262_144
384_000
} else {
4096
};
@@ -1231,7 +1236,7 @@ impl Config {
};
let base = provider_base.or(root_base).unwrap_or_else(|| {
match provider {
ApiProvider::Deepseek => "https://api.deepseek.com",
ApiProvider::Deepseek => DEFAULT_DEEPSEEK_BASE_URL,
ApiProvider::DeepseekCN => DEFAULT_DEEPSEEKCN_BASE_URL,
ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL,
ApiProvider::Openrouter => DEFAULT_OPENROUTER_BASE_URL,
@@ -1704,8 +1709,9 @@ pub fn ensure_config_file_exists(path: Option<PathBuf>) -> Result<Option<PathBuf
# Get your API key from https://platform.deepseek.com
# Save it with: deepseek auth set --provider deepseek
# Base URL (default: https://api.deepseek.com)
# base_url = "https://api.deepseek.com"
# Base URL (default: https://api.deepseek.com/beta)
# Set https://api.deepseek.com to opt out of beta features.
# base_url = "https://api.deepseek.com/beta"
# Default model
default_text_model = "{default_model}"
@@ -2651,8 +2657,9 @@ fn save_api_key_to_config_file(api_key: &str) -> Result<PathBuf> {
api_key = "{key_to_write}"
# Base URL (default: https://api.deepseek.com)
# base_url = "https://api.deepseek.com"
# Base URL (default: https://api.deepseek.com/beta)
# Set https://api.deepseek.com to opt out of beta features.
# base_url = "https://api.deepseek.com/beta"
# Default model
default_text_model = "{default_model}"
@@ -3916,6 +3923,25 @@ api_key = "old-openrouter-key"
Ok(())
}
/// A default `Config` must select the DeepSeek provider and report
/// `DEFAULT_DEEPSEEK_BASE_URL` as its DeepSeek base URL.
#[test]
fn deepseek_provider_defaults_to_beta_endpoint() {
    let defaults = Config::default();

    assert_eq!(defaults.api_provider(), ApiProvider::Deepseek);
    assert_eq!(defaults.deepseek_base_url(), DEFAULT_DEEPSEEK_BASE_URL);
}
/// An explicitly configured `base_url` must be reported as-is instead of the
/// beta default, while the provider stays DeepSeek.
#[test]
fn explicit_deepseek_base_url_overrides_beta_default() {
    let opt_out_url = "https://api.deepseek.com";
    let config = Config {
        base_url: Some(opt_out_url.to_owned()),
        ..Config::default()
    };

    assert_eq!(config.api_provider(), ApiProvider::Deepseek);
    assert_eq!(config.deepseek_base_url(), opt_out_url);
}
#[test]
fn deepseek_model_env_overrides_default_text_model() -> Result<()> {
let _lock = lock_test_env();
@@ -4752,7 +4778,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(cap.cache_telemetry_supported);
assert_eq!(
@@ -4768,7 +4794,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(cap.cache_telemetry_supported);
}
@@ -4780,7 +4806,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(cap.cache_telemetry_supported);
assert_eq!(
@@ -4796,7 +4822,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(cap.cache_telemetry_supported);
}
@@ -4808,7 +4834,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
// OpenRouter does not return DeepSeek prompt-cache telemetry.
assert!(!cap.cache_telemetry_supported);
@@ -4825,7 +4851,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(!cap.cache_telemetry_supported);
}
@@ -4837,7 +4863,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(!cap.cache_telemetry_supported);
}
@@ -4849,7 +4875,7 @@ model = "deepseek-v4-pro"
cap.context_window,
crate::models::DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS
);
assert_eq!(cap.max_output, 262_144);
assert_eq!(cap.max_output, 384_000);
assert!(cap.thinking_supported);
assert!(!cap.cache_telemetry_supported);
}
+2 -8
View File
@@ -1379,13 +1379,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
);
}
}
println!(
" · base_url: {}",
config
.base_url
.as_deref()
.unwrap_or("https://api.deepseek.com")
);
println!(" · base_url: {}", config.deepseek_base_url());
let model = config
.default_text_model
.clone()
@@ -4278,7 +4272,7 @@ mod doctor_endpoint_tests {
let target = doctor_api_target(&config);
assert_eq!(target.provider, "deepseek");
assert_eq!(target.base_url, "https://api.deepseek.com");
assert_eq!(target.base_url, crate::config::DEFAULT_DEEPSEEK_BASE_URL);
assert_eq!(target.model, crate::config::DEFAULT_TEXT_MODEL);
}
+3 -3
View File
@@ -109,11 +109,11 @@ Current boundary note (v0.8.6):
#### DeepSeek API Endpoints
DeepSeek exposes OpenAI-compatible endpoints. The CLI uses:
- `https://api.deepseek.com/v1/chat/completions` - normal and streaming model turns
- `https://api.deepseek.com/v1/models` - live model discovery and health checks
- `https://api.deepseek.com/beta/chat/completions` - default v0.8.16 DeepSeek model turns
- `https://api.deepseek.com/beta/models` - default v0.8.16 live model discovery and health checks
`https://api.deepseek.com/v1` is accepted for OpenAI SDK compatibility, and
`https://api.deepseek.com/beta` can be configured for beta-only features such as
can still be configured explicitly to opt out of beta-only features such as
strict tool mode, chat prefix completion, and FIM completion. The public
DeepSeek docs do not document a Responses API path for this workflow; the engine
drives turns through Chat Completions.
+9 -11
View File
@@ -94,7 +94,7 @@ default_text_model = "deepseek-v4-pro"
[profiles.work]
api_key = "WORK_KEY"
base_url = "https://api.deepseek.com"
base_url = "https://api.deepseek.com/beta"
[profiles.nvidia-nim]
provider = "nvidia-nim"
@@ -296,7 +296,7 @@ separate:
| Quantity | Meaning | Allowed to drive |
|---|---|---|
| Active request input estimate | Conservative estimate of the next request's live system prompt and transcript payload. | Header/footer context percent, hard-cycle trigger, opt-in Flash seam trigger, and emergency overflow preflight. |
| Reserved response headroom | The requested `max_tokens` budget plus safety headroom. v0.7.5 keeps normal turns at `262144` output tokens and adds `1024` safety tokens for context-window checks. | Hard-cycle and emergency overflow budget checks only. |
| Reserved response headroom | The internal turn budget plus safety headroom. v0.8.16 keeps normal turns at `262144` reserved output tokens and adds `1024` safety tokens for context-window checks, even though V4 capability metadata reports the official `384000` max output. | Hard-cycle and emergency overflow budget checks only. |
| Cumulative API usage | Provider-reported input plus output tokens summed across completed API calls; multi-tool turns may count the same stable prefix more than once. | Session usage and approximate cost telemetry only. |
| Prompt cache hit/miss | Provider cache telemetry for the most recent call when available. | Cache-hit display and cost estimation only; never compaction, seam, or cycle triggers. |
| Context percent | Active request input estimate divided by the model context window. | Display only; it mirrors the active-input basis used by context safeguards. |
@@ -327,8 +327,8 @@ If you are upgrading from older releases:
- `provider` (string, optional): `deepseek` (default), `deepseek-cn`, `nvidia-nim`, `openrouter`, `novita`, `fireworks`, `sglang`, `vllm`, or `ollama`. `deepseek-cn` uses DeepSeek's mainland China endpoint (`https://api.deepseeki.com`); `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`.
- `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it.
- `base_url` (string, optional): defaults to `https://api.deepseek.com` for DeepSeek's OpenAI-compatible Chat Completions API, `https://api.deepseeki.com` for `provider = "deepseek-cn"`, or the provider-specific endpoint for hosted/self-hosted providers. `https://api.deepseek.com/v1` is also accepted for SDK compatibility; use `https://api.deepseek.com/beta` only for DeepSeek beta features such as strict tool mode, chat prefix completion, and FIM completion.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash`. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Ollama model tags are passed through unchanged. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process.
- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API in v0.8.16, `https://api.deepseeki.com` for `provider = "deepseek-cn"`, or the provider-specific endpoint for hosted/self-hosted providers. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Ollama model tags are passed through unchanged. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process.
- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, or `max`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`.
- `allow_shell` (bool, optional): defaults to `true` (sandboxed).
- `approval_policy` (string, optional): `on-request`, `untrusted`, or `never`. Runtime `approval_mode` editing in `/config` also accepts `on-request` and `untrusted` aliases.
@@ -550,14 +550,12 @@ The `capability` key contains per-provider capability info derived from
static knowledge (release docs, API guides) rather than live API probes.
Top-level sub-keys: `resolved_provider`, `resolved_model`, `context_window`,
`max_output`, `thinking_supported`, `cache_telemetry_supported`,
`request_payload_mode`, and `deprecation`. When the resolved model is a known
legacy alias (e.g. `deepseek-chat`, `deepseek-reasoner`), the `deprecation`
sub-object carries `alias`, `replacement`, and `notice` fields.
and `request_payload_mode`.
Use `capability.context_window` and `capability.max_output` for context-window
budgeting in CI scripts. Use `capability.thinking_supported` to decide whether
to configure reasoning effort. Use `capability.deprecation` to warn users about
legacy model aliases.
Use `capability.context_window` and `capability.max_output` for model-limit
checks in CI scripts; do not treat `capability.max_output` as the per-turn
request budget. Use `capability.thinking_supported` to decide whether to
configure reasoning effort.
## Setup status, clean, and extension dirs
+1 -1
View File
@@ -24,7 +24,7 @@ Symptoms:
Checks:
1. Inspect retry/health logs (`deepseek_cli::client`)
2. Verify endpoint connectivity:
- `curl -sS https://api.deepseek.com/v1/models -H "Authorization: Bearer $DEEPSEEK_API_KEY"`
- `curl -sS https://api.deepseek.com/beta/models -H "Authorization: Bearer $DEEPSEEK_API_KEY"`
3. Confirm no local sandbox/permission deadlock in tool output
Actions:
+1 -1
View File
@@ -92,7 +92,7 @@ deepseek doctor --json
"api_key": {
"source": "env"
},
"base_url": "https://api.deepseek.com",
"base_url": "https://api.deepseek.com/beta",
"default_text_model": "deepseek-v4-pro",
"memory": {
"enabled": false,