feat(models): add Together AI provider and update model catalog for v0.8.55

- Add Together AI as a first-class provider (ProviderKind::Together)
  - Config block [providers.together], env TOGETHER_API_KEY/TOGETHER_BASE_URL/TOGETHER_MODEL
  - Default models: deepseek-ai/DeepSeek-V4-Pro, deepseek-ai/DeepSeek-V4-Flash
  - Base URL: https://api.together.xyz/v1
  - TUI ApiProvider::Together with picker, auth, and capability support
  - CLI auth list/status support

- Add model catalog entries:
  - Qwen 3.7 Max (qwen/qwen3.7-max on OpenRouter)
  - MiniMax 2.7 (minimax/minimax-2.7 on OpenRouter)
  - NVIDIA Nemotron 3 Ultra (nvidia/nemotron-3-ultra on OpenRouter)

- Update docs/PROVIDERS.md, docs/CONFIGURATION.md, config.example.toml
- Update check-provider-registry.py compatible surfaces
- Fix provider picker tests for new provider count

Closes #2906, #2907, #2910, #2912, #2913
This commit is contained in:
Hunter Bown
2026-06-08 15:12:42 -07:00
parent 9463266cb1
commit c13bc24805
14 changed files with 216 additions and 17 deletions
+12 -4
View File
@@ -340,10 +340,10 @@ max_subagents = 10 # optional (1-20)
# base_url = "https://openrouter.ai/api/v1"
# model = "deepseek/deepseek-v4-pro"
# Recent large model IDs also accepted here include arcee-ai/trinity-large-thinking,
# xiaomi/mimo-v2.5-pro, qwen/qwen3.6-flash, qwen/qwen3.6-35b-a3b,
# qwen/qwen3.6-max-preview, qwen/qwen3.6-27b, qwen/qwen3.6-plus,
# google/gemma-4-31b-it, z-ai/glm-5.1, moonshotai/kimi-k2.6, and
# nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free.
# minimax/minimax-m3, minimax/minimax-2.7, xiaomi/mimo-v2.5-pro, qwen/qwen3.6-flash,
# qwen/qwen3.6-35b-a3b, qwen/qwen3.6-max-preview, qwen/qwen3.6-27b, qwen/qwen3.6-plus,
# qwen/qwen3.7-max, google/gemma-4-31b-it, z-ai/glm-5.1, moonshotai/kimi-k2.6,
# nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free, and nvidia/nemotron-3-ultra.
# Xiaomi MiMo OpenAI-compatible endpoint (https://platform.xiaomimimo.com)
[providers.xiaomi_mimo]
@@ -420,6 +420,14 @@ max_subagents = 10 # optional (1-20)
# base_url = "https://router.huggingface.co/v1"
# model = "deepseek-ai/DeepSeek-V4-Pro" # or deepseek-ai/DeepSeek-V4-Flash
# ─────────────────────────────────────────────────────────────────────────────────
# Together AI Provider (https://www.together.ai/)
# Env var aliases: TOGETHER_API_KEY, TOGETHER_BASE_URL, TOGETHER_MODEL
[providers.together]
# api_key = "YOUR_TOGETHER_API_KEY"
# base_url = "https://api.together.xyz/v1"
# model = "deepseek-ai/DeepSeek-V4-Pro" # or deepseek-ai/DeepSeek-V4-Flash
# ─────────────────────────────────────────────────────────────────────────────────
# Web Search Provider
# ─────────────────────────────────────────────────────────────────────────────────
+53
View File
@@ -569,6 +569,59 @@ impl Default for ModelRegistry {
supports_tools: true,
supports_reasoning: true,
},
// Together AI provider models
ModelInfo {
id: "deepseek-ai/DeepSeek-V4-Pro".to_string(),
provider: ProviderKind::Together,
aliases: vec![
"deepseek-v4-pro".to_string(),
"together-deepseek-v4-pro".to_string(),
],
supports_tools: true,
supports_reasoning: true,
},
ModelInfo {
id: "deepseek-ai/DeepSeek-V4-Flash".to_string(),
provider: ProviderKind::Together,
aliases: vec![
"deepseek-v4-flash".to_string(),
"deepseek-chat".to_string(),
"together-deepseek-v4-flash".to_string(),
],
supports_tools: true,
supports_reasoning: true,
},
// Qwen 3.7 Max (OpenRouter)
ModelInfo {
id: "qwen/qwen3.7-max".to_string(),
provider: ProviderKind::Openrouter,
aliases: vec!["qwen3.7-max".to_string(), "qwen-3.7-max".to_string()],
supports_tools: true,
supports_reasoning: true,
},
// MiniMax 2.7 (OpenRouter)
ModelInfo {
id: "minimax/minimax-2.7".to_string(),
provider: ProviderKind::Openrouter,
aliases: vec![
"minimax-2.7".to_string(),
"minimax-2-7".to_string(),
"openrouter-minimax-2.7".to_string(),
],
supports_tools: true,
supports_reasoning: true,
},
// NVIDIA Nemotron 3 Ultra (OpenRouter)
ModelInfo {
id: "nvidia/nemotron-3-ultra".to_string(),
provider: ProviderKind::Openrouter,
aliases: vec![
"nemotron-3-ultra".to_string(),
"nvidia-nemotron-3-ultra".to_string(),
],
supports_tools: true,
supports_reasoning: true,
},
];
Self::new(models)
}
+4 -1
View File
@@ -765,11 +765,12 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
ProviderKind::Vllm => "vllm",
ProviderKind::Ollama => "ollama",
ProviderKind::Huggingface => "huggingface",
ProviderKind::Together => "together",
}
}
/// Provider order used by the `auth list` and `auth status` outputs.
const PROVIDER_LIST: [ProviderKind; 18] = [
const PROVIDER_LIST: [ProviderKind; 19] = [
ProviderKind::Deepseek,
ProviderKind::NvidiaNim,
ProviderKind::Openai,
@@ -788,6 +789,7 @@ const PROVIDER_LIST: [ProviderKind; 18] = [
ProviderKind::Vllm,
ProviderKind::Ollama,
ProviderKind::Huggingface,
ProviderKind::Together,
];
#[cfg(test)]
@@ -859,6 +861,7 @@ fn provider_env_vars(provider: ProviderKind) -> &'static [&'static str] {
"WANJIE_API_KEY",
"WANJIE_MAAS_API_KEY",
],
ProviderKind::Together => &["TOGETHER_API_KEY"],
}
}
+60 -1
View File
@@ -47,6 +47,7 @@ const OPENROUTER_QWEN_3_6_35B_A3B_MODEL: &str = "qwen/qwen3.6-35b-a3b";
const OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL: &str = "qwen/qwen3.6-max-preview";
const OPENROUTER_QWEN_3_6_27B_MODEL: &str = "qwen/qwen3.6-27b";
const OPENROUTER_QWEN_3_6_PLUS_MODEL: &str = "qwen/qwen3.6-plus";
const OPENROUTER_QWEN_3_7_MAX_MODEL: &str = "qwen/qwen3.7-max";
const OPENROUTER_TENCENT_HY3_PREVIEW_MODEL: &str = "tencent/hy3-preview";
const OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL: &str = "xiaomi/mimo-v2.5-pro";
const OPENROUTER_XIAOMI_MIMO_V2_5_MODEL: &str = "xiaomi/mimo-v2.5";
@@ -85,6 +86,9 @@ const DEFAULT_ARCEE_BASE_URL: &str = "https://api.arcee.ai/api/v1";
const DEFAULT_HUGGINGFACE_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
const DEFAULT_HUGGINGFACE_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
const DEFAULT_HUGGINGFACE_BASE_URL: &str = "https://router.huggingface.co/v1";
/// Default Together AI model ID (DeepSeek V4 Pro).
const DEFAULT_TOGETHER_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
/// Together AI model ID that the Flash-tier DeepSeek aliases normalize to.
const DEFAULT_TOGETHER_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
/// Default Together AI API base URL.
const DEFAULT_TOGETHER_BASE_URL: &str = "https://api.together.xyz/v1";
const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1";
const DEFAULT_VLLM_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
const DEFAULT_VLLM_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
@@ -135,10 +139,12 @@ pub enum ProviderKind {
Ollama,
#[serde(alias = "hugging-face", alias = "hugging_face", alias = "hf")]
Huggingface,
#[serde(alias = "together-ai", alias = "together_ai")]
Together,
}
impl ProviderKind {
pub const ALL: [Self; 18] = [
pub const ALL: [Self; 19] = [
Self::Deepseek,
Self::NvidiaNim,
Self::Openai,
@@ -157,6 +163,7 @@ impl ProviderKind {
Self::Vllm,
Self::Ollama,
Self::Huggingface,
Self::Together,
];
#[must_use]
@@ -180,6 +187,7 @@ impl ProviderKind {
Self::Vllm => "vllm",
Self::Ollama => "ollama",
Self::Huggingface => "huggingface",
Self::Together => "together",
}
}
@@ -209,6 +217,7 @@ impl ProviderKind {
"vllm" | "v-llm" => Some(Self::Vllm),
"ollama" | "ollama-local" => Some(Self::Ollama),
"huggingface" | "hugging-face" | "hugging_face" | "hf" => Some(Self::Huggingface),
"together" | "together-ai" | "together_ai" => Some(Self::Together),
_ => None,
}
}
@@ -277,6 +286,8 @@ pub struct ProvidersToml {
pub ollama: ProviderConfigToml,
#[serde(default)]
pub huggingface: ProviderConfigToml,
#[serde(default)]
pub together: ProviderConfigToml,
}
/// Sibling `permissions.toml` schema.
@@ -324,6 +335,7 @@ impl ProvidersToml {
ProviderKind::Vllm => &self.vllm,
ProviderKind::Ollama => &self.ollama,
ProviderKind::Huggingface => &self.huggingface,
ProviderKind::Together => &self.together,
}
}
@@ -346,6 +358,7 @@ impl ProvidersToml {
ProviderKind::Vllm => &mut self.vllm,
ProviderKind::Ollama => &mut self.ollama,
ProviderKind::Huggingface => &mut self.huggingface,
ProviderKind::Together => &mut self.together,
}
}
}
@@ -1207,6 +1220,12 @@ impl ConfigToml {
"providers.huggingface.http_headers" => {
serialize_http_headers(&self.providers.huggingface.http_headers)
}
"providers.together.api_key" => self.providers.together.api_key.clone(),
"providers.together.base_url" => self.providers.together.base_url.clone(),
"providers.together.model" => self.providers.together.model.clone(),
"providers.together.http_headers" => {
serialize_http_headers(&self.providers.together.http_headers)
}
_ => self.extras.get(key).map(toml::Value::to_string),
}
}
@@ -1458,6 +1477,18 @@ impl ConfigToml {
"providers.huggingface.http_headers" => {
self.providers.huggingface.http_headers = parse_http_headers(value)?;
}
"providers.together.api_key" => {
self.providers.together.api_key = Some(value.to_string());
}
"providers.together.base_url" => {
self.providers.together.base_url = Some(value.to_string());
}
"providers.together.model" => {
self.providers.together.model = Some(value.to_string());
}
"providers.together.http_headers" => {
self.providers.together.http_headers = parse_http_headers(value)?;
}
_ => {
self.extras
.insert(key.to_string(), toml::Value::String(value.to_string()));
@@ -1577,6 +1608,10 @@ impl ConfigToml {
"providers.huggingface.base_url" => self.providers.huggingface.base_url = None,
"providers.huggingface.model" => self.providers.huggingface.model = None,
"providers.huggingface.http_headers" => self.providers.huggingface.http_headers.clear(),
"providers.together.api_key" => self.providers.together.api_key = None,
"providers.together.base_url" => self.providers.together.base_url = None,
"providers.together.model" => self.providers.together.model = None,
"providers.together.http_headers" => self.providers.together.http_headers.clear(),
_ => {
self.extras.remove(key);
}
@@ -1959,6 +1994,7 @@ impl ConfigToml {
ProviderKind::Vllm => DEFAULT_VLLM_BASE_URL.to_string(),
ProviderKind::Ollama => DEFAULT_OLLAMA_BASE_URL.to_string(),
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL.to_string(),
ProviderKind::Together => DEFAULT_TOGETHER_BASE_URL.to_string(),
})
};
// CLI flag wins outright. Otherwise: config-file → injected secrets/env.
@@ -2251,6 +2287,14 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String {
"deepseek-v4-flash" | "deepseek-v4flash" | "deepseek-chat" | "deepseek-reasoner"
| "deepseek-r1" | "deepseek-v3" | "deepseek-v3.2",
) => DEFAULT_HUGGINGFACE_FLASH_MODEL.to_string(),
(ProviderKind::Together, "deepseek-v4-pro" | "deepseek-v4pro") => {
DEFAULT_TOGETHER_MODEL.to_string()
}
(
ProviderKind::Together,
"deepseek-v4-flash" | "deepseek-v4flash" | "deepseek-chat" | "deepseek-reasoner"
| "deepseek-r1" | "deepseek-v3" | "deepseek-v3.2",
) => DEFAULT_TOGETHER_FLASH_MODEL.to_string(),
_ => model.to_string(),
}
}
@@ -2342,6 +2386,9 @@ fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> {
OPENROUTER_QWEN_3_6_PLUS_MODEL | "qwen3.6-plus" | "qwen-3.6-plus" => {
Some(OPENROUTER_QWEN_3_6_PLUS_MODEL)
}
OPENROUTER_QWEN_3_7_MAX_MODEL | "qwen3.7-max" | "qwen-3.7-max" => {
Some(OPENROUTER_QWEN_3_7_MAX_MODEL)
}
OPENROUTER_TENCENT_HY3_PREVIEW_MODEL | "hy3-preview" | "tencent-hy3-preview" => {
Some(OPENROUTER_TENCENT_HY3_PREVIEW_MODEL)
}
@@ -2378,6 +2425,7 @@ fn default_model_for_provider(provider: ProviderKind) -> &'static str {
ProviderKind::Vllm => DEFAULT_VLLM_MODEL,
ProviderKind::Ollama => DEFAULT_OLLAMA_MODEL,
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_MODEL,
ProviderKind::Together => DEFAULT_TOGETHER_MODEL,
}
}
@@ -2401,6 +2449,7 @@ fn default_base_url_for_provider(provider: ProviderKind) -> &'static str {
ProviderKind::Vllm => DEFAULT_VLLM_BASE_URL,
ProviderKind::Ollama => DEFAULT_OLLAMA_BASE_URL,
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
ProviderKind::Together => DEFAULT_TOGETHER_BASE_URL,
}
}
@@ -3149,6 +3198,8 @@ struct EnvRuntimeOverrides {
ollama_base_url: Option<String>,
huggingface_base_url: Option<String>,
huggingface_model: Option<String>,
together_base_url: Option<String>,
together_model: Option<String>,
}
impl EnvRuntimeOverrides {
@@ -3298,6 +3349,12 @@ impl EnvRuntimeOverrides {
.or_else(|_| std::env::var("HF_MODEL"))
.ok()
.filter(|v| !v.trim().is_empty()),
together_base_url: std::env::var("TOGETHER_BASE_URL")
.ok()
.filter(|v| !v.trim().is_empty()),
together_model: std::env::var("TOGETHER_MODEL")
.ok()
.filter(|v| !v.trim().is_empty()),
}
}
@@ -3324,6 +3381,7 @@ impl EnvRuntimeOverrides {
ProviderKind::Vllm => self.vllm_base_url.clone(),
ProviderKind::Ollama => self.ollama_base_url.clone(),
ProviderKind::Huggingface => self.huggingface_base_url.clone(),
ProviderKind::Together => self.together_base_url.clone(),
}
}
@@ -3341,6 +3399,7 @@ impl EnvRuntimeOverrides {
ProviderKind::Novita => self.novita_model.clone(),
ProviderKind::Fireworks => self.fireworks_model.clone(),
ProviderKind::Huggingface => self.huggingface_model.clone(),
ProviderKind::Together => self.together_model.clone(),
_ => None,
}?;
+14 -1
View File
@@ -14,6 +14,7 @@ use super::{
DEFAULT_OPENAI_BASE_URL, DEFAULT_OPENAI_MODEL, DEFAULT_OPENROUTER_BASE_URL,
DEFAULT_OPENROUTER_MODEL, DEFAULT_SGLANG_BASE_URL, DEFAULT_SGLANG_MODEL,
DEFAULT_SILICONFLOW_BASE_URL, DEFAULT_SILICONFLOW_CN_BASE_URL, DEFAULT_SILICONFLOW_MODEL,
DEFAULT_TOGETHER_BASE_URL, DEFAULT_TOGETHER_MODEL,
DEFAULT_VLLM_BASE_URL, DEFAULT_VLLM_MODEL, DEFAULT_VOLCENGINE_BASE_URL,
DEFAULT_VOLCENGINE_MODEL, DEFAULT_WANJIE_ARK_BASE_URL, DEFAULT_WANJIE_ARK_MODEL,
DEFAULT_XIAOMI_MIMO_BASE_URL, DEFAULT_XIAOMI_MIMO_MODEL, ProviderKind,
@@ -274,6 +275,15 @@ provider!(
["HUGGINGFACE_API_KEY", "HF_TOKEN"],
"huggingface"
);
// Together AI provider metadata entry.
// NOTE(review): argument roles are inferred from the values and the sibling
// `provider!` invocations (type name, `ProviderKind` variant, display name,
// default base URL, default model, API-key env vars, provider identifier) —
// confirm against the macro definition.
provider!(
Together,
Together,
"Together AI",
DEFAULT_TOGETHER_BASE_URL,
DEFAULT_TOGETHER_MODEL,
["TOGETHER_API_KEY"],
"together"
);
static DEEPSEEK: Deepseek = Deepseek;
static NVIDIA_NIM: NvidiaNim = NvidiaNim;
@@ -293,8 +303,9 @@ static SGLANG: Sglang = Sglang;
static VLLM: Vllm = Vllm;
static OLLAMA: Ollama = Ollama;
static HUGGINGFACE: Huggingface = Huggingface;
static TOGETHER: Together = Together;
static PROVIDER_REGISTRY: [&dyn Provider; 18] = [
static PROVIDER_REGISTRY: [&dyn Provider; 19] = [
&DEEPSEEK,
&NVIDIA_NIM,
&OPENAI,
@@ -313,6 +324,7 @@ static PROVIDER_REGISTRY: [&dyn Provider; 18] = [
&VLLM,
&OLLAMA,
&HUGGINGFACE,
&TOGETHER,
];
/// Return all built-in provider metadata entries in `ProviderKind::ALL` order.
@@ -359,5 +371,6 @@ pub fn provider_for_kind(kind: ProviderKind) -> &'static dyn Provider {
ProviderKind::Vllm => &VLLM,
ProviderKind::Ollama => &OLLAMA,
ProviderKind::Huggingface => &HUGGINGFACE,
ProviderKind::Together => &TOGETHER,
}
}
+5 -4
View File
@@ -1204,7 +1204,8 @@ pub(super) fn apply_reasoning_effort(
| ApiProvider::Siliconflow
| ApiProvider::SiliconflowCn
| ApiProvider::Sglang
| ApiProvider::Volcengine => {
| ApiProvider::Volcengine
| ApiProvider::Together => {
body["thinking"] = json!({ "type": "disabled" });
}
ApiProvider::Fireworks => {}
@@ -1246,10 +1247,10 @@ pub(super) fn apply_reasoning_effort(
body["reasoning_effort"] = json!("high");
body["thinking"] = json!({ "type": "enabled" });
}
// OpenRouter/Novita: pass through the actual user-chosen value.
// OpenRouter/Novita/Together: pass through the actual user-chosen value.
// OpenRouter's unified scale is none/minimal/low/medium/high/xhigh;
// DeepSeek models hosted there accept those directly.
ApiProvider::Openrouter | ApiProvider::Novita => {
ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Together => {
let value = match normalized.as_str() {
"low" | "minimal" => "low",
"medium" | "mid" => "medium",
@@ -1308,7 +1309,7 @@ pub(super) fn apply_reasoning_effort(
body["reasoning_effort"] = json!("max");
body["thinking"] = json!({ "type": "enabled" });
}
ApiProvider::Openrouter | ApiProvider::Novita => {
ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Together => {
body["reasoning_effort"] = json!("xhigh");
body["thinking"] = json!({ "type": "enabled" });
}
+50 -2
View File
@@ -75,6 +75,9 @@ pub const OPENROUTER_QWEN_3_6_35B_A3B_MODEL: &str = "qwen/qwen3.6-35b-a3b";
pub const OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL: &str = "qwen/qwen3.6-max-preview";
pub const OPENROUTER_QWEN_3_6_27B_MODEL: &str = "qwen/qwen3.6-27b";
pub const OPENROUTER_QWEN_3_6_PLUS_MODEL: &str = "qwen/qwen3.6-plus";
/// OpenRouter model ID for Qwen 3.7 Max.
pub const OPENROUTER_QWEN_3_7_MAX_MODEL: &str = "qwen/qwen3.7-max";
/// OpenRouter model ID for MiniMax 2.7.
pub const OPENROUTER_MINIMAX_2_7_MODEL: &str = "minimax/minimax-2.7";
/// OpenRouter model ID for NVIDIA Nemotron 3 Ultra.
pub const OPENROUTER_NEMOTRON_3_ULTRA_MODEL: &str = "nvidia/nemotron-3-ultra";
pub const OPENROUTER_TENCENT_HY3_PREVIEW_MODEL: &str = "tencent/hy3-preview";
pub const OPENROUTER_XIAOMI_MIMO_V2_5_PRO_MODEL: &str = "xiaomi/mimo-v2.5-pro";
pub const OPENROUTER_XIAOMI_MIMO_V2_5_MODEL: &str = "xiaomi/mimo-v2.5";
@@ -88,6 +91,9 @@ pub const RECENT_OPENROUTER_LARGE_MODELS: &[&str] = &[
OPENROUTER_QWEN_3_6_MAX_PREVIEW_MODEL,
OPENROUTER_QWEN_3_6_27B_MODEL,
OPENROUTER_QWEN_3_6_PLUS_MODEL,
OPENROUTER_QWEN_3_7_MAX_MODEL,
OPENROUTER_MINIMAX_2_7_MODEL,
OPENROUTER_NEMOTRON_3_ULTRA_MODEL,
OPENROUTER_KIMI_K2_6_MODEL,
OPENROUTER_GLM_5_1_MODEL,
OPENROUTER_TENCENT_HY3_PREVIEW_MODEL,
@@ -136,6 +142,8 @@ pub const DEFAULT_OLLAMA_BASE_URL: &str = "http://localhost:11434/v1";
pub const DEFAULT_HUGGINGFACE_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
pub const DEFAULT_HUGGINGFACE_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
pub const DEFAULT_HUGGINGFACE_BASE_URL: &str = "https://router.huggingface.co/v1";
/// Default Together AI model ID (DeepSeek V4 Pro).
pub const DEFAULT_TOGETHER_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
/// Default Together AI API base URL.
pub const DEFAULT_TOGETHER_BASE_URL: &str = "https://api.together.xyz/v1";
/// Legacy `deepseek-cn` provider alias.
///
/// DeepSeek's official API host is the same worldwide. Keep this alias for
@@ -176,6 +184,7 @@ pub enum ApiProvider {
Vllm,
Ollama,
Huggingface,
Together,
}
impl ApiProvider {
@@ -224,6 +233,7 @@ impl ApiProvider {
"vllm" | "v-llm" => Some(Self::Vllm),
"ollama" | "ollama-local" => Some(Self::Ollama),
"huggingface" | "hugging-face" | "hugging_face" | "hf" => Some(Self::Huggingface),
"together" | "together-ai" | "together_ai" => Some(Self::Together),
_ => None,
}
}
@@ -250,6 +260,7 @@ impl ApiProvider {
Self::Vllm => "vllm",
Self::Ollama => "ollama",
Self::Huggingface => "huggingface",
Self::Together => "together",
}
}
@@ -276,6 +287,7 @@ impl ApiProvider {
Self::Vllm => "vLLM",
Self::Ollama => "Ollama",
Self::Huggingface => "Hugging Face",
Self::Together => "Together AI",
}
}
@@ -301,6 +313,7 @@ impl ApiProvider {
Self::Vllm,
Self::Ollama,
Self::Huggingface,
Self::Together,
]
}
}
@@ -616,6 +629,9 @@ fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> {
OPENROUTER_QWEN_3_6_PLUS_MODEL | "qwen3.6-plus" | "qwen-3.6-plus" => {
Some(OPENROUTER_QWEN_3_6_PLUS_MODEL)
}
OPENROUTER_QWEN_3_7_MAX_MODEL | "qwen3.7-max" | "qwen-3.7-max" => {
Some(OPENROUTER_QWEN_3_7_MAX_MODEL)
}
OPENROUTER_TENCENT_HY3_PREVIEW_MODEL | "hy3-preview" | "tencent-hy3-preview" => {
Some(OPENROUTER_TENCENT_HY3_PREVIEW_MODEL)
}
@@ -800,6 +816,7 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati
ApiProvider::Volcengine => vec![DEFAULT_VOLCENGINE_MODEL, DEFAULT_VOLCENGINE_FLASH_MODEL],
ApiProvider::Ollama => Vec::new(),
ApiProvider::Openai | ApiProvider::Atlascloud => OFFICIAL_DEEPSEEK_MODELS.to_vec(),
// Offer both DeepSeek tiers for Together AI, matching the Pro and Flash entries
// the model registry lists for this provider.
// NOTE(review): no Together Flash constant is visible in this module, so the ID
// is spelled out here — swap in a shared constant if one exists.
ApiProvider::Together => vec![DEFAULT_TOGETHER_MODEL, "deepseek-ai/DeepSeek-V4-Flash"],
}
}
@@ -1939,6 +1956,8 @@ pub struct ProvidersConfig {
pub ollama: ProviderConfig,
#[serde(default, alias = "hugging-face", alias = "hf")]
pub huggingface: ProviderConfig,
#[serde(default, alias = "together-ai")]
pub together: ProviderConfig,
}
#[derive(Debug, Clone, Deserialize, Default)]
@@ -2102,6 +2121,7 @@ impl Config {
ApiProvider::Volcengine => "providers.volcengine",
ApiProvider::Huggingface => "providers.huggingface",
ApiProvider::NvidiaNim => "providers.nvidia_nim",
ApiProvider::Together => "providers.together",
ApiProvider::Deepseek | ApiProvider::DeepseekCN => return,
};
tracing::warn!(
@@ -2249,6 +2269,7 @@ impl Config {
ApiProvider::Ollama => &providers.ollama,
ApiProvider::Volcengine => &providers.volcengine,
ApiProvider::Huggingface => &providers.huggingface,
ApiProvider::Together => &providers.together,
})
}
@@ -2273,6 +2294,7 @@ impl Config {
ApiProvider::Ollama => &mut providers.ollama,
ApiProvider::Volcengine => &mut providers.volcengine,
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
}
}
@@ -2384,6 +2406,7 @@ impl Config {
ApiProvider::Ollama => DEFAULT_OLLAMA_MODEL,
ApiProvider::Volcengine => DEFAULT_VOLCENGINE_MODEL,
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_MODEL,
ApiProvider::Together => DEFAULT_TOGETHER_MODEL,
}
.to_string()
}
@@ -2419,9 +2442,10 @@ impl Config {
| ApiProvider::Moonshot
| ApiProvider::Sglang
| ApiProvider::Vllm
| ApiProvider::Ollama
| ApiProvider::Ollama
| ApiProvider::Volcengine
| ApiProvider::Huggingface => None,
| ApiProvider::Huggingface
| ApiProvider::Together => None,
};
let configured_base_url = provider_base.or(root_base);
let base = if provider == ApiProvider::XiaomiMimo {
@@ -2466,6 +2490,7 @@ impl Config {
ApiProvider::Ollama => DEFAULT_OLLAMA_BASE_URL,
ApiProvider::Volcengine => DEFAULT_VOLCENGINE_BASE_URL,
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL,
}
.to_string()
})
@@ -2513,6 +2538,7 @@ impl Config {
ApiProvider::Ollama => "ollama",
ApiProvider::Volcengine => "volcengine",
ApiProvider::Huggingface => "huggingface",
ApiProvider::Together => "together",
};
// 0. DeepSeek compatibility slot. The legacy top-level `api_key`
@@ -2652,6 +2678,10 @@ impl Config {
For a Kimi Code plan key, set [providers.moonshot] base_url = \
\"https://api.kimi.com/coding/v1\" and model = \"kimi-for-coding\"."
),
ApiProvider::Together => anyhow::bail!(
"Together AI API key not found. Run 'codewhale auth set --provider together', \
set TOGETHER_API_KEY, or add [providers.together] api_key in ~/.codewhale/config.toml."
),
// Self-hosted deployments commonly run without auth on localhost.
// Return an empty key and let the client omit the Authorization header.
ApiProvider::Sglang | ApiProvider::Vllm | ApiProvider::Ollama => Ok(String::new()),
@@ -3451,6 +3481,13 @@ fn apply_env_overrides(config: &mut Config) {
.huggingface
.base_url = Some(value);
}
ApiProvider::Together => {
config
.providers
.get_or_insert_with(ProvidersConfig::default)
.together
.base_url = Some(value);
}
}
}
if matches!(config.api_provider(), ApiProvider::NvidiaNim)
@@ -3657,6 +3694,7 @@ fn apply_env_overrides(config: &mut Config) {
ApiProvider::Ollama => &mut providers.ollama,
ApiProvider::Volcengine => &mut providers.volcengine,
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
};
let mut provider_headers = entry.http_headers.clone().unwrap_or_default();
provider_headers.extend(headers);
@@ -3851,6 +3889,7 @@ fn apply_env_overrides(config: &mut Config) {
ApiProvider::Ollama => &mut providers.ollama,
ApiProvider::Volcengine => &mut providers.volcengine,
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
};
entry.model = Some(value);
}
@@ -4173,6 +4212,7 @@ fn default_base_url_for_provider(provider: ApiProvider) -> &'static str {
ApiProvider::Ollama => DEFAULT_OLLAMA_BASE_URL,
ApiProvider::Volcengine => DEFAULT_VOLCENGINE_BASE_URL,
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL,
}
}
@@ -4594,6 +4634,7 @@ fn merge_providers(
ollama: merge_provider_config(base.ollama, override_cfg.ollama),
volcengine: merge_provider_config(base.volcengine, override_cfg.volcengine),
huggingface: merge_provider_config(base.huggingface, override_cfg.huggingface),
together: merge_provider_config(base.together, override_cfg.together),
}),
}
}
@@ -5080,6 +5121,9 @@ pub fn active_provider_has_env_api_key(config: &Config) -> bool {
|| std::env::var("VOLCENGINE_ARK_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|| std::env::var("ARK_API_KEY").is_ok_and(|k| !k.trim().is_empty())
}
ApiProvider::Together => {
std::env::var("TOGETHER_API_KEY").is_ok_and(|k| !k.trim().is_empty())
}
}
}
@@ -5106,6 +5150,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
ApiProvider::Siliconflow | ApiProvider::SiliconflowCn => "SILICONFLOW_API_KEY",
ApiProvider::Arcee => "ARCEE_API_KEY",
ApiProvider::Huggingface => "HUGGINGFACE_API_KEY",
ApiProvider::Together => "TOGETHER_API_KEY",
ApiProvider::Moonshot => "MOONSHOT_API_KEY",
ApiProvider::Sglang => "SGLANG_API_KEY",
ApiProvider::Vllm => "VLLM_API_KEY",
@@ -5227,6 +5272,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::Vllm => "providers.vllm",
ApiProvider::Ollama => "providers.ollama",
ApiProvider::Volcengine => "providers.volcengine",
ApiProvider::Together => "providers.together",
};
// Parse existing TOML (or start fresh) so we can edit the right table
@@ -5270,6 +5316,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::Vllm => "vllm",
ApiProvider::Ollama => "ollama",
ApiProvider::Volcengine => "volcengine",
ApiProvider::Together => "together",
};
let entry = providers
.entry(key_inside.to_string())
@@ -5365,6 +5412,7 @@ fn provider_config_key(provider: ApiProvider) -> Result<&'static str> {
ApiProvider::Sglang => Ok("sglang"),
ApiProvider::Vllm => Ok("vllm"),
ApiProvider::Ollama => Ok("ollama"),
ApiProvider::Together => Ok("together"),
}
}
+1
View File
@@ -218,6 +218,7 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static
ApiProvider::Sglang => Ok("sglang"),
ApiProvider::Vllm => Ok("vllm"),
ApiProvider::Ollama => Ok("ollama"),
ApiProvider::Together => Ok("together"),
}
}
+1
View File
@@ -614,6 +614,7 @@ impl Engine {
ApiProvider::Vllm => "VLLM_API_KEY",
ApiProvider::Ollama => "OLLAMA_API_KEY",
ApiProvider::Huggingface => "HUGGINGFACE_API_KEY/HF_TOKEN",
ApiProvider::Together => "TOGETHER_API_KEY",
};
Some(format!(
+5
View File
@@ -2044,6 +2044,10 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
"HUGGINGFACE_API_KEY/HF_TOKEN",
"codewhale auth set --provider huggingface",
),
crate::config::ApiProvider::Together => (
"TOGETHER_API_KEY",
"codewhale auth set --provider together --api-key \"...\"",
),
crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN => {
("DEEPSEEK_API_KEY", "codewhale auth set --provider deepseek")
}
@@ -2069,6 +2073,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
crate::config::ApiProvider::Vllm => "vllm",
crate::config::ApiProvider::Ollama => "ollama",
crate::config::ApiProvider::Huggingface => "huggingface",
crate::config::ApiProvider::Together => "together",
crate::config::ApiProvider::Deepseek
| crate::config::ApiProvider::DeepseekCN => "deepseek",
}
+4 -2
View File
@@ -118,6 +118,7 @@ impl ProviderPickerView {
ApiProvider::Vllm => "VLLM_API_KEY",
ApiProvider::Ollama => "OLLAMA_API_KEY",
ApiProvider::Huggingface => "HUGGINGFACE_API_KEY / HF_TOKEN",
ApiProvider::Together => "TOGETHER_API_KEY",
}
}
@@ -508,7 +509,8 @@ mod tests {
"SGLang",
"vLLM",
"Ollama",
"Hugging Face"
"Hugging Face",
"Together AI"
]
);
}
@@ -543,7 +545,7 @@ mod tests {
let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config);
picker.handle_key(key(KeyCode::Up));
assert_eq!(picker.selected_provider(), ApiProvider::Huggingface);
assert_eq!(picker.selected_provider(), ApiProvider::Together);
picker.handle_key(key(KeyCode::Down));
assert_eq!(picker.selected_provider(), ApiProvider::Deepseek);
+3
View File
@@ -7173,6 +7173,7 @@ fn render(f: &mut Frame, app: &mut App) {
crate::config::ApiProvider::Vllm => Some("vLLM"),
crate::config::ApiProvider::Ollama => Some("Ollama"),
crate::config::ApiProvider::Huggingface => Some("HF"),
crate::config::ApiProvider::Together => Some("Together"),
};
let status_indicator_started_at = if app.low_motion {
None
@@ -8213,6 +8214,7 @@ async fn apply_provider_picker_api_key(
ApiProvider::Vllm => &mut providers.vllm,
ApiProvider::Ollama => &mut providers.ollama,
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
};
entry.api_key = Some(api_key);
}
@@ -8270,6 +8272,7 @@ fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider,
ApiProvider::Vllm => &mut providers.vllm,
ApiProvider::Ollama => &mut providers.ollama,
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
};
entry.auth_mode = Some(auth_mode);
}
+1 -1
View File
@@ -860,7 +860,7 @@ If you are upgrading from older releases:
- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, `http://localhost:11434/v1` for `ollama`, `https://router.huggingface.co/v1` for `huggingface`, and `https://api.together.xyz/v1` for `together`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
- `path_suffix` (string, optional provider-table key): override the chat-completions path for OpenAI-compatible gateways that do not serve `/v1/chat/completions`. For example, `[providers.openai] path_suffix = "/chat/completions"` sends chat requests to the unversioned base URL plus `/chat/completions`; `models` and `beta/*` requests keep their normal routing.
- `insecure_skip_tls_verify` (bool, optional provider-table key): disabled by default. When true on the active provider table, only the LLM provider HTTP client skips TLS certificate verification. Prefer `SSL_CERT_FILE` for corporate or private CA bundles; `codewhale doctor` reports this setting when enabled.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.6` for Moonshot, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `google/gemma-4-31b-it`, and `moonshotai/kimi-k2.6`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, and Ollama model IDs are passed through unchanged after known aliases are normalized. 
OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.6` for Moonshot, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. 
Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias.
- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, or `max`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`.
- `allow_shell` (bool, optional): defaults to `false`; shell tools must be explicitly enabled.
- `approval_policy` (string, optional): `on-request`, `untrusted`, or `never`. Runtime `approval_mode` editing in `/config` also accepts `on-request` and `untrusted` aliases.
+3 -1
View File
@@ -135,6 +135,7 @@ endpoint.
| `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. |
| `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. |
| `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible route. Org-prefixed model IDs pass through. |
| `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. |
### Hugging Face Provider vs MCP vs Hub
@@ -204,7 +205,7 @@ endpoint when the endpoint supports model listing.
| `atlascloud` | `deepseek-ai/deepseek-v4-flash`, `deepseek-ai/deepseek-v4-pro` | yes | yes |
| `wanjie-ark` | `deepseek-reasoner` | yes | yes |
| `volcengine` | `DeepSeek-V4-Pro`, `DeepSeek-V4-Flash` | yes | yes |
| `openrouter` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash`, `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `xiaomi/mimo-v2.5-pro`, `xiaomi/mimo-v2.5`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `moonshotai/kimi-k2.6`, `z-ai/glm-5.1`, `tencent/hy3-preview`, `google/gemma-4-31b-it`, `google/gemma-4-26b-a4b-it`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free` | yes | yes |
| `openrouter` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash`, `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `xiaomi/mimo-v2.5`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `moonshotai/kimi-k2.6`, `z-ai/glm-5.1`, `tencent/hy3-preview`, `google/gemma-4-31b-it`, `google/gemma-4-26b-a4b-it`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, `nvidia/nemotron-3-ultra` | yes | yes |
| `xiaomi-mimo` | `mimo-v2.5-pro`, `mimo-v2.5`; speech/TTS IDs are selected through `codewhale speech` / `tts` | yes | yes for chat models; no for speech/TTS models |
| `novita` | `deepseek/deepseek-v4-pro`, `deepseek/deepseek-v4-flash` | yes | yes |
| `fireworks` | `accounts/fireworks/models/deepseek-v4-pro` | yes | yes |
@@ -215,6 +216,7 @@ endpoint when the endpoint supports model listing.
| `vllm` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
| `ollama` | `deepseek-coder:1.3b`; custom tags pass through when provider hint is `ollama` | yes | no |
| `huggingface` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | no |
| `together` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
AtlasCloud keeps the same default model as the config layer and adds
provider-scoped aliases for the Pro and Flash rows. Other AtlasCloud model IDs