Merge PR #2879: Hugging Face provider docs and tests

Harvested from PR #2879 by @mvanhorn

Co-authored-by: mvanhorn <455140+mvanhorn@users.noreply.github.com>
This commit is contained in:
CodeWhale Agent
2026-06-12 13:56:03 -07:00
6 changed files with 390 additions and 12 deletions
+3 -1
View File
@@ -269,7 +269,8 @@ max_subagents = 10 # optional (1-20)
# SGLang: SGLANG_BASE_URL, SGLANG_MODEL, optional SGLANG_API_KEY # SGLang: SGLANG_BASE_URL, SGLANG_MODEL, optional SGLANG_API_KEY
# vLLM: VLLM_BASE_URL, VLLM_MODEL, optional VLLM_API_KEY # vLLM: VLLM_BASE_URL, VLLM_MODEL, optional VLLM_API_KEY
# Ollama: OLLAMA_BASE_URL, OLLAMA_MODEL, optional OLLAMA_API_KEY # Ollama: OLLAMA_BASE_URL, OLLAMA_MODEL, optional OLLAMA_API_KEY
# Hugging Face: HUGGINGFACE_API_KEY (or HF_TOKEN), HUGGINGFACE_BASE_URL (or HF_BASE_URL), HUGGINGFACE_MODEL (or HF_MODEL) # Hugging Face: HUGGINGFACE_API_KEY (or HF_TOKEN), HUGGINGFACE_BASE_URL (or HF_BASE_URL),
# HUGGINGFACE_MODEL (or HF_MODEL)
# #
# Custom DeepSeek-compatible APIs usually do not need a new provider table: # Custom DeepSeek-compatible APIs usually do not need a new provider table:
# set `provider = "deepseek"` and override [providers.deepseek].base_url/model. # set `provider = "deepseek"` and override [providers.deepseek].base_url/model.
@@ -420,6 +421,7 @@ max_subagents = 10 # optional (1-20)
# model = "deepseek-coder:1.3b" # or any local Ollama tag # model = "deepseek-coder:1.3b" # or any local Ollama tag
# Hugging Face Inference Providers (https://huggingface.co/docs/api-inference) # Hugging Face Inference Providers (https://huggingface.co/docs/api-inference)
# Provider aliases: huggingface, hugging-face, hugging_face, hf
# Env var aliases: HUGGINGFACE_API_KEY / HF_TOKEN, HUGGINGFACE_BASE_URL / HF_BASE_URL, # Env var aliases: HUGGINGFACE_API_KEY / HF_TOKEN, HUGGINGFACE_BASE_URL / HF_BASE_URL,
# HUGGINGFACE_MODEL / HF_MODEL # HUGGINGFACE_MODEL / HF_MODEL
[providers.huggingface] [providers.huggingface]
+164 -4
View File
@@ -185,6 +185,39 @@ impl ProviderKind {
Self::Anthropic, Self::Anthropic,
]; ];
#[must_use]
pub fn all() -> &'static [Self] {
&[
Self::Deepseek,
Self::NvidiaNim,
Self::Openai,
Self::Atlascloud,
Self::WanjieArk,
Self::Volcengine,
Self::Openrouter,
Self::XiaomiMimo,
Self::Novita,
Self::Fireworks,
Self::Siliconflow,
Self::SiliconflowCN,
Self::Arcee,
Self::Moonshot,
Self::Sglang,
Self::Vllm,
Self::Ollama,
Self::Huggingface,
]
}
/// Builds a human-readable, comma-separated list of the identifiers of every
/// provider returned by [`Self::all`], e.g. for use in error messages.
#[must_use]
pub fn names_hint() -> String {
    let mut hint = String::new();
    for (position, kind) in Self::all().iter().enumerate() {
        // Separator goes between entries only, never before the first one.
        if position > 0 {
            hint.push_str(", ");
        }
        hint.push_str(kind.as_str());
    }
    hint
}
#[must_use] #[must_use]
pub fn as_str(self) -> &'static str { pub fn as_str(self) -> &'static str {
self.provider().id() self.provider().id()
@@ -1258,8 +1291,12 @@ impl ConfigToml {
pub fn set_value(&mut self, key: &str, value: &str) -> Result<()> { pub fn set_value(&mut self, key: &str, value: &str) -> Result<()> {
match key { match key {
"provider" => { "provider" => {
self.provider = ProviderKind::parse(value) self.provider = ProviderKind::parse(value).with_context(|| {
.with_context(|| format!("unknown provider '{value}'"))?; format!(
"unknown provider '{value}': expected {}",
ProviderKind::names_hint()
)
})?;
} }
"api_key" => self.api_key = Some(value.to_string()), "api_key" => self.api_key = Some(value.to_string()),
"base_url" => self.base_url = Some(value.to_string()), "base_url" => self.base_url = Some(value.to_string()),
@@ -2088,7 +2125,7 @@ impl ConfigToml {
} else if let Some(value) = xiaomi_mimo_env_api_key.filter(|v| !v.trim().is_empty()) { } else if let Some(value) = xiaomi_mimo_env_api_key.filter(|v| !v.trim().is_empty()) {
(Some(value), Some(RuntimeApiKeySource::Env)) (Some(value), Some(RuntimeApiKeySource::Env))
} else if should_skip_secret_store_for_provider(provider, &base_url, auth_mode.as_deref()) { } else if should_skip_secret_store_for_provider(provider, &base_url, auth_mode.as_deref()) {
match codewhale_secrets::env_for(provider.as_str()) { match env_api_key_for_provider(provider) {
Some(value) => (Some(value), Some(RuntimeApiKeySource::Env)), Some(value) => (Some(value), Some(RuntimeApiKeySource::Env)),
None => (None, None), None => (None, None),
} }
@@ -2101,7 +2138,10 @@ impl ConfigToml {
}; };
(Some(value), Some(source)) (Some(value), Some(source))
} }
None => (None, None), None => match env_api_key_for_provider(provider) {
Some(value) => (Some(value), Some(RuntimeApiKeySource::Env)),
None => (None, None),
},
} }
}; };
@@ -2745,6 +2785,21 @@ fn should_skip_secret_store_for_provider(
) || base_url_uses_local_host(base_url) ) || base_url_uses_local_host(base_url)
} }
/// Looks up an API key for `provider` from the process environment.
///
/// Hugging Face is handled here directly: `HUGGINGFACE_API_KEY` is consulted
/// first and `HF_TOKEN` is the fallback. A variable that is unset, not valid
/// Unicode, or contains only whitespace is treated as absent. Every other
/// provider is delegated to `codewhale_secrets::env_for`.
fn env_api_key_for_provider(provider: ProviderKind) -> Option<String> {
    if provider != ProviderKind::Huggingface {
        return codewhale_secrets::env_for(provider.as_str());
    }

    // Shared predicate: a blank value must not shadow the fallback variable.
    let is_non_blank = |value: &String| !value.trim().is_empty();

    match std::env::var("HUGGINGFACE_API_KEY").ok().filter(is_non_blank) {
        Some(primary) => Some(primary),
        None => std::env::var("HF_TOKEN").ok().filter(is_non_blank),
    }
}
fn auth_mode_requires_api_key(auth_mode: Option<&str>) -> bool { fn auth_mode_requires_api_key(auth_mode: Option<&str>) -> bool {
matches!( matches!(
auth_mode auth_mode
@@ -3962,6 +4017,12 @@ action = "mode.agent"
vllm_base_url: Option<OsString>, vllm_base_url: Option<OsString>,
ollama_api_key: Option<OsString>, ollama_api_key: Option<OsString>,
ollama_base_url: Option<OsString>, ollama_base_url: Option<OsString>,
huggingface_api_key: Option<OsString>,
huggingface_token: Option<OsString>,
huggingface_base_url: Option<OsString>,
hf_base_url: Option<OsString>,
huggingface_model: Option<OsString>,
hf_model: Option<OsString>,
codewhale_provider: Option<OsString>, codewhale_provider: Option<OsString>,
codewhale_model: Option<OsString>, codewhale_model: Option<OsString>,
codewhale_base_url: Option<OsString>, codewhale_base_url: Option<OsString>,
@@ -4039,6 +4100,12 @@ action = "mode.agent"
vllm_base_url: env::var_os("VLLM_BASE_URL"), vllm_base_url: env::var_os("VLLM_BASE_URL"),
ollama_api_key: env::var_os("OLLAMA_API_KEY"), ollama_api_key: env::var_os("OLLAMA_API_KEY"),
ollama_base_url: env::var_os("OLLAMA_BASE_URL"), ollama_base_url: env::var_os("OLLAMA_BASE_URL"),
huggingface_api_key: env::var_os("HUGGINGFACE_API_KEY"),
huggingface_token: env::var_os("HF_TOKEN"),
huggingface_base_url: env::var_os("HUGGINGFACE_BASE_URL"),
hf_base_url: env::var_os("HF_BASE_URL"),
huggingface_model: env::var_os("HUGGINGFACE_MODEL"),
hf_model: env::var_os("HF_MODEL"),
}; };
// Safety: test-only environment mutation guarded by a module mutex. // Safety: test-only environment mutation guarded by a module mutex.
unsafe { unsafe {
@@ -4111,6 +4178,12 @@ action = "mode.agent"
env::remove_var("VLLM_BASE_URL"); env::remove_var("VLLM_BASE_URL");
env::remove_var("OLLAMA_API_KEY"); env::remove_var("OLLAMA_API_KEY");
env::remove_var("OLLAMA_BASE_URL"); env::remove_var("OLLAMA_BASE_URL");
env::remove_var("HUGGINGFACE_API_KEY");
env::remove_var("HF_TOKEN");
env::remove_var("HUGGINGFACE_BASE_URL");
env::remove_var("HF_BASE_URL");
env::remove_var("HUGGINGFACE_MODEL");
env::remove_var("HF_MODEL");
} }
guard guard
} }
@@ -4209,6 +4282,12 @@ action = "mode.agent"
Self::restore_var("VLLM_BASE_URL", self.vllm_base_url.take()); Self::restore_var("VLLM_BASE_URL", self.vllm_base_url.take());
Self::restore_var("OLLAMA_API_KEY", self.ollama_api_key.take()); Self::restore_var("OLLAMA_API_KEY", self.ollama_api_key.take());
Self::restore_var("OLLAMA_BASE_URL", self.ollama_base_url.take()); Self::restore_var("OLLAMA_BASE_URL", self.ollama_base_url.take());
Self::restore_var("HUGGINGFACE_API_KEY", self.huggingface_api_key.take());
Self::restore_var("HF_TOKEN", self.huggingface_token.take());
Self::restore_var("HUGGINGFACE_BASE_URL", self.huggingface_base_url.take());
Self::restore_var("HF_BASE_URL", self.hf_base_url.take());
Self::restore_var("HUGGINGFACE_MODEL", self.huggingface_model.take());
Self::restore_var("HF_MODEL", self.hf_model.take());
} }
} }
} }
@@ -5195,6 +5274,13 @@ unix_socket_path = "/tmp/cw-hooks.sock"
ProviderKind::parse("ark_wanjie"), ProviderKind::parse("ark_wanjie"),
Some(ProviderKind::WanjieArk) Some(ProviderKind::WanjieArk)
); );
for alias in ["huggingface", "hugging-face", "hugging_face", "hf"] {
assert_eq!(ProviderKind::parse(alias), Some(ProviderKind::Huggingface));
let parsed: ConfigToml =
toml::from_str(&format!("provider = \"{alias}\"")).expect("huggingface alias");
assert_eq!(parsed.provider, ProviderKind::Huggingface);
}
let parsed: ConfigToml = let parsed: ConfigToml =
toml::from_str("provider = \"ark-wanjie\"").expect("wanjie provider alias"); toml::from_str("provider = \"ark-wanjie\"").expect("wanjie provider alias");
@@ -5205,6 +5291,17 @@ unix_socket_path = "/tmp/cw-hooks.sock"
assert_eq!(parsed.provider, ProviderKind::Siliconflow); assert_eq!(parsed.provider, ProviderKind::Siliconflow);
} }
/// Setting an unrecognised provider must fail with a message that echoes the
/// rejected value and mentions `huggingface` among the expected names.
#[test]
fn unknown_provider_error_lists_huggingface() {
    let mut config = ConfigToml::default();
    let outcome = config.set_value("provider", "not-a-provider");
    let rendered = outcome
        .expect_err("unknown provider should fail")
        .to_string();

    for fragment in ["unknown provider 'not-a-provider'", "huggingface"] {
        assert!(rendered.contains(fragment));
    }
}
#[test] #[test]
fn provider_kind_accepts_legacy_deepseek_cn_aliases() { fn provider_kind_accepts_legacy_deepseek_cn_aliases() {
for alias in [ for alias in [
@@ -6177,6 +6274,69 @@ mode = "token-plan-usa"
assert_eq!(resolved.model, ARCEE_TRINITY_LARGE_PREVIEW_MODEL); assert_eq!(resolved.model, ARCEE_TRINITY_LARGE_PREVIEW_MODEL);
} }
/// When both the long (`HUGGINGFACE_*`) and short (`HF_*`) variables are set,
/// the resolved runtime options must come from the long names.
#[test]
fn huggingface_env_precedence_prefers_documented_names() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    let overrides = [
        ("CODEWHALE_PROVIDER", "hf"),
        ("HUGGINGFACE_API_KEY", "hf-full-key"),
        ("HF_TOKEN", "hf-token-fallback"),
        ("HUGGINGFACE_BASE_URL", "https://hf-full.example/v1"),
        ("HF_BASE_URL", "https://hf-short.example/v1"),
        ("HUGGINGFACE_MODEL", "org/full-model"),
        ("HF_MODEL", "org/short-model"),
    ];
    // SAFETY: test-only environment mutation guarded by a module mutex.
    unsafe {
        for (name, value) in overrides {
            env::set_var(name, value);
        }
    }

    let resolved =
        ConfigToml::default().resolve_runtime_options(&CliRuntimeOverrides::default());

    assert_eq!(resolved.provider, ProviderKind::Huggingface);
    assert_eq!(resolved.api_key.as_deref(), Some("hf-full-key"));
    assert_eq!(resolved.base_url, "https://hf-full.example/v1");
    assert_eq!(resolved.model, "org/full-model");
}
/// With only the short `HF_*` variables set by this test, the resolved key,
/// base URL and model must come from those fallbacks.
#[test]
fn huggingface_short_env_fallbacks_resolve_when_primary_names_are_absent() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    let short_names = [
        ("CODEWHALE_PROVIDER", "huggingface"),
        ("HF_TOKEN", "hf-token-fallback"),
        ("HF_BASE_URL", "https://hf-short.example/v1"),
        ("HF_MODEL", "org/short-model"),
    ];
    // SAFETY: test-only environment mutation guarded by a module mutex.
    unsafe {
        for (name, value) in short_names {
            env::set_var(name, value);
        }
    }

    let resolved =
        ConfigToml::default().resolve_runtime_options(&CliRuntimeOverrides::default());

    assert_eq!(resolved.provider, ProviderKind::Huggingface);
    assert_eq!(resolved.api_key.as_deref(), Some("hf-token-fallback"));
    assert_eq!(resolved.base_url, "https://hf-short.example/v1");
    assert_eq!(resolved.model, "org/short-model");
}
/// A whitespace-only `HUGGINGFACE_API_KEY` must not shadow `HF_TOKEN`: the
/// resolved API key has to be the fallback token.
#[test]
fn huggingface_token_fallback_resolves_when_primary_api_key_is_blank() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    let blank_primary = [
        ("CODEWHALE_PROVIDER", "huggingface"),
        ("HUGGINGFACE_API_KEY", " "),
        ("HF_TOKEN", "hf-token-fallback"),
    ];
    // SAFETY: test-only environment mutation guarded by a module mutex.
    unsafe {
        for (name, value) in blank_primary {
            env::set_var(name, value);
        }
    }

    let resolved =
        ConfigToml::default().resolve_runtime_options(&CliRuntimeOverrides::default());

    assert_eq!(resolved.provider, ProviderKind::Huggingface);
    assert_eq!(resolved.api_key.as_deref(), Some("hf-token-fallback"));
}
#[test] #[test]
fn siliconflow_cn_base_url_env_normalizes_model_aliases() { fn siliconflow_cn_base_url_env_normalizes_model_aliases() {
let _lock = env_lock(); let _lock = env_lock();
+81
View File
@@ -11344,6 +11344,25 @@ model = "deepseek-ai/deepseek-v4-pro"
assert_eq!(tui.status_items, None); assert_eq!(tui.status_items, None);
} }
/// Every accepted spelling of the Hugging Face provider must parse to
/// `ApiProvider::Huggingface`.
#[test]
fn huggingface_provider_aliases_parse() {
    let spellings = ["huggingface", "hugging-face", "hugging_face", "hf"];
    spellings.into_iter().for_each(|spelling| {
        assert_eq!(ApiProvider::parse(spelling), Some(ApiProvider::Huggingface));
    });
}
/// Validating a config with an unrecognised provider must fail with a message
/// that echoes the rejected value and mentions `huggingface`.
#[test]
fn invalid_provider_error_lists_huggingface() {
    let config = Config {
        provider: Some(String::from("not-a-provider")),
        ..Config::default()
    };

    let rendered = config
        .validate()
        .expect_err("unknown provider should fail")
        .to_string();

    for fragment in ["Invalid provider 'not-a-provider'", "huggingface"] {
        assert!(rendered.contains(fragment));
    }
}
#[test] #[test]
fn huggingface_provider_uses_direct_defaults() -> Result<()> { fn huggingface_provider_uses_direct_defaults() -> Result<()> {
let _lock = lock_test_env(); let _lock = lock_test_env();
@@ -11398,6 +11417,35 @@ model = "deepseek-ai/deepseek-v4-pro"
Ok(()) Ok(())
} }
/// With the Hugging Face provider selected and no key configured by this test,
/// the key lookup must fail with a message naming both `HUGGINGFACE_API_KEY`
/// and `HF_TOKEN`.
#[test]
fn huggingface_missing_key_error_mentions_env_fallbacks() -> Result<()> {
    let _lock = lock_test_env();

    // Process id plus a nanosecond timestamp keeps the scratch directory unique.
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let dir_name = format!(
        "codewhale-tui-huggingface-missing-key-test-{}-{}",
        std::process::id(),
        since_epoch.as_nanos()
    );
    let temp_root = env::temp_dir().join(dir_name);
    fs::create_dir_all(&temp_root)?;
    let _guard = EnvGuard::new(&temp_root);

    let config = Config {
        provider: Some(String::from("huggingface")),
        ..Config::default()
    };
    // The provider itself is valid; only the key lookup is expected to fail.
    config.validate()?;

    let rendered = config
        .deepseek_api_key()
        .expect_err("missing key")
        .to_string();
    for fragment in [
        "Hugging Face API key not found",
        "HUGGINGFACE_API_KEY",
        "HF_TOKEN",
    ] {
        assert!(rendered.contains(fragment));
    }
    Ok(())
}
#[test] #[test]
fn huggingface_env_overrides_key_base_url_and_model() -> Result<()> { fn huggingface_env_overrides_key_base_url_and_model() -> Result<()> {
let _lock = lock_test_env(); let _lock = lock_test_env();
@@ -11419,8 +11467,11 @@ model = "deepseek-ai/deepseek-v4-pro"
unsafe { unsafe {
env::set_var("CODEWHALE_PROVIDER", "huggingface"); env::set_var("CODEWHALE_PROVIDER", "huggingface");
env::set_var("HUGGINGFACE_API_KEY", "hf-env-key"); env::set_var("HUGGINGFACE_API_KEY", "hf-env-key");
env::set_var("HF_TOKEN", "hf-token-fallback");
env::set_var("HUGGINGFACE_BASE_URL", "https://custom-hf.example/v1"); env::set_var("HUGGINGFACE_BASE_URL", "https://custom-hf.example/v1");
env::set_var("HF_BASE_URL", "https://fallback-hf.example/v1");
env::set_var("HUGGINGFACE_MODEL", "meta-llama/Llama-3-70B"); env::set_var("HUGGINGFACE_MODEL", "meta-llama/Llama-3-70B");
env::set_var("HF_MODEL", "fallback/model");
} }
let config = Config::load(None, None)?; let config = Config::load(None, None)?;
@@ -11469,4 +11520,34 @@ model = "deepseek-ai/deepseek-v4-pro"
Some(std::path::Path::new("E:\\google\\downloads\\xm4114.wav")) Some(std::path::Path::new("E:\\google\\downloads\\xm4114.wav"))
); );
} }
/// With only the short `HF_*` variables set by this test, a loaded config must
/// select the Hugging Face provider and take key, base URL and model from them.
#[test]
fn huggingface_short_env_fallbacks_configure_route() -> Result<()> {
    let _lock = lock_test_env();

    // Process id plus a nanosecond timestamp keeps the scratch directory unique.
    let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
    let dir_name = format!(
        "codewhale-tui-huggingface-short-env-test-{}-{}",
        std::process::id(),
        since_epoch.as_nanos()
    );
    let temp_root = env::temp_dir().join(dir_name);
    fs::create_dir_all(&temp_root)?;
    let _guard = EnvGuard::new(&temp_root);

    let short_names = [
        ("CODEWHALE_PROVIDER", "hf"),
        ("HF_TOKEN", "hf-token-value"),
        ("HF_BASE_URL", "https://short-hf.example/v1"),
        ("HF_MODEL", "org/short-model"),
    ];
    // SAFETY: test-only environment mutation, performed while the
    // `lock_test_env()` guard taken above is still held.
    unsafe {
        for (name, value) in short_names {
            env::set_var(name, value);
        }
    }

    let config = Config::load(None, None)?;

    assert_eq!(config.api_provider(), ApiProvider::Huggingface);
    assert_eq!(config.deepseek_api_key()?, "hf-token-value");
    assert_eq!(config.deepseek_base_url(), "https://short-hf.example/v1");
    assert_eq!(config.default_model(), "org/short-model");
    Ok(())
}
} }
+19 -5
View File
@@ -7,8 +7,7 @@ those models become discoverable, evaluable, routable, servable, and exportable
without weakening the current terminal-agent contract: local workspace control, without weakening the current terminal-agent contract: local workspace control,
explicit provider auth, approval gates, and clear privacy boundaries. explicit provider auth, approval gates, and clear privacy boundaries.
This document is roadmap language. It does not mean every workset below is This document is roadmap language. Some worksets below are roadmap-only.
implemented today.
## Implemented Today ## Implemented Today
@@ -19,6 +18,10 @@ implemented today.
Face Inference Providers, generic OpenAI-compatible endpoints, SGLang, vLLM, Face Inference Providers, generic OpenAI-compatible endpoints, SGLang, vLLM,
and Ollama are supported provider paths where their IDs appear in and Ollama are supported provider paths where their IDs appear in
`/provider`, `codewhale --provider`, or `codewhale models`. `/provider`, `codewhale --provider`, or `codewhale models`.
- Hugging Face Inference Providers are available through the
OpenAI-compatible router at `https://router.huggingface.co/v1`. Select the
route with `huggingface`, `hugging-face`, `hugging_face`, or `hf`; configure
`HUGGINGFACE_API_KEY` or `HF_TOKEN` for auth.
- Model auto-routing chooses a concrete DeepSeek model and thinking level per - Model auto-routing chooses a concrete DeepSeek model and thinking level per
turn. It is not a TUI mode. turn. It is not a TUI mode.
- Fin is the fast `deepseek-v4-flash` thinking-off path for routing, - Fin is the fast `deepseek-v4-flash` thinking-off path for routing,
@@ -27,7 +30,7 @@ implemented today.
- Self-hosted OpenAI-compatible endpoints can be used through SGLang, vLLM, - Self-hosted OpenAI-compatible endpoints can be used through SGLang, vLLM,
Ollama, or the generic `openai` provider configuration. Ollama, or the generic `openai` provider configuration.
## Not Implemented Yet ## Still Planned
- A native Hugging Face Hub browser, model passport picker, or direct Hub search - A native Hugging Face Hub browser, model passport picker, or direct Hub search
workflow. The OpenAI-compatible Hugging Face Inference Providers route is workflow. The OpenAI-compatible Hugging Face Inference Providers route is
@@ -59,6 +62,14 @@ describe a model as available before CodeWhale can actually route to it.
## Hugging Face Workset ## Hugging Face Workset
Implemented today:
- Hugging Face Inference Providers as an explicit OpenAI-compatible router
provider, selected with `huggingface`, `hugging-face`, `hugging_face`, or
`hf`.
- Model IDs are sent to the router exactly as selected, including
org-prefixed Hugging Face model IDs.
Planned scope: Planned scope:
- Hub API auth and model discovery. - Hub API auth and model discovery.
@@ -71,6 +82,8 @@ Planned scope:
Non-goal for now: claiming native Hub search, model passports, Spaces/Jobs, or Non-goal for now: claiming native Hub search, model passports, Spaces/Jobs, or
Model Lab UI exists before those surfaces are implemented in code. Model Lab UI exists before those surfaces are implemented in code.
The inference-provider API key does not imply Hub browsing/export, upload, or
Jobs authorization.
## Unsloth Workset ## Unsloth Workset
@@ -140,8 +153,9 @@ Planned scope:
- Local files, prompts, transcripts, traces, model outputs, eval results, - Local files, prompts, transcripts, traces, model outputs, eval results,
adapters, datasets, and checkpoints should remain local unless the user adapters, datasets, and checkpoints should remain local unless the user
explicitly chooses a provider or export destination. explicitly chooses a provider or export destination.
- Provider auth must remain explicit. `DEEPSEEK_*`, OpenRouter, Hugging Face, - Provider auth must remain explicit. `DEEPSEEK_*`, OpenRouter,
and self-hosted credentials should not be inferred from unrelated config. `HUGGINGFACE_API_KEY` / `HF_TOKEN`, and self-hosted credentials should not be
inferred from unrelated config.
- Exportable artifacts should include provenance: source model, provider, - Exportable artifacts should include provenance: source model, provider,
route, tool policy, eval inputs, and redaction status. route, tool policy, eval inputs, and redaction status.
- Public sharing, hosted telemetry, sponsorship badges, and external branding - Public sharing, hosted telemetry, sponsorship badges, and external branding
+18 -1
View File
@@ -44,6 +44,11 @@ Use any of these surfaces to select a provider:
as legacy aliases for `deepseek`. They do not select a different official host; as legacy aliases for `deepseek`. They do not select a different official host;
DeepSeek uses the same official API host worldwide. DeepSeek uses the same official API host worldwide.
`huggingface`, `hugging-face`, `hugging_face`, and `hf` all select the
Hugging Face Inference Providers route. This is the OpenAI-compatible router
path for chat/inference, not Hub browsing, model-card inspection, uploads, or
artifact export.
Fresh shared config writes to `~/.codewhale/config.toml`. Existing Fresh shared config writes to `~/.codewhale/config.toml`. Existing
`~/.deepseek/config.toml` files are still read for compatibility. `~/.deepseek/config.toml` files are still read for compatibility.
@@ -134,7 +139,7 @@ endpoint.
| `sglang` | `[providers.sglang]` | Optional `SGLANG_API_KEY` | `SGLANG_BASE_URL`; default `http://localhost:30000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted OpenAI-compatible route. Localhost deployments commonly omit auth. `SGLANG_MODEL` is accepted. | | `sglang` | `[providers.sglang]` | Optional `SGLANG_API_KEY` | `SGLANG_BASE_URL`; default `http://localhost:30000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted OpenAI-compatible route. Localhost deployments commonly omit auth. `SGLANG_MODEL` is accepted. |
| `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. | | `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. |
| `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. | | `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. |
| `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible route. Org-prefixed model IDs pass through. | | `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`, `HF_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible router route. Accepted aliases: `huggingface`, `hugging-face`, `hugging_face`, `hf`. Org-prefixed model IDs pass through. `HUGGINGFACE_MODEL` and `HF_MODEL` are accepted. Hub browsing/export are separate future features. |
| `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. | | `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. |
| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. | | `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. |
| `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. | | `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. |
@@ -264,6 +269,18 @@ the endpoint's ability to accept OpenAI-compatible `tools` payloads. A custom
OpenAI-compatible or local endpoint can still reject tool calls even if OpenAI-compatible or local endpoint can still reject tool calls even if
CodeWhale can send the schema. CodeWhale can send the schema.
### Hugging Face Inference Providers Notes
The shipped Hugging Face route targets the OpenAI-compatible Inference Providers
router at `https://router.huggingface.co/v1`. Configure auth with
`HUGGINGFACE_API_KEY` first, or `HF_TOKEN` as a fallback. Configure the endpoint
with `HUGGINGFACE_BASE_URL` first, or `HF_BASE_URL` as a fallback; configure the
model with `HUGGINGFACE_MODEL` first, or `HF_MODEL` as a fallback.
This route does not imply Hub browsing, model-card metadata, dataset access,
Jobs, uploads, or export. Those remain explicit Model Lab work items so
provider auth and artifact movement stay separate.
### When a Local Model Prints Tool JSON ### When a Local Model Prints Tool JSON
CodeWhale only executes tools when the provider returns Chat Completions CodeWhale only executes tools when the provider returns Chat Completions
+105 -1
View File
@@ -31,6 +31,10 @@ API_PROVIDER_ONLY_IDS = {"deepseek-cn"}
SHARED_PROVIDER_TABLES = { SHARED_PROVIDER_TABLES = {
"siliconflow-CN": "siliconflow_cn", "siliconflow-CN": "siliconflow_cn",
} }
HUGGINGFACE_ALIASES = {"huggingface", "hugging-face", "hugging_face", "hf"}
HUGGINGFACE_API_KEY_ENV_ORDER = ["HUGGINGFACE_API_KEY", "HF_TOKEN"]
HUGGINGFACE_BASE_URL_ENV_ORDER = ["HUGGINGFACE_BASE_URL", "HF_BASE_URL"]
HUGGINGFACE_MODEL_ENV_ORDER = ["HUGGINGFACE_MODEL", "HF_MODEL"]
def read(path: Path) -> str: def read(path: Path) -> str:
@@ -69,6 +73,35 @@ def extract_match_block(
raise ValueError(f"could not parse match block after {signature!r}") raise ValueError(f"could not parse match block after {signature!r}")
def parse_aliases_for_variant(source: str, enum_name: str, variant: str, context: str) -> set[str]:
    """Return the string aliases that parse to ``Self::<variant>``.

    The ``parse`` match block inside ``impl <enum_name>`` is searched first for
    an arm of the form ``"a" | "b" => Some(Self::<variant>)``. If no such arm
    exists and the enum is ``ProviderKind`` or ``ApiProvider``, the
    ``provider!(...)`` macro invocation for the variant in ``PROVIDER_RS`` is
    used instead, yielding its id plus its ``aliases: [...]`` entries.

    Raises:
        ValueError: when neither source yields aliases for ``variant``.
    """
    quoted_string = r'"([^"]+)"'

    parse_block = extract_match_block(
        source,
        "pub fn parse(value: &str) -> Option<Self>",
        context,
        require_index(source, f"impl {enum_name}", context),
    )
    arm = re.search(
        rf'((?:"[^"]+"\s*\|\s*)*"[^"]+")\s*=>\s*Some\(Self::{variant}\)',
        parse_block,
    )
    if arm is not None:
        return set(re.findall(quoted_string, arm.group(1)))

    if enum_name in {"ProviderKind", "ApiProvider"}:
        # NOTE(review): the lazy `.*?` under DOTALL is not anchored to a single
        # macro invocation; confirm it cannot pick up a later provider's
        # `aliases:` list when this variant declares none.
        macro_pattern = (
            rf'provider!\(\s*\n\s*\w+,\s*\n\s*{variant},\s*\n\s*"([^"]+)".*?'
            r"aliases:\s*\[(.*?)\]\s*\);"
        )
        invocation = re.search(macro_pattern, read(PROVIDER_RS), re.DOTALL)
        if invocation is not None:
            provider_id, alias_list = invocation.group(1), invocation.group(2)
            return {provider_id} | set(re.findall(quoted_string, alias_list))

    raise ValueError(f"{context}: missing parse arm for {variant}")
def provider_kind_ids(config_rs: str) -> dict[str, str]: def provider_kind_ids(config_rs: str) -> dict[str, str]:
provider_rs = read(PROVIDER_RS) provider_rs = read(PROVIDER_RS)
pairs = re.findall( pairs = re.findall(
@@ -201,6 +234,76 @@ def report_provider_enum_drift(
return errors return errors
def report_huggingface_coverage(
    config_rs: str, tui_config_rs: str, providers_md: str
) -> list[str]:
    """Collect drift errors for Hugging Face aliases and env-var precedence.

    Checks that both Rust enums accept every alias in ``HUGGINGFACE_ALIASES``,
    that the provider docs mention each alias in an inline code span, and that
    the primary env var name appears before its short fallback in both Rust
    sources and in the docs.
    """
    config_path = "crates/config/src/lib.rs"
    tui_path = "crates/tui/src/config.rs"

    # Parse both enums up front so a parse failure surfaces before any report.
    config_aliases = parse_aliases_for_variant(
        config_rs, "ProviderKind", "Huggingface", config_path
    )
    tui_aliases = parse_aliases_for_variant(
        tui_config_rs, "ApiProvider", "Huggingface", tui_path
    )

    # Fenced code blocks are dropped so only inline `code` spans are counted.
    prose_only = re.sub(r"```.*?```", "", providers_md, flags=re.DOTALL)
    documented = set(re.findall(r"`([^`]+)`", prose_only))

    errors: list[str] = []
    alias_checks = [
        ("ProviderKind Hugging Face aliases", config_aliases),
        ("ApiProvider Hugging Face aliases", tui_aliases),
        ("documented Hugging Face aliases", documented),
    ]
    for title, found in alias_checks:
        errors += report_set(title, HUGGINGFACE_ALIASES, found & HUGGINGFACE_ALIASES)

    precedence_checks = [
        ("Hugging Face API key env precedence", HUGGINGFACE_API_KEY_ENV_ORDER),
        ("Hugging Face base URL env precedence", HUGGINGFACE_BASE_URL_ENV_ORDER),
        ("Hugging Face model env precedence", HUGGINGFACE_MODEL_ENV_ORDER),
    ]
    for title, env_order in precedence_checks:
        errors += report_env_lookup_order(title, config_rs, env_order, config_path)
        errors += report_env_lookup_order(title, tui_config_rs, env_order, tui_path)
        errors += report_string_order(title, providers_md, env_order, "docs/PROVIDERS.md")
    return errors
def report_env_lookup_order(
    label: str, source: str, expected_order: list[str], context: str
) -> list[str]:
    """Check that ``std::env::var("NAME")`` lookups appear in ``expected_order``.

    Each env var name is turned into the literal Rust lookup expression, and
    the ordering check itself is delegated to ``report_string_order``.
    """
    needles = []
    for env_name in expected_order:
        needles.append(f'std::env::var("{env_name}")')
    return report_string_order(label, source, needles, context)
def report_string_order(
    label: str, source: str, expected_order: list[str], context: str
) -> list[str]:
    """Check that every needle occurs in ``source`` and in the given order.

    Only the first occurrence of each needle is considered. Returns a
    one-element list describing the first missing needle, or a one-element
    list describing an ordering violation, or an empty list when all needles
    are present in non-decreasing position order.
    """
    offsets = [source.find(needle) for needle in expected_order]

    # A missing needle is reported before any ordering problem.
    for needle, offset in zip(expected_order, offsets):
        if offset == -1:
            return [f"{label} missing {needle!r} in {context}"]

    out_of_order = any(
        earlier > later for earlier, later in zip(offsets, offsets[1:])
    )
    if out_of_order:
        expected = " before ".join(expected_order)
        return [f"{label} has wrong order in {context}: expected " + expected]
    return []
def provider_table_name(provider_id: str) -> str: def provider_table_name(provider_id: str) -> str:
return SHARED_PROVIDER_TABLES.get(provider_id, provider_id.replace("-", "_")) return SHARED_PROVIDER_TABLES.get(provider_id, provider_id.replace("-", "_"))
@@ -219,6 +322,7 @@ def main() -> int:
errors: list[str] = [] errors: list[str] = []
errors += report_provider_enum_drift(canonical_ids, live_api_provider_ids) errors += report_provider_enum_drift(canonical_ids, live_api_provider_ids)
errors += report_huggingface_coverage(config_rs, tui_config_rs, providers_md)
errors += report_set( errors += report_set(
"shipped provider rows", "shipped provider rows",
canonical_ids, canonical_ids,
@@ -256,4 +360,4 @@ def main() -> int:
if __name__ == "__main__": if __name__ == "__main__":
raise SystemExit(main()) raise SystemExit(main())