Merge PR #2879: Hugging Face provider docs and tests
Harvested from PR #2879 by @mvanhorn Co-authored-by: mvanhorn <455140+mvanhorn@users.noreply.github.com>
This commit is contained in:
+3
-1
@@ -269,7 +269,8 @@ max_subagents = 10 # optional (1-20)
|
||||
# SGLang: SGLANG_BASE_URL, SGLANG_MODEL, optional SGLANG_API_KEY
|
||||
# vLLM: VLLM_BASE_URL, VLLM_MODEL, optional VLLM_API_KEY
|
||||
# Ollama: OLLAMA_BASE_URL, OLLAMA_MODEL, optional OLLAMA_API_KEY
|
||||
# Hugging Face: HUGGINGFACE_API_KEY (or HF_TOKEN), HUGGINGFACE_BASE_URL (or HF_BASE_URL), HUGGINGFACE_MODEL (or HF_MODEL)
|
||||
# Hugging Face: HUGGINGFACE_API_KEY (or HF_TOKEN), HUGGINGFACE_BASE_URL (or HF_BASE_URL),
|
||||
# HUGGINGFACE_MODEL (or HF_MODEL)
|
||||
#
|
||||
# Custom DeepSeek-compatible APIs usually do not need a new provider table:
|
||||
# set `provider = "deepseek"` and override [providers.deepseek].base_url/model.
|
||||
@@ -420,6 +421,7 @@ max_subagents = 10 # optional (1-20)
|
||||
# model = "deepseek-coder:1.3b" # or any local Ollama tag
|
||||
|
||||
# Hugging Face Inference Providers (https://huggingface.co/docs/api-inference)
|
||||
# Provider aliases: huggingface, hugging-face, hugging_face, hf
|
||||
# Env var aliases: HUGGINGFACE_API_KEY / HF_TOKEN, HUGGINGFACE_BASE_URL / HF_BASE_URL,
|
||||
# HUGGINGFACE_MODEL / HF_MODEL
|
||||
[providers.huggingface]
|
||||
|
||||
+164
-4
@@ -185,6 +185,39 @@ impl ProviderKind {
|
||||
Self::Anthropic,
|
||||
];
|
||||
|
||||
#[must_use]
|
||||
pub fn all() -> &'static [Self] {
|
||||
&[
|
||||
Self::Deepseek,
|
||||
Self::NvidiaNim,
|
||||
Self::Openai,
|
||||
Self::Atlascloud,
|
||||
Self::WanjieArk,
|
||||
Self::Volcengine,
|
||||
Self::Openrouter,
|
||||
Self::XiaomiMimo,
|
||||
Self::Novita,
|
||||
Self::Fireworks,
|
||||
Self::Siliconflow,
|
||||
Self::SiliconflowCN,
|
||||
Self::Arcee,
|
||||
Self::Moonshot,
|
||||
Self::Sglang,
|
||||
Self::Vllm,
|
||||
Self::Ollama,
|
||||
Self::Huggingface,
|
||||
]
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn names_hint() -> String {
|
||||
Self::all()
|
||||
.iter()
|
||||
.map(|provider| provider.as_str())
|
||||
.collect::<Vec<_>>()
|
||||
.join(", ")
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn as_str(self) -> &'static str {
|
||||
self.provider().id()
|
||||
@@ -1258,8 +1291,12 @@ impl ConfigToml {
|
||||
pub fn set_value(&mut self, key: &str, value: &str) -> Result<()> {
|
||||
match key {
|
||||
"provider" => {
|
||||
self.provider = ProviderKind::parse(value)
|
||||
.with_context(|| format!("unknown provider '{value}'"))?;
|
||||
self.provider = ProviderKind::parse(value).with_context(|| {
|
||||
format!(
|
||||
"unknown provider '{value}': expected {}",
|
||||
ProviderKind::names_hint()
|
||||
)
|
||||
})?;
|
||||
}
|
||||
"api_key" => self.api_key = Some(value.to_string()),
|
||||
"base_url" => self.base_url = Some(value.to_string()),
|
||||
@@ -2088,7 +2125,7 @@ impl ConfigToml {
|
||||
} else if let Some(value) = xiaomi_mimo_env_api_key.filter(|v| !v.trim().is_empty()) {
|
||||
(Some(value), Some(RuntimeApiKeySource::Env))
|
||||
} else if should_skip_secret_store_for_provider(provider, &base_url, auth_mode.as_deref()) {
|
||||
match codewhale_secrets::env_for(provider.as_str()) {
|
||||
match env_api_key_for_provider(provider) {
|
||||
Some(value) => (Some(value), Some(RuntimeApiKeySource::Env)),
|
||||
None => (None, None),
|
||||
}
|
||||
@@ -2101,7 +2138,10 @@ impl ConfigToml {
|
||||
};
|
||||
(Some(value), Some(source))
|
||||
}
|
||||
None => (None, None),
|
||||
None => match env_api_key_for_provider(provider) {
|
||||
Some(value) => (Some(value), Some(RuntimeApiKeySource::Env)),
|
||||
None => (None, None),
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
@@ -2745,6 +2785,21 @@ fn should_skip_secret_store_for_provider(
|
||||
) || base_url_uses_local_host(base_url)
|
||||
}
|
||||
|
||||
fn env_api_key_for_provider(provider: ProviderKind) -> Option<String> {
|
||||
if provider == ProviderKind::Huggingface {
|
||||
return std::env::var("HUGGINGFACE_API_KEY")
|
||||
.ok()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
.or_else(|| {
|
||||
std::env::var("HF_TOKEN")
|
||||
.ok()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
});
|
||||
}
|
||||
|
||||
codewhale_secrets::env_for(provider.as_str())
|
||||
}
|
||||
|
||||
fn auth_mode_requires_api_key(auth_mode: Option<&str>) -> bool {
|
||||
matches!(
|
||||
auth_mode
|
||||
@@ -3962,6 +4017,12 @@ action = "mode.agent"
|
||||
vllm_base_url: Option<OsString>,
|
||||
ollama_api_key: Option<OsString>,
|
||||
ollama_base_url: Option<OsString>,
|
||||
huggingface_api_key: Option<OsString>,
|
||||
huggingface_token: Option<OsString>,
|
||||
huggingface_base_url: Option<OsString>,
|
||||
hf_base_url: Option<OsString>,
|
||||
huggingface_model: Option<OsString>,
|
||||
hf_model: Option<OsString>,
|
||||
codewhale_provider: Option<OsString>,
|
||||
codewhale_model: Option<OsString>,
|
||||
codewhale_base_url: Option<OsString>,
|
||||
@@ -4039,6 +4100,12 @@ action = "mode.agent"
|
||||
vllm_base_url: env::var_os("VLLM_BASE_URL"),
|
||||
ollama_api_key: env::var_os("OLLAMA_API_KEY"),
|
||||
ollama_base_url: env::var_os("OLLAMA_BASE_URL"),
|
||||
huggingface_api_key: env::var_os("HUGGINGFACE_API_KEY"),
|
||||
huggingface_token: env::var_os("HF_TOKEN"),
|
||||
huggingface_base_url: env::var_os("HUGGINGFACE_BASE_URL"),
|
||||
hf_base_url: env::var_os("HF_BASE_URL"),
|
||||
huggingface_model: env::var_os("HUGGINGFACE_MODEL"),
|
||||
hf_model: env::var_os("HF_MODEL"),
|
||||
};
|
||||
// Safety: test-only environment mutation guarded by a module mutex.
|
||||
unsafe {
|
||||
@@ -4111,6 +4178,12 @@ action = "mode.agent"
|
||||
env::remove_var("VLLM_BASE_URL");
|
||||
env::remove_var("OLLAMA_API_KEY");
|
||||
env::remove_var("OLLAMA_BASE_URL");
|
||||
env::remove_var("HUGGINGFACE_API_KEY");
|
||||
env::remove_var("HF_TOKEN");
|
||||
env::remove_var("HUGGINGFACE_BASE_URL");
|
||||
env::remove_var("HF_BASE_URL");
|
||||
env::remove_var("HUGGINGFACE_MODEL");
|
||||
env::remove_var("HF_MODEL");
|
||||
}
|
||||
guard
|
||||
}
|
||||
@@ -4209,6 +4282,12 @@ action = "mode.agent"
|
||||
Self::restore_var("VLLM_BASE_URL", self.vllm_base_url.take());
|
||||
Self::restore_var("OLLAMA_API_KEY", self.ollama_api_key.take());
|
||||
Self::restore_var("OLLAMA_BASE_URL", self.ollama_base_url.take());
|
||||
Self::restore_var("HUGGINGFACE_API_KEY", self.huggingface_api_key.take());
|
||||
Self::restore_var("HF_TOKEN", self.huggingface_token.take());
|
||||
Self::restore_var("HUGGINGFACE_BASE_URL", self.huggingface_base_url.take());
|
||||
Self::restore_var("HF_BASE_URL", self.hf_base_url.take());
|
||||
Self::restore_var("HUGGINGFACE_MODEL", self.huggingface_model.take());
|
||||
Self::restore_var("HF_MODEL", self.hf_model.take());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5195,6 +5274,13 @@ unix_socket_path = "/tmp/cw-hooks.sock"
|
||||
ProviderKind::parse("ark_wanjie"),
|
||||
Some(ProviderKind::WanjieArk)
|
||||
);
|
||||
for alias in ["huggingface", "hugging-face", "hugging_face", "hf"] {
|
||||
assert_eq!(ProviderKind::parse(alias), Some(ProviderKind::Huggingface));
|
||||
|
||||
let parsed: ConfigToml =
|
||||
toml::from_str(&format!("provider = \"{alias}\"")).expect("huggingface alias");
|
||||
assert_eq!(parsed.provider, ProviderKind::Huggingface);
|
||||
}
|
||||
|
||||
let parsed: ConfigToml =
|
||||
toml::from_str("provider = \"ark-wanjie\"").expect("wanjie provider alias");
|
||||
@@ -5205,6 +5291,17 @@ unix_socket_path = "/tmp/cw-hooks.sock"
|
||||
assert_eq!(parsed.provider, ProviderKind::Siliconflow);
|
||||
}
|
||||
|
||||
/// An unknown provider passed to `set_value` must fail with a message that
/// echoes the rejected value and mentions `huggingface`.
#[test]
fn unknown_provider_error_lists_huggingface() {
    let mut cfg = ConfigToml::default();
    let rejected = cfg.set_value("provider", "not-a-provider");

    let message = rejected
        .expect_err("unknown provider should fail")
        .to_string();

    assert!(message.contains("unknown provider 'not-a-provider'"));
    assert!(message.contains("huggingface"));
}
|
||||
|
||||
#[test]
|
||||
fn provider_kind_accepts_legacy_deepseek_cn_aliases() {
|
||||
for alias in [
|
||||
@@ -6177,6 +6274,69 @@ mode = "token-plan-usa"
|
||||
assert_eq!(resolved.model, ARCEE_TRINITY_LARGE_PREVIEW_MODEL);
|
||||
}
|
||||
|
||||
/// When both the long `HUGGINGFACE_*` names and their short `HF_*` aliases are
/// set, the resolved options must come from the long names.
#[test]
fn huggingface_env_precedence_prefers_documented_names() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    // Each long-form variable is followed by its short alias with a different
    // value, so the assertions below can tell which one was used.
    let overrides = [
        ("CODEWHALE_PROVIDER", "hf"),
        ("HUGGINGFACE_API_KEY", "hf-full-key"),
        ("HF_TOKEN", "hf-token-fallback"),
        ("HUGGINGFACE_BASE_URL", "https://hf-full.example/v1"),
        ("HF_BASE_URL", "https://hf-short.example/v1"),
        ("HUGGINGFACE_MODEL", "org/full-model"),
        ("HF_MODEL", "org/short-model"),
    ];
    // Safety: test-only environment mutation guarded by a module mutex.
    unsafe {
        for (name, value) in overrides {
            env::set_var(name, value);
        }
    }

    let defaults = CliRuntimeOverrides::default();
    let resolved = ConfigToml::default().resolve_runtime_options(&defaults);

    assert_eq!(resolved.provider, ProviderKind::Huggingface);
    assert_eq!(resolved.api_key.as_deref(), Some("hf-full-key"));
    assert_eq!(resolved.base_url, "https://hf-full.example/v1");
    assert_eq!(resolved.model, "org/full-model");
}
|
||||
|
||||
/// With only the short `HF_*` variables set, key, base URL and model must all
/// resolve from those fallbacks.
#[test]
fn huggingface_short_env_fallbacks_resolve_when_primary_names_are_absent() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    // Only the short aliases are provided; the long names are left unset here.
    let overrides = [
        ("CODEWHALE_PROVIDER", "huggingface"),
        ("HF_TOKEN", "hf-token-fallback"),
        ("HF_BASE_URL", "https://hf-short.example/v1"),
        ("HF_MODEL", "org/short-model"),
    ];
    // Safety: test-only environment mutation guarded by a module mutex.
    unsafe {
        for (name, value) in overrides {
            env::set_var(name, value);
        }
    }

    let defaults = CliRuntimeOverrides::default();
    let resolved = ConfigToml::default().resolve_runtime_options(&defaults);

    assert_eq!(resolved.provider, ProviderKind::Huggingface);
    assert_eq!(resolved.api_key.as_deref(), Some("hf-token-fallback"));
    assert_eq!(resolved.base_url, "https://hf-short.example/v1");
    assert_eq!(resolved.model, "org/short-model");
}
|
||||
|
||||
/// A whitespace-only `HUGGINGFACE_API_KEY` must not block the `HF_TOKEN`
/// fallback.
#[test]
fn huggingface_token_fallback_resolves_when_primary_api_key_is_blank() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    // The primary key is present but blank, so the token should be used.
    let overrides = [
        ("CODEWHALE_PROVIDER", "huggingface"),
        ("HUGGINGFACE_API_KEY", " "),
        ("HF_TOKEN", "hf-token-fallback"),
    ];
    // Safety: test-only environment mutation guarded by a module mutex.
    unsafe {
        for (name, value) in overrides {
            env::set_var(name, value);
        }
    }

    let defaults = CliRuntimeOverrides::default();
    let resolved = ConfigToml::default().resolve_runtime_options(&defaults);

    assert_eq!(resolved.provider, ProviderKind::Huggingface);
    assert_eq!(resolved.api_key.as_deref(), Some("hf-token-fallback"));
}
|
||||
|
||||
#[test]
|
||||
fn siliconflow_cn_base_url_env_normalizes_model_aliases() {
|
||||
let _lock = env_lock();
|
||||
|
||||
@@ -11344,6 +11344,25 @@ model = "deepseek-ai/deepseek-v4-pro"
|
||||
assert_eq!(tui.status_items, None);
|
||||
}
|
||||
|
||||
/// Every accepted Hugging Face spelling must parse to `ApiProvider::Huggingface`.
#[test]
fn huggingface_provider_aliases_parse() {
    let spellings = ["huggingface", "hugging-face", "hugging_face", "hf"];
    for alias in spellings {
        assert_eq!(ApiProvider::parse(alias), Some(ApiProvider::Huggingface));
    }
}
|
||||
|
||||
/// Validating a config with an unknown provider must fail with a message that
/// echoes the rejected value and mentions `huggingface`.
#[test]
fn invalid_provider_error_lists_huggingface() {
    let config = Config {
        provider: Some(String::from("not-a-provider")),
        ..Default::default()
    };

    let message = config
        .validate()
        .expect_err("unknown provider should fail")
        .to_string();

    assert!(message.contains("Invalid provider 'not-a-provider'"));
    assert!(message.contains("huggingface"));
}
|
||||
|
||||
#[test]
|
||||
fn huggingface_provider_uses_direct_defaults() -> Result<()> {
|
||||
let _lock = lock_test_env();
|
||||
@@ -11398,6 +11417,35 @@ model = "deepseek-ai/deepseek-v4-pro"
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// With the Hugging Face provider selected and no key available, the error
/// must name both `HUGGINGFACE_API_KEY` and `HF_TOKEN`.
#[test]
fn huggingface_missing_key_error_mentions_env_fallbacks() -> Result<()> {
    let _lock = lock_test_env();

    // Process id plus a nanosecond timestamp gives each run its own directory.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let pid = std::process::id();
    let dir_name = format!("codewhale-tui-huggingface-missing-key-test-{}-{}", pid, nanos);
    let temp_root = env::temp_dir().join(dir_name);
    fs::create_dir_all(&temp_root)?;
    let _guard = EnvGuard::new(&temp_root);

    let config = Config {
        provider: Some(String::from("huggingface")),
        ..Default::default()
    };

    // The provider itself is valid; only the key lookup is expected to fail.
    config.validate()?;
    let message = config
        .deepseek_api_key()
        .expect_err("missing key")
        .to_string();

    assert!(message.contains("Hugging Face API key not found"));
    assert!(message.contains("HUGGINGFACE_API_KEY"));
    assert!(message.contains("HF_TOKEN"));
    Ok(())
}
|
||||
|
||||
#[test]
|
||||
fn huggingface_env_overrides_key_base_url_and_model() -> Result<()> {
|
||||
let _lock = lock_test_env();
|
||||
@@ -11419,8 +11467,11 @@ model = "deepseek-ai/deepseek-v4-pro"
|
||||
unsafe {
|
||||
env::set_var("CODEWHALE_PROVIDER", "huggingface");
|
||||
env::set_var("HUGGINGFACE_API_KEY", "hf-env-key");
|
||||
env::set_var("HF_TOKEN", "hf-token-fallback");
|
||||
env::set_var("HUGGINGFACE_BASE_URL", "https://custom-hf.example/v1");
|
||||
env::set_var("HF_BASE_URL", "https://fallback-hf.example/v1");
|
||||
env::set_var("HUGGINGFACE_MODEL", "meta-llama/Llama-3-70B");
|
||||
env::set_var("HF_MODEL", "fallback/model");
|
||||
}
|
||||
|
||||
let config = Config::load(None, None)?;
|
||||
@@ -11469,4 +11520,34 @@ model = "deepseek-ai/deepseek-v4-pro"
|
||||
Some(std::path::Path::new("E:\\google\\downloads\\xm4114.wav"))
|
||||
);
|
||||
}
|
||||
|
||||
/// With only the short `HF_*` variables set, a loaded `Config` must select the
/// Hugging Face provider and take key, base URL and model from those variables.
#[test]
fn huggingface_short_env_fallbacks_configure_route() -> Result<()> {
    let _lock = lock_test_env();

    // Process id plus a nanosecond timestamp gives each run its own directory.
    let nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_nanos();
    let pid = std::process::id();
    let dir_name = format!("codewhale-tui-huggingface-short-env-test-{}-{}", pid, nanos);
    let temp_root = env::temp_dir().join(dir_name);
    fs::create_dir_all(&temp_root)?;
    let _guard = EnvGuard::new(&temp_root);

    let overrides = [
        ("CODEWHALE_PROVIDER", "hf"),
        ("HF_TOKEN", "hf-token-value"),
        ("HF_BASE_URL", "https://short-hf.example/v1"),
        ("HF_MODEL", "org/short-model"),
    ];
    // Test-only environment mutation, performed while `_lock` from
    // `lock_test_env()` is held.
    unsafe {
        for (name, value) in overrides {
            env::set_var(name, value);
        }
    }

    let config = Config::load(None, None)?;
    assert_eq!(config.api_provider(), ApiProvider::Huggingface);
    assert_eq!(config.deepseek_api_key()?, "hf-token-value");
    assert_eq!(config.deepseek_base_url(), "https://short-hf.example/v1");
    assert_eq!(config.default_model(), "org/short-model");
    Ok(())
}
|
||||
}
|
||||
|
||||
+19
-5
@@ -7,8 +7,7 @@ those models become discoverable, evaluable, routable, servable, and exportable
|
||||
without weakening the current terminal-agent contract: local workspace control,
|
||||
explicit provider auth, approval gates, and clear privacy boundaries.
|
||||
|
||||
This document is roadmap language. It does not mean every workset below is
|
||||
implemented today.
|
||||
This document is written as roadmap language: some worksets below are already implemented, while others are roadmap-only.
|
||||
|
||||
## Implemented Today
|
||||
|
||||
@@ -19,6 +18,10 @@ implemented today.
|
||||
Face Inference Providers, generic OpenAI-compatible endpoints, SGLang, vLLM,
|
||||
and Ollama are supported provider paths where their IDs appear in
|
||||
`/provider`, `codewhale --provider`, or `codewhale models`.
|
||||
- Hugging Face Inference Providers are available through the
|
||||
OpenAI-compatible router at `https://router.huggingface.co/v1`. Select the
|
||||
route with `huggingface`, `hugging-face`, `hugging_face`, or `hf`; configure
|
||||
`HUGGINGFACE_API_KEY` or `HF_TOKEN` for auth.
|
||||
- Model auto-routing chooses a concrete DeepSeek model and thinking level per
|
||||
turn. It is not a TUI mode.
|
||||
- Fin is the fast `deepseek-v4-flash` thinking-off path for routing,
|
||||
@@ -27,7 +30,7 @@ implemented today.
|
||||
- Self-hosted OpenAI-compatible endpoints can be used through SGLang, vLLM,
|
||||
Ollama, or the generic `openai` provider configuration.
|
||||
|
||||
## Not Implemented Yet
|
||||
## Still Planned
|
||||
|
||||
- A native Hugging Face Hub browser, model passport picker, or direct Hub search
|
||||
workflow. The OpenAI-compatible Hugging Face Inference Providers route is
|
||||
@@ -59,6 +62,14 @@ describe a model as available before CodeWhale can actually route to it.
|
||||
|
||||
## Hugging Face Workset
|
||||
|
||||
Implemented today:
|
||||
|
||||
- Hugging Face Inference Providers as an explicit OpenAI-compatible router
|
||||
provider, selected with `huggingface`, `hugging-face`, `hugging_face`, or
|
||||
`hf`.
|
||||
- Model IDs are sent to the router exactly as selected, including
|
||||
org-prefixed Hugging Face model IDs.
|
||||
|
||||
Planned scope:
|
||||
|
||||
- Hub API auth and model discovery.
|
||||
@@ -71,6 +82,8 @@ Planned scope:
|
||||
|
||||
Non-goal for now: claiming native Hub search, model passports, Spaces/Jobs, or
|
||||
Model Lab UI exists before those surfaces are implemented in code.
|
||||
The inference-provider API key does not imply Hub browsing/export, upload, or
|
||||
Jobs authorization.
|
||||
|
||||
## Unsloth Workset
|
||||
|
||||
@@ -140,8 +153,9 @@ Planned scope:
|
||||
- Local files, prompts, transcripts, traces, model outputs, eval results,
|
||||
adapters, datasets, and checkpoints should remain local unless the user
|
||||
explicitly chooses a provider or export destination.
|
||||
- Provider auth must remain explicit. `DEEPSEEK_*`, OpenRouter, Hugging Face,
|
||||
and self-hosted credentials should not be inferred from unrelated config.
|
||||
- Provider auth must remain explicit. `DEEPSEEK_*`, OpenRouter,
|
||||
`HUGGINGFACE_API_KEY` / `HF_TOKEN`, and self-hosted credentials should not be
|
||||
inferred from unrelated config.
|
||||
- Exportable artifacts should include provenance: source model, provider,
|
||||
route, tool policy, eval inputs, and redaction status.
|
||||
- Public sharing, hosted telemetry, sponsorship badges, and external branding
|
||||
|
||||
+18
-1
@@ -44,6 +44,11 @@ Use any of these surfaces to select a provider:
|
||||
as legacy aliases for `deepseek`. They do not select a different official host;
|
||||
DeepSeek uses the same official API host worldwide.
|
||||
|
||||
`huggingface`, `hugging-face`, `hugging_face`, and `hf` all select the
|
||||
Hugging Face Inference Providers route. This is the OpenAI-compatible router
|
||||
path for chat/inference, not Hub browsing, model-card inspection, uploads, or
|
||||
artifact export.
|
||||
|
||||
Fresh shared config writes to `~/.codewhale/config.toml`. Existing
|
||||
`~/.deepseek/config.toml` files are still read for compatibility.
|
||||
|
||||
@@ -134,7 +139,7 @@ endpoint.
|
||||
| `sglang` | `[providers.sglang]` | Optional `SGLANG_API_KEY` | `SGLANG_BASE_URL`; default `http://localhost:30000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted OpenAI-compatible route. Localhost deployments commonly omit auth. `SGLANG_MODEL` is accepted. |
|
||||
| `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. |
|
||||
| `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. |
|
||||
| `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible route. Org-prefixed model IDs pass through. |
|
||||
| `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`, `HF_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible router route. Accepted aliases: `huggingface`, `hugging-face`, `hugging_face`, `hf`. Org-prefixed model IDs pass through. `HUGGINGFACE_MODEL` and `HF_MODEL` are accepted. Hub browsing/export are separate future features. |
|
||||
| `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. |
|
||||
| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. |
|
||||
| `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. |
|
||||
@@ -264,6 +269,18 @@ the endpoint's ability to accept OpenAI-compatible `tools` payloads. A custom
|
||||
OpenAI-compatible or local endpoint can still reject tool calls even if
|
||||
CodeWhale can send the schema.
|
||||
|
||||
### Hugging Face Inference Providers Notes
|
||||
|
||||
The shipped Hugging Face route targets the OpenAI-compatible Inference Providers
|
||||
router at `https://router.huggingface.co/v1`. Configure auth with
|
||||
`HUGGINGFACE_API_KEY` first, or `HF_TOKEN` as a fallback. Configure the endpoint
|
||||
with `HUGGINGFACE_BASE_URL` first, or `HF_BASE_URL` as a fallback; configure the
|
||||
model with `HUGGINGFACE_MODEL` first, or `HF_MODEL` as a fallback.
|
||||
|
||||
This route does not imply Hub browsing, model-card metadata, dataset access,
|
||||
Jobs, uploads, or export. Those remain explicit Model Lab work items so
|
||||
provider auth and artifact movement stay separate.
|
||||
|
||||
### When a Local Model Prints Tool JSON
|
||||
|
||||
CodeWhale only executes tools when the provider returns Chat Completions
|
||||
|
||||
@@ -31,6 +31,10 @@ API_PROVIDER_ONLY_IDS = {"deepseek-cn"}
|
||||
SHARED_PROVIDER_TABLES = {
|
||||
"siliconflow-CN": "siliconflow_cn",
|
||||
}
|
||||
HUGGINGFACE_ALIASES = {"huggingface", "hugging-face", "hugging_face", "hf"}
|
||||
HUGGINGFACE_API_KEY_ENV_ORDER = ["HUGGINGFACE_API_KEY", "HF_TOKEN"]
|
||||
HUGGINGFACE_BASE_URL_ENV_ORDER = ["HUGGINGFACE_BASE_URL", "HF_BASE_URL"]
|
||||
HUGGINGFACE_MODEL_ENV_ORDER = ["HUGGINGFACE_MODEL", "HF_MODEL"]
|
||||
|
||||
|
||||
def read(path: Path) -> str:
|
||||
@@ -69,6 +73,35 @@ def extract_match_block(
|
||||
raise ValueError(f"could not parse match block after {signature!r}")
|
||||
|
||||
|
||||
def parse_aliases_for_variant(source: str, enum_name: str, variant: str, context: str) -> set[str]:
    """Collect the string aliases that parse to ``variant`` of ``enum_name``.

    The ``parse`` match block inside ``impl {enum_name}`` is searched first for
    an arm of the form ``"a" | "b" => Some(Self::<variant>)``. When no such arm
    exists and the enum is ``ProviderKind`` or ``ApiProvider``, the
    ``provider!(...)`` invocation for the variant in ``PROVIDER_RS`` is used
    instead: its quoted id plus every entry of its ``aliases: [...]`` list.

    Raises:
        ValueError: if neither lookup finds the variant.
    """
    quoted = r'"([^"]+)"'

    parse_block = extract_match_block(
        source,
        "pub fn parse(value: &str) -> Option<Self>",
        context,
        require_index(source, f"impl {enum_name}", context),
    )
    arm = re.search(
        rf'((?:"[^"]+"\s*\|\s*)*"[^"]+")\s*=>\s*Some\(Self::{variant}\)',
        parse_block,
    )
    if arm is not None:
        return set(re.findall(quoted, arm.group(1)))

    # Only these two enums have a macro-based fallback.
    if enum_name in {"ProviderKind", "ApiProvider"}:
        macro_call = re.search(
            rf'provider!\(\s*\n\s*\w+,\s*\n\s*{variant},\s*\n\s*"([^"]+)".*?'
            r"aliases:\s*\[(.*?)\]\s*\);",
            read(PROVIDER_RS),
            re.DOTALL,
        )
        if macro_call is not None:
            canonical_id, alias_list = macro_call.group(1), macro_call.group(2)
            return {canonical_id, *re.findall(quoted, alias_list)}

    raise ValueError(f"{context}: missing parse arm for {variant}")
|
||||
|
||||
|
||||
def provider_kind_ids(config_rs: str) -> dict[str, str]:
|
||||
provider_rs = read(PROVIDER_RS)
|
||||
pairs = re.findall(
|
||||
@@ -201,6 +234,76 @@ def report_provider_enum_drift(
|
||||
return errors
|
||||
|
||||
|
||||
def report_huggingface_coverage(
    config_rs: str, tui_config_rs: str, providers_md: str
) -> list[str]:
    """Check that Hugging Face aliases and env-var precedence agree everywhere.

    Three alias sources are compared against ``HUGGINGFACE_ALIASES``: the
    ``ProviderKind`` parser, the ``ApiProvider`` parser, and the inline code
    spans of the providers document. Then, for the API key, base URL and model
    variables, the expected lookup order is checked in both Rust sources and in
    the document.

    Returns the accumulated error messages; an empty list means no drift.
    """
    config_path = "crates/config/src/lib.rs"
    tui_path = "crates/tui/src/config.rs"

    alias_sets = {
        "ProviderKind Hugging Face aliases": parse_aliases_for_variant(
            config_rs, "ProviderKind", "Huggingface", config_path
        ),
        "ApiProvider Hugging Face aliases": parse_aliases_for_variant(
            tui_config_rs, "ApiProvider", "Huggingface", tui_path
        ),
    }

    # Fenced code blocks are removed first so only inline `code` spans remain.
    without_fences = re.sub(r"```.*?```", "", providers_md, flags=re.DOTALL)
    alias_sets["documented Hugging Face aliases"] = set(
        re.findall(r"`([^`]+)`", without_fences)
    )

    errors = []
    for label, found in alias_sets.items():
        # Intersecting with the expected set means extra aliases are not reported.
        errors.extend(report_set(label, HUGGINGFACE_ALIASES, found & HUGGINGFACE_ALIASES))

    precedence_checks = (
        ("Hugging Face API key env precedence", HUGGINGFACE_API_KEY_ENV_ORDER),
        ("Hugging Face base URL env precedence", HUGGINGFACE_BASE_URL_ENV_ORDER),
        ("Hugging Face model env precedence", HUGGINGFACE_MODEL_ENV_ORDER),
    )
    for label, env_order in precedence_checks:
        errors.extend(report_env_lookup_order(label, config_rs, env_order, config_path))
        errors.extend(report_env_lookup_order(label, tui_config_rs, env_order, tui_path))
        errors.extend(report_string_order(label, providers_md, env_order, "docs/PROVIDERS.md"))

    return errors
|
||||
|
||||
|
||||
def report_env_lookup_order(
    label: str, source: str, expected_order: list[str], context: str
) -> list[str]:
    """Check that ``std::env::var("NAME")`` lookups appear in the expected order.

    Each variable name is wrapped into the literal Rust call text, and the
    resulting needles are handed to ``report_string_order``.
    """
    needles = []
    for env_name in expected_order:
        needles.append('std::env::var("{}")'.format(env_name))
    return report_string_order(label, source, needles, context)
|
||||
|
||||
|
||||
def report_string_order(
    label: str, source: str, expected_order: list[str], context: str
) -> list[str]:
    """Check that each needle first occurs in ``source`` in the expected order.

    Args:
        label: Human-readable name of the check, used as the message prefix.
        source: Text to search.
        expected_order: Needles listed in the order their first occurrences
            should appear.
        context: Name of the file being checked, used in messages.

    Returns:
        An empty list when every needle is present and the first occurrences
        are in non-decreasing position order. Otherwise one "missing" message
        per absent needle, or a single "wrong order" message.
    """
    positions = [source.find(needle) for needle in expected_order]

    # Report every absent needle in one pass instead of stopping at the first,
    # so a single run of the checker shows the full gap.
    missing = [
        needle for needle, index in zip(expected_order, positions) if index == -1
    ]
    if missing:
        return [f"{label} missing {needle!r} in {context}" for needle in missing]

    if positions != sorted(positions):
        return [
            f"{label} has wrong order in {context}: expected "
            + " before ".join(expected_order)
        ]
    return []
|
||||
|
||||
|
||||
def provider_table_name(provider_id: str) -> str:
    """Map a provider id to its config table name.

    Ids listed in ``SHARED_PROVIDER_TABLES`` use the table recorded there;
    every other id has its hyphens replaced with underscores.
    """
    if provider_id in SHARED_PROVIDER_TABLES:
        return SHARED_PROVIDER_TABLES[provider_id]
    return provider_id.replace("-", "_")
|
||||
|
||||
@@ -219,6 +322,7 @@ def main() -> int:
|
||||
|
||||
errors: list[str] = []
|
||||
errors += report_provider_enum_drift(canonical_ids, live_api_provider_ids)
|
||||
errors += report_huggingface_coverage(config_rs, tui_config_rs, providers_md)
|
||||
errors += report_set(
|
||||
"shipped provider rows",
|
||||
canonical_ids,
|
||||
@@ -256,4 +360,4 @@ def main() -> int:
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
raise SystemExit(main())
|
||||
|
||||
Reference in New Issue
Block a user