Merge PR #3054 from Hmbown: native Anthropic Messages API adapter
feat(client): native Anthropic Messages API adapter — cache_control, thinking blocks, tool streaming (#3014)
This commit is contained in:
+12
-1
@@ -20,7 +20,7 @@
|
||||
# `api_key` / `base_url` are
|
||||
# still read as DeepSeek defaults when `[providers.deepseek]` is absent
|
||||
# (backward compatibility).
|
||||
provider = "deepseek" # deepseek | deepseek-cn | nvidia-nim | openai | atlascloud | wanjie-ark | volcengine | openrouter | xiaomi-mimo | novita | fireworks | siliconflow | siliconflow-CN | arcee | moonshot | sglang | vllm | ollama | huggingface
|
||||
provider = "deepseek" # deepseek | deepseek-cn | nvidia-nim | openai | atlascloud | wanjie-ark | volcengine | openrouter | xiaomi-mimo | novita | fireworks | siliconflow | siliconflow-CN | arcee | moonshot | sglang | vllm | ollama | huggingface | together | openai-codex | anthropic
|
||||
api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty
|
||||
base_url = "https://api.deepseek.com/beta"
|
||||
# provider = "deepseek-cn" # legacy alias (official host is still https://api.deepseek.com)
|
||||
@@ -440,6 +440,17 @@ max_subagents = 10 # optional (1-20)
|
||||
# base_url = "https://chatgpt.com/backend-api"
|
||||
# model = "gpt-5.5"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Anthropic Provider (native Messages API)
|
||||
# Talks to https://api.anthropic.com/v1/messages with x-api-key auth — not an
|
||||
# OpenAI-compatible route. Models: claude-opus-4-8, claude-sonnet-4-6 (default),
|
||||
# claude-haiku-4-5. Env vars: ANTHROPIC_API_KEY, ANTHROPIC_BASE_URL,
|
||||
# ANTHROPIC_MODEL.
|
||||
[providers.anthropic]
|
||||
# api_key = "sk-ant-..."
|
||||
# base_url = "https://api.anthropic.com"
|
||||
# model = "claude-sonnet-4-6"
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Web Search Provider
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -607,6 +607,28 @@ impl Default for ModelRegistry {
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
// Anthropic native Messages API models (#3014)
|
||||
ModelInfo {
|
||||
id: "claude-opus-4-8".to_string(),
|
||||
provider: ProviderKind::Anthropic,
|
||||
aliases: vec!["opus".to_string(), "claude-opus".to_string()],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "claude-sonnet-4-6".to_string(),
|
||||
provider: ProviderKind::Anthropic,
|
||||
aliases: vec!["sonnet".to_string(), "claude-sonnet".to_string()],
|
||||
supports_tools: true,
|
||||
supports_reasoning: true,
|
||||
},
|
||||
ModelInfo {
|
||||
id: "claude-haiku-4-5".to_string(),
|
||||
provider: ProviderKind::Anthropic,
|
||||
aliases: vec!["haiku".to_string(), "claude-haiku".to_string()],
|
||||
supports_tools: true,
|
||||
supports_reasoning: false,
|
||||
},
|
||||
// MiniMax 2.7 (OpenRouter)
|
||||
ModelInfo {
|
||||
id: "minimax/minimax-2.7".to_string(),
|
||||
|
||||
@@ -771,6 +771,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
|
||||
ProviderKind::Huggingface => "huggingface",
|
||||
ProviderKind::Together => "together",
|
||||
ProviderKind::OpenaiCodex => "openai-codex",
|
||||
ProviderKind::Anthropic => "anthropic",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -895,6 +896,7 @@ fn provider_env_vars(provider: ProviderKind) -> &'static [&'static str] {
|
||||
],
|
||||
ProviderKind::Together => &["TOGETHER_API_KEY"],
|
||||
ProviderKind::OpenaiCodex => &["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"],
|
||||
ProviderKind::Anthropic => &["ANTHROPIC_API_KEY"],
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -27,6 +27,8 @@ const DEFAULT_OPENAI_MODEL: &str = "deepseek-v4-pro";
|
||||
const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta";
|
||||
const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
|
||||
const DEFAULT_OPENAI_CODEX_MODEL: &str = "gpt-5.5";
|
||||
const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6";
|
||||
const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com";
|
||||
const DEFAULT_OPENAI_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api";
|
||||
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
|
||||
const DEFAULT_ATLASCLOUD_MODEL: &str = "deepseek-ai/deepseek-v4-flash";
|
||||
@@ -152,10 +154,12 @@ pub enum ProviderKind {
|
||||
alias = "chatgpt_codex"
|
||||
)]
|
||||
OpenaiCodex,
|
||||
#[serde(alias = "claude")]
|
||||
Anthropic,
|
||||
}
|
||||
|
||||
impl ProviderKind {
|
||||
pub const ALL: [Self; 20] = [
|
||||
pub const ALL: [Self; 21] = [
|
||||
Self::Deepseek,
|
||||
Self::NvidiaNim,
|
||||
Self::Openai,
|
||||
@@ -176,6 +180,7 @@ impl ProviderKind {
|
||||
Self::Huggingface,
|
||||
Self::Together,
|
||||
Self::OpenaiCodex,
|
||||
Self::Anthropic,
|
||||
];
|
||||
|
||||
#[must_use]
|
||||
@@ -201,6 +206,7 @@ impl ProviderKind {
|
||||
Self::Huggingface => "huggingface",
|
||||
Self::Together => "together",
|
||||
Self::OpenaiCodex => "openai-codex",
|
||||
Self::Anthropic => "anthropic",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -231,6 +237,7 @@ impl ProviderKind {
|
||||
"ollama" | "ollama-local" => Some(Self::Ollama),
|
||||
"huggingface" | "hugging-face" | "hugging_face" | "hf" => Some(Self::Huggingface),
|
||||
"together" | "together-ai" | "together_ai" => Some(Self::Together),
|
||||
"anthropic" | "claude" => Some(Self::Anthropic),
|
||||
"openai-codex" | "openai_codex" | "openaicodex" | "codex" | "chatgpt"
|
||||
| "chatgpt-codex" | "chatgpt_codex" | "chatgptcodex" => Some(Self::OpenaiCodex),
|
||||
_ => None,
|
||||
@@ -312,6 +319,8 @@ pub struct ProvidersToml {
|
||||
alias = "chatgpt-codex"
|
||||
)]
|
||||
pub openai_codex: ProviderConfigToml,
|
||||
#[serde(default)]
|
||||
pub anthropic: ProviderConfigToml,
|
||||
}
|
||||
|
||||
/// Sibling `permissions.toml` schema.
|
||||
@@ -361,6 +370,7 @@ impl ProvidersToml {
|
||||
ProviderKind::Huggingface => &self.huggingface,
|
||||
ProviderKind::Together => &self.together,
|
||||
ProviderKind::OpenaiCodex => &self.openai_codex,
|
||||
ProviderKind::Anthropic => &self.anthropic,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -385,6 +395,7 @@ impl ProvidersToml {
|
||||
ProviderKind::Huggingface => &mut self.huggingface,
|
||||
ProviderKind::Together => &mut self.together,
|
||||
ProviderKind::OpenaiCodex => &mut self.openai_codex,
|
||||
ProviderKind::Anthropic => &mut self.anthropic,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2022,6 +2033,7 @@ impl ConfigToml {
|
||||
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL.to_string(),
|
||||
ProviderKind::Together => DEFAULT_TOGETHER_BASE_URL.to_string(),
|
||||
ProviderKind::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL.to_string(),
|
||||
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL.to_string(),
|
||||
})
|
||||
};
|
||||
// CLI flag wins outright. Otherwise: config-file → injected secrets/env.
|
||||
@@ -2454,6 +2466,7 @@ fn default_model_for_provider(provider: ProviderKind) -> &'static str {
|
||||
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_MODEL,
|
||||
ProviderKind::Together => DEFAULT_TOGETHER_MODEL,
|
||||
ProviderKind::OpenaiCodex => DEFAULT_OPENAI_CODEX_MODEL,
|
||||
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_MODEL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2479,6 +2492,7 @@ fn default_base_url_for_provider(provider: ProviderKind) -> &'static str {
|
||||
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
|
||||
ProviderKind::Together => DEFAULT_TOGETHER_BASE_URL,
|
||||
ProviderKind::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL,
|
||||
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3231,6 +3245,8 @@ struct EnvRuntimeOverrides {
|
||||
together_model: Option<String>,
|
||||
openai_codex_base_url: Option<String>,
|
||||
openai_codex_model: Option<String>,
|
||||
anthropic_base_url: Option<String>,
|
||||
anthropic_model: Option<String>,
|
||||
}
|
||||
|
||||
impl EnvRuntimeOverrides {
|
||||
@@ -3394,6 +3410,12 @@ impl EnvRuntimeOverrides {
|
||||
.or_else(|_| std::env::var("CODEX_MODEL"))
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty()),
|
||||
anthropic_base_url: std::env::var("ANTHROPIC_BASE_URL")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty()),
|
||||
anthropic_model: std::env::var("ANTHROPIC_MODEL")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3422,6 +3444,7 @@ impl EnvRuntimeOverrides {
|
||||
ProviderKind::Huggingface => self.huggingface_base_url.clone(),
|
||||
ProviderKind::Together => self.together_base_url.clone(),
|
||||
ProviderKind::OpenaiCodex => self.openai_codex_base_url.clone(),
|
||||
ProviderKind::Anthropic => self.anthropic_base_url.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3441,6 +3464,7 @@ impl EnvRuntimeOverrides {
|
||||
ProviderKind::Huggingface => self.huggingface_model.clone(),
|
||||
ProviderKind::Together => self.together_model.clone(),
|
||||
ProviderKind::OpenaiCodex => self.openai_codex_model.clone(),
|
||||
ProviderKind::Anthropic => self.anthropic_model.clone(),
|
||||
_ => None,
|
||||
}?;
|
||||
|
||||
@@ -5132,10 +5156,12 @@ unix_socket_path = "/tmp/cw-hooks.sock"
|
||||
);
|
||||
assert!(!provider.display_name().trim().is_empty());
|
||||
assert!(!provider.env_vars().is_empty());
|
||||
// OpenAI Codex (ChatGPT) speaks the Responses API; every other
|
||||
// built-in provider is OpenAI-compatible Chat Completions.
|
||||
// OpenAI Codex (ChatGPT) speaks the Responses API and Anthropic
|
||||
// speaks the native Messages API; every other built-in provider
|
||||
// is OpenAI-compatible Chat Completions.
|
||||
let expected_wire = match kind {
|
||||
ProviderKind::OpenaiCodex => provider::WireFormat::Responses,
|
||||
ProviderKind::Anthropic => provider::WireFormat::AnthropicMessages,
|
||||
_ => provider::WireFormat::ChatCompletions,
|
||||
};
|
||||
assert_eq!(provider.wire(), expected_wire);
|
||||
|
||||
@@ -27,6 +27,8 @@ pub enum WireFormat {
|
||||
ChatCompletions,
|
||||
/// OpenAI Responses API (`/responses`).
|
||||
Responses,
|
||||
/// Native Anthropic Messages API (`/v1/messages`).
|
||||
AnthropicMessages,
|
||||
}
|
||||
|
||||
/// Static metadata for a built-in model provider.
|
||||
@@ -320,6 +322,39 @@ impl Provider for OpenaiCodex {
|
||||
}
|
||||
}
|
||||
|
||||
/// Native Anthropic Messages API provider (#3014).
|
||||
pub struct Anthropic;
|
||||
|
||||
impl Provider for Anthropic {
|
||||
fn kind(&self) -> ProviderKind {
|
||||
ProviderKind::Anthropic
|
||||
}
|
||||
|
||||
fn display_name(&self) -> &'static str {
|
||||
"Anthropic"
|
||||
}
|
||||
|
||||
fn default_base_url(&self) -> &'static str {
|
||||
crate::DEFAULT_ANTHROPIC_BASE_URL
|
||||
}
|
||||
|
||||
fn default_model(&self) -> &'static str {
|
||||
crate::DEFAULT_ANTHROPIC_MODEL
|
||||
}
|
||||
|
||||
fn env_vars(&self) -> &'static [&'static str] {
|
||||
&["ANTHROPIC_API_KEY"]
|
||||
}
|
||||
|
||||
fn provider_config_key(&self) -> &'static str {
|
||||
"anthropic"
|
||||
}
|
||||
|
||||
fn wire(&self) -> WireFormat {
|
||||
WireFormat::AnthropicMessages
|
||||
}
|
||||
}
|
||||
|
||||
static DEEPSEEK: Deepseek = Deepseek;
|
||||
static NVIDIA_NIM: NvidiaNim = NvidiaNim;
|
||||
static OPENAI: Openai = Openai;
|
||||
@@ -340,8 +375,9 @@ static OLLAMA: Ollama = Ollama;
|
||||
static HUGGINGFACE: Huggingface = Huggingface;
|
||||
static TOGETHER: Together = Together;
|
||||
static OPENAI_CODEX: OpenaiCodex = OpenaiCodex;
|
||||
static ANTHROPIC: Anthropic = Anthropic;
|
||||
|
||||
static PROVIDER_REGISTRY: [&dyn Provider; 20] = [
|
||||
static PROVIDER_REGISTRY: [&dyn Provider; 21] = [
|
||||
&DEEPSEEK,
|
||||
&NVIDIA_NIM,
|
||||
&OPENAI,
|
||||
@@ -362,6 +398,7 @@ static PROVIDER_REGISTRY: [&dyn Provider; 20] = [
|
||||
&HUGGINGFACE,
|
||||
&TOGETHER,
|
||||
&OPENAI_CODEX,
|
||||
&ANTHROPIC,
|
||||
];
|
||||
|
||||
/// Return all built-in provider metadata entries in `ProviderKind::ALL` order.
|
||||
@@ -410,5 +447,6 @@ pub fn provider_for_kind(kind: ProviderKind) -> &'static dyn Provider {
|
||||
ProviderKind::Huggingface => &HUGGINGFACE,
|
||||
ProviderKind::Together => &TOGETHER,
|
||||
ProviderKind::OpenaiCodex => &OPENAI_CODEX,
|
||||
ProviderKind::Anthropic => &ANTHROPIC,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -754,6 +754,7 @@ pub fn env_for(name: &str) -> Option<String> {
|
||||
"vllm" | "v-llm" => &["VLLM_API_KEY"],
|
||||
"ollama" | "ollama-local" => &["OLLAMA_API_KEY"],
|
||||
"openai" => &["OPENAI_API_KEY"],
|
||||
"anthropic" | "claude" => &["ANTHROPIC_API_KEY"],
|
||||
"atlascloud" | "atlas-cloud" | "atlas_cloud" | "atlas" => &["ATLASCLOUD_API_KEY"],
|
||||
"volcengine" | "volcengine-ark" | "volcengine_ark" | "ark" | "volc-ark"
|
||||
| "volcengineark" => &[
|
||||
|
||||
@@ -717,7 +717,18 @@ fn build_default_headers(
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
|
||||
let api_key = api_key.trim();
|
||||
let auth_header_name = if !api_key.is_empty()
|
||||
if api_provider == ApiProvider::Anthropic {
|
||||
// #3014: the Messages API authenticates with `x-api-key` (never
|
||||
// `Authorization: Bearer`) and pins the wire contract via
|
||||
// `anthropic-version`.
|
||||
headers.insert(
|
||||
HeaderName::from_static("anthropic-version"),
|
||||
HeaderValue::from_static("2023-06-01"),
|
||||
);
|
||||
}
|
||||
let auth_header_name = if !api_key.is_empty() && api_provider == ApiProvider::Anthropic {
|
||||
Some(HeaderName::from_static("x-api-key"))
|
||||
} else if !api_key.is_empty()
|
||||
&& api_provider == ApiProvider::XiaomiMimo
|
||||
&& (xiaomi_mimo_base_url_uses_token_plan(base_url)
|
||||
|| xiaomi_mimo_api_key_uses_token_plan(api_key))
|
||||
@@ -1141,6 +1152,9 @@ impl LlmClient for DeepSeekClient {
|
||||
if self.api_provider == ApiProvider::OpenaiCodex {
|
||||
return self.handle_responses_message(request).await;
|
||||
}
|
||||
if self.api_provider == ApiProvider::Anthropic {
|
||||
return self.handle_anthropic_message(request).await;
|
||||
}
|
||||
self.create_message_chat(&request).await
|
||||
}
|
||||
|
||||
@@ -1151,6 +1165,9 @@ impl LlmClient for DeepSeekClient {
|
||||
if self.api_provider == ApiProvider::OpenaiCodex {
|
||||
return self.handle_responses_stream(request).await;
|
||||
}
|
||||
if self.api_provider == ApiProvider::Anthropic {
|
||||
return self.handle_anthropic_stream(request).await;
|
||||
}
|
||||
self.handle_chat_completion_stream(request).await
|
||||
}
|
||||
}
|
||||
@@ -1260,6 +1277,11 @@ pub(super) fn apply_reasoning_effort(
|
||||
// #3024: Ollama OpenAI-compat endpoint accepts think param.
|
||||
body["think"] = json!(false);
|
||||
}
|
||||
ApiProvider::Anthropic => {
|
||||
// #3014: thinking/effort shaping happens natively inside
|
||||
// client/anthropic.rs (adaptive thinking + output_config),
|
||||
// not via OpenAI-dialect fields.
|
||||
}
|
||||
ApiProvider::NvidiaNim => {
|
||||
body["chat_template_kwargs"] = json!({
|
||||
"thinking": false,
|
||||
@@ -1327,6 +1349,11 @@ pub(super) fn apply_reasoning_effort(
|
||||
// #3024: Ollama think param.
|
||||
body["think"] = json!(true);
|
||||
}
|
||||
ApiProvider::Anthropic => {
|
||||
// #3014: thinking/effort shaping happens natively inside
|
||||
// client/anthropic.rs (adaptive thinking + output_config),
|
||||
// not via OpenAI-dialect fields.
|
||||
}
|
||||
ApiProvider::NvidiaNim => {
|
||||
body["chat_template_kwargs"] = json!({
|
||||
"thinking": true,
|
||||
@@ -1375,6 +1402,11 @@ pub(super) fn apply_reasoning_effort(
|
||||
// #3024: Ollama think param.
|
||||
body["think"] = json!(true);
|
||||
}
|
||||
ApiProvider::Anthropic => {
|
||||
// #3014: thinking/effort shaping happens natively inside
|
||||
// client/anthropic.rs (adaptive thinking + output_config),
|
||||
// not via OpenAI-dialect fields.
|
||||
}
|
||||
ApiProvider::NvidiaNim => {
|
||||
body["chat_template_kwargs"] = json!({
|
||||
"thinking": true,
|
||||
@@ -1500,6 +1532,7 @@ impl DeepSeekClient {
|
||||
}
|
||||
}
|
||||
|
||||
mod anthropic;
|
||||
mod chat;
|
||||
mod responses;
|
||||
|
||||
@@ -1867,6 +1900,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "plan".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
@@ -1905,6 +1939,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "plan".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
@@ -1954,6 +1989,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "Need to call a tool".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
@@ -2005,6 +2041,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "Need to call a tool".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
@@ -2075,6 +2112,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "Internal explanation plan".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
@@ -2118,6 +2156,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "I should explain step by step.".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
@@ -2760,7 +2799,7 @@ mod tests {
|
||||
|
||||
assert!(matches!(
|
||||
response.content.first(),
|
||||
Some(ContentBlock::Thinking { thinking }) if thinking == "thinking via NIM"
|
||||
Some(ContentBlock::Thinking { thinking, .. }) if thinking == "thinking via NIM"
|
||||
));
|
||||
assert!(matches!(
|
||||
response.content.get(1),
|
||||
@@ -2902,6 +2941,7 @@ mod tests {
|
||||
let message = Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "plan".to_string(),
|
||||
}],
|
||||
};
|
||||
@@ -3045,6 +3085,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "Need to inspect the directory".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
@@ -3085,6 +3126,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "Need to search".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
@@ -3174,6 +3216,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "Need to list files".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
|
||||
@@ -0,0 +1,958 @@
|
||||
//! Native Anthropic Messages API adapter (#3014).
|
||||
//!
|
||||
//! CodeWhale's internal wire types are already Anthropic-shaped (the harness
|
||||
//! speaks Messages internally and translates *out* to OpenAI dialects), so
|
||||
//! this adapter is mostly native serialization plus an SSE pass-through:
|
||||
//! `StreamEvent` deserializes Anthropic's `message_start` /
|
||||
//! `content_block_*` / `message_delta` / `message_stop` / `ping` events
|
||||
//! directly. What the adapter adds on top:
|
||||
//!
|
||||
//! - request shaping: adaptive thinking + `output_config.effort` from
|
||||
//! CodeWhale's `reasoning_effort` tiers, sampling-parameter rules for
|
||||
//! models that reject them, and `cache_control` breakpoint placement
|
||||
//! aligned with the prefix-zone model in `prefix_cache.rs`;
|
||||
//! - usage normalization (#2961): `prompt_cache_hit_tokens` comes from
|
||||
//! `cache_read_input_tokens`, `prompt_cache_miss_tokens` is `input_tokens`
|
||||
//! plus `cache_creation_input_tokens`, and the normalized `input_tokens`
|
||||
//! is the sum of all three (total prompt, the DeepSeek convention);
|
||||
//! - signed-thinking handling: `signature_delta` is captured into
|
||||
//! [`crate::models::Delta::SignatureDelta`] and assistant thinking blocks
|
||||
//! replay verbatim (signature included); unsigned thinking blocks are
|
||||
//! dropped from replay because the API rejects them.
|
||||
//!
|
||||
//! Modeled on `client/responses.rs` (separate file per dialect, no protocol
|
||||
//! hacks in the shared paths).
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde_json::{Value, json};
|
||||
|
||||
use crate::llm_client::StreamEventBox;
|
||||
use crate::logging;
|
||||
use crate::models::{ContentBlock, MessageRequest, MessageResponse, StreamEvent, Usage};
|
||||
|
||||
use super::{DeepSeekClient, ERROR_BODY_MAX_BYTES, bounded_error_text};
|
||||
|
||||
/// Maximum `cache_control` breakpoints Anthropic accepts per request.
|
||||
const MAX_CACHE_BREAKPOINTS: usize = 4;
|
||||
|
||||
impl DeepSeekClient {
|
||||
/// Build the native Messages API request body from a [`MessageRequest`].
|
||||
pub(super) fn build_anthropic_body(&self, request: &MessageRequest, stream: bool) -> Value {
|
||||
let mut body = json!({
|
||||
"model": request.model,
|
||||
"max_tokens": request.max_tokens,
|
||||
"stream": stream,
|
||||
});
|
||||
|
||||
if let Some(system) = request.system.as_ref() {
|
||||
body["system"] = match system {
|
||||
crate::models::SystemPrompt::Text(text) => json!(text),
|
||||
crate::models::SystemPrompt::Blocks(blocks) => json!(
|
||||
blocks
|
||||
.iter()
|
||||
.map(|block| {
|
||||
let mut value = json!({
|
||||
"type": "text",
|
||||
"text": block.text,
|
||||
});
|
||||
if let Some(cache) = block.cache_control.as_ref() {
|
||||
value["cache_control"] = json!({ "type": cache.cache_type });
|
||||
}
|
||||
value
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
body["messages"] = json!(
|
||||
request
|
||||
.messages
|
||||
.iter()
|
||||
.filter_map(message_to_anthropic)
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
|
||||
if let Some(tools) = request.tools.as_ref()
|
||||
&& !tools.is_empty()
|
||||
{
|
||||
body["tools"] = json!(
|
||||
tools
|
||||
.iter()
|
||||
.map(|tool| {
|
||||
let mut value = json!({
|
||||
"name": tool.name,
|
||||
"description": tool.description,
|
||||
"input_schema": tool.input_schema,
|
||||
});
|
||||
if let Some(strict) = tool.strict {
|
||||
value["strict"] = json!(strict);
|
||||
}
|
||||
if let Some(cache) = tool.cache_control.as_ref() {
|
||||
value["cache_control"] = json!({ "type": cache.cache_type });
|
||||
}
|
||||
value
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(tool_choice) = request.tool_choice.as_ref() {
|
||||
body["tool_choice"] = anthropic_tool_choice(tool_choice);
|
||||
}
|
||||
|
||||
// Thinking + effort shaping. "off" omits thinking entirely; any other
|
||||
// tier enables adaptive thinking, with `output_config.effort` only on
|
||||
// models the capability matrix marks as thinking-capable.
|
||||
let thinking_capable = crate::models::model_supports_reasoning(&request.model);
|
||||
let effort = request
|
||||
.reasoning_effort
|
||||
.as_deref()
|
||||
.map(|raw| raw.trim().to_ascii_lowercase());
|
||||
match effort.as_deref() {
|
||||
Some("off" | "disabled" | "none" | "false") => {}
|
||||
Some(level) if thinking_capable => {
|
||||
body["thinking"] = json!({ "type": "adaptive" });
|
||||
let mapped = match level {
|
||||
"low" | "minimal" => "low",
|
||||
"medium" | "mid" => "medium",
|
||||
"max" | "xhigh" | "highest" => "max",
|
||||
_ => "high",
|
||||
};
|
||||
body["output_config"] = json!({ "effort": mapped });
|
||||
}
|
||||
None if thinking_capable => {
|
||||
body["thinking"] = json!({ "type": "adaptive" });
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Sampling parameters: Claude 4.7+ rejects temperature/top_p
|
||||
// entirely; earlier models reject the two together. Send at most one
|
||||
// (temperature wins), or neither for models that forbid them.
|
||||
if !anthropic_model_rejects_sampling(&request.model) {
|
||||
if let Some(temperature) = request.temperature {
|
||||
body["temperature"] = json!(temperature);
|
||||
} else if let Some(top_p) = request.top_p {
|
||||
body["top_p"] = json!(top_p);
|
||||
}
|
||||
}
|
||||
|
||||
apply_anthropic_cache_breakpoints(&mut body);
|
||||
body
|
||||
}
|
||||
|
||||
async fn send_anthropic_request(&self, body: &Value) -> Result<reqwest::Response> {
|
||||
let url = anthropic_messages_url(&self.base_url);
|
||||
self.wait_for_rate_limit().await;
|
||||
let response = self
|
||||
.http_client
|
||||
.post(&url)
|
||||
.header("Accept", "text/event-stream")
|
||||
.json(body)
|
||||
.send()
|
||||
.await
|
||||
.context("Anthropic Messages API request failed")?;
|
||||
|
||||
let status = response.status();
|
||||
if !status.is_success() {
|
||||
let raw = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
|
||||
let (error_type, message) = parse_anthropic_error_envelope(&raw);
|
||||
self.mark_request_failure(&format!("anthropic status={status}"))
|
||||
.await;
|
||||
anyhow::bail!("Anthropic API error (HTTP {status} {error_type}): {message}");
|
||||
}
|
||||
self.mark_request_success().await;
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Handle a streaming Messages API request.
|
||||
pub(super) async fn handle_anthropic_stream(
|
||||
&self,
|
||||
request: MessageRequest,
|
||||
) -> Result<StreamEventBox> {
|
||||
let body = self.build_anthropic_body(&request, true);
|
||||
let response = self.send_anthropic_request(&body).await?;
|
||||
|
||||
let stream_idle_timeout = self.stream_idle_timeout;
|
||||
let byte_stream = response.bytes_stream();
|
||||
|
||||
let stream = async_stream::stream! {
|
||||
use futures_util::StreamExt;
|
||||
|
||||
let mut buffer = String::new();
|
||||
tokio::pin!(byte_stream);
|
||||
|
||||
loop {
|
||||
let chunk = match tokio::time::timeout(stream_idle_timeout, byte_stream.next()).await {
|
||||
Ok(Some(Ok(chunk))) => chunk,
|
||||
Ok(Some(Err(e))) => {
|
||||
yield Err(anyhow::anyhow!("Stream read error: {e}"));
|
||||
return;
|
||||
}
|
||||
Ok(None) => break,
|
||||
Err(_) => {
|
||||
yield Err(anyhow::anyhow!("Stream idle timeout"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
buffer.push_str(&String::from_utf8_lossy(&chunk));
|
||||
|
||||
while let Some(line_end) = buffer.find('\n') {
|
||||
let line = buffer[..line_end].trim().to_string();
|
||||
buffer = buffer[line_end + 1..].to_string();
|
||||
|
||||
// `event:` lines are redundant (the data payload carries
|
||||
// `type`) and comment/heartbeat lines are ignorable.
|
||||
let Some(data) = line.strip_prefix("data: ") else {
|
||||
continue;
|
||||
};
|
||||
|
||||
match convert_anthropic_sse_data(data) {
|
||||
Some(Ok(StreamEvent::Error { error })) => {
|
||||
let (error_type, message) = anthropic_error_fields(&error);
|
||||
yield Err(anyhow::anyhow!(
|
||||
"Anthropic stream error ({error_type}): {message}"
|
||||
));
|
||||
return;
|
||||
}
|
||||
Some(Ok(event)) => {
|
||||
let is_stop = matches!(event, StreamEvent::MessageStop);
|
||||
yield Ok(event);
|
||||
if is_stop {
|
||||
return;
|
||||
}
|
||||
}
|
||||
Some(Err(e)) => {
|
||||
logging::warn(format!("Failed to parse Anthropic SSE event: {e}"));
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
/// Handle a non-streaming Messages API request.
|
||||
pub(super) async fn handle_anthropic_message(
|
||||
&self,
|
||||
request: MessageRequest,
|
||||
) -> Result<MessageResponse> {
|
||||
let body = self.build_anthropic_body(&request, false);
|
||||
let response = self.send_anthropic_request(&body).await?;
|
||||
let mut value: Value = response
|
||||
.json()
|
||||
.await
|
||||
.context("Failed to parse Anthropic Messages response")?;
|
||||
if let Some(usage) = value.get_mut("usage") {
|
||||
*usage = json!(parse_anthropic_usage(usage));
|
||||
}
|
||||
serde_json::from_value(value).context("Failed to decode Anthropic Messages response")
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the Messages endpoint for a configured base URL.
///
/// Trailing slashes are ignored. A base URL that already ends in `/v1`
/// gets only `/messages` appended; any other base URL gets `/v1/messages`.
fn anthropic_messages_url(base_url: &str) -> String {
    let root = base_url.trim_end_matches('/');
    let suffix = if root.ends_with("/v1") {
        "/messages"
    } else {
        "/v1/messages"
    };

    let mut url = String::with_capacity(root.len() + suffix.len());
    url.push_str(root);
    url.push_str(suffix);
    url
}
|
||||
|
||||
/// Whether `model` is one that rejects `temperature` / `top_p` outright
/// (Claude 4.7+).
///
/// The check is a case-insensitive substring match against a fixed list of
/// model-name markers.
fn anthropic_model_rejects_sampling(model: &str) -> bool {
    const NO_SAMPLING_MARKERS: [&str; 4] = ["opus-4-7", "opus-4-8", "fable", "mythos"];

    let normalized = model.to_ascii_lowercase();
    NO_SAMPLING_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
|
||||
|
||||
/// Convert the engine's `tool_choice` value (OpenAI-style string or object)
|
||||
/// to the Anthropic object form.
|
||||
fn anthropic_tool_choice(tool_choice: &Value) -> Value {
|
||||
match tool_choice.as_str() {
|
||||
Some("auto") => json!({ "type": "auto" }),
|
||||
Some("none") => json!({ "type": "none" }),
|
||||
Some("any" | "required") => json!({ "type": "any" }),
|
||||
Some(name) => json!({ "type": "tool", "name": name }),
|
||||
None => tool_choice.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert one internal message to the Anthropic wire shape. Returns `None`
|
||||
/// when no blocks survive conversion (Anthropic rejects empty content).
|
||||
fn message_to_anthropic(message: &crate::models::Message) -> Option<Value> {
|
||||
let blocks: Vec<Value> = message
|
||||
.content
|
||||
.iter()
|
||||
.filter_map(content_block_to_anthropic)
|
||||
.collect();
|
||||
if blocks.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(json!({ "role": message.role, "content": blocks }))
|
||||
}
|
||||
|
||||
fn content_block_to_anthropic(block: &ContentBlock) -> Option<Value> {
|
||||
match block {
|
||||
ContentBlock::Text {
|
||||
text,
|
||||
cache_control,
|
||||
} => {
|
||||
let mut value = json!({ "type": "text", "text": text });
|
||||
if let Some(cache) = cache_control {
|
||||
value["cache_control"] = json!({ "type": cache.cache_type });
|
||||
}
|
||||
Some(value)
|
||||
}
|
||||
ContentBlock::Thinking {
|
||||
thinking,
|
||||
signature,
|
||||
} => {
|
||||
// Anthropic rejects unsigned thinking blocks on replay (and the
|
||||
// DeepSeek-era "(reasoning omitted)" placeholders mean nothing to
|
||||
// it), so only signed blocks are replayed — verbatim, signature
|
||||
// included.
|
||||
signature.as_ref().map(|signature| {
|
||||
json!({
|
||||
"type": "thinking",
|
||||
"thinking": thinking,
|
||||
"signature": signature,
|
||||
})
|
||||
})
|
||||
}
|
||||
ContentBlock::ToolUse {
|
||||
id, name, input, ..
|
||||
} => Some(json!({
|
||||
"type": "tool_use",
|
||||
"id": id,
|
||||
"name": name,
|
||||
"input": input,
|
||||
})),
|
||||
ContentBlock::ToolResult {
|
||||
tool_use_id,
|
||||
content,
|
||||
is_error,
|
||||
..
|
||||
} => {
|
||||
let mut value = json!({
|
||||
"type": "tool_result",
|
||||
"tool_use_id": tool_use_id,
|
||||
"content": content,
|
||||
});
|
||||
if let Some(is_error) = is_error {
|
||||
value["is_error"] = json!(is_error);
|
||||
}
|
||||
Some(value)
|
||||
}
|
||||
ContentBlock::ImageUrl { image_url } => Some(json!({
|
||||
"type": "image",
|
||||
"source": { "type": "url", "url": image_url.url },
|
||||
})),
|
||||
// Server-tool block types are DeepSeek/internal concepts with no
|
||||
// Anthropic client-side wire equivalent.
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Enforce the prefix-zone breakpoint policy (#3014):
|
||||
/// 1. the last tool in the catalog (or, with no tools, the last system
|
||||
/// block) — caches the immutable prefix;
|
||||
/// 2. the last content block of the most recent user turn — caches the
|
||||
/// append-only history.
|
||||
///
|
||||
/// Caller-provided breakpoints are preserved, but the total is capped at
|
||||
/// [`MAX_CACHE_BREAKPOINTS`] by dropping the earliest markers first (the
|
||||
/// latest markers cover the longest prefixes).
|
||||
fn apply_anthropic_cache_breakpoints(body: &mut Value) {
|
||||
// Place breakpoint 1: prefer the last tool; otherwise last system block.
|
||||
let mut placed_prefix = false;
|
||||
if let Some(tools) = body.get_mut("tools").and_then(Value::as_array_mut)
|
||||
&& let Some(last) = tools.last_mut()
|
||||
{
|
||||
last["cache_control"] = json!({ "type": "ephemeral" });
|
||||
placed_prefix = true;
|
||||
}
|
||||
if !placed_prefix
|
||||
&& let Some(system) = body.get_mut("system").and_then(Value::as_array_mut)
|
||||
&& let Some(last) = system.last_mut()
|
||||
{
|
||||
last["cache_control"] = json!({ "type": "ephemeral" });
|
||||
}
|
||||
|
||||
// Place breakpoint 2: last content block of the latest user message.
|
||||
if let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut)
|
||||
&& let Some(last_user) = messages
|
||||
.iter_mut()
|
||||
.rev()
|
||||
.find(|message| message.get("role").and_then(Value::as_str) == Some("user"))
|
||||
&& let Some(last_block) = last_user
|
||||
.get_mut("content")
|
||||
.and_then(Value::as_array_mut)
|
||||
.and_then(|blocks| blocks.last_mut())
|
||||
{
|
||||
last_block["cache_control"] = json!({ "type": "ephemeral" });
|
||||
}
|
||||
|
||||
// Cap at MAX_CACHE_BREAKPOINTS in render order (tools → system →
|
||||
// messages), dropping the earliest extras.
|
||||
let mut marked: Vec<*mut Value> = Vec::new();
|
||||
let collect = |value: Option<&mut Value>| {
|
||||
let Some(array) = value.and_then(Value::as_array_mut) else {
|
||||
return Vec::new();
|
||||
};
|
||||
array
|
||||
.iter_mut()
|
||||
.filter(|item| item.get("cache_control").is_some())
|
||||
.map(|item| item as *mut Value)
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
marked.extend(collect(body.get_mut("tools")));
|
||||
marked.extend(collect(body.get_mut("system")));
|
||||
if let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut) {
|
||||
for message in messages.iter_mut() {
|
||||
if let Some(blocks) = message.get_mut("content").and_then(Value::as_array_mut) {
|
||||
marked.extend(
|
||||
blocks
|
||||
.iter_mut()
|
||||
.filter(|block| block.get("cache_control").is_some())
|
||||
.map(|block| block as *mut Value),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if marked.len() > MAX_CACHE_BREAKPOINTS {
|
||||
let excess = marked.len() - MAX_CACHE_BREAKPOINTS;
|
||||
for pointer in marked.into_iter().take(excess) {
|
||||
// SAFETY: the pointers were collected from `body`, which is
|
||||
// exclusively borrowed for the duration of this function, and
|
||||
// each pointer targets a distinct JSON node.
|
||||
unsafe {
|
||||
if let Some(map) = (*pointer).as_object_mut() {
|
||||
map.remove("cache_control");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert one SSE `data:` payload into a [`StreamEvent`], normalizing usage
|
||||
/// objects to the #2961 convention. Returns `None` for ignorable payloads.
|
||||
fn convert_anthropic_sse_data(data: &str) -> Option<Result<StreamEvent>> {
|
||||
let trimmed = data.trim();
|
||||
if trimmed.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let mut value: Value = match serde_json::from_str(trimmed) {
|
||||
Ok(value) => value,
|
||||
Err(e) => return Some(Err(anyhow::anyhow!("invalid SSE JSON: {e}"))),
|
||||
};
|
||||
|
||||
match value.get("type").and_then(Value::as_str) {
|
||||
Some("message_start") => {
|
||||
if let Some(usage) = value
|
||||
.get_mut("message")
|
||||
.and_then(|message| message.get_mut("usage"))
|
||||
{
|
||||
*usage = json!(parse_anthropic_usage(usage));
|
||||
}
|
||||
}
|
||||
Some("message_delta") => {
|
||||
if let Some(usage) = value.get_mut("usage") {
|
||||
*usage = json!(parse_anthropic_usage(usage));
|
||||
}
|
||||
}
|
||||
// Tolerate unknown event types (e.g. future additions) silently.
|
||||
Some(known)
|
||||
if !matches!(
|
||||
known,
|
||||
"message_start"
|
||||
| "content_block_start"
|
||||
| "content_block_delta"
|
||||
| "content_block_stop"
|
||||
| "message_delta"
|
||||
| "message_stop"
|
||||
| "ping"
|
||||
| "error"
|
||||
) =>
|
||||
{
|
||||
return None;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
Some(serde_json::from_value(value).map_err(|e| anyhow::anyhow!("unrecognized SSE event: {e}")))
|
||||
}
|
||||
|
||||
/// Map Anthropic's usage payload onto the normalized [`Usage`] convention
|
||||
/// (#2961): hit = cache reads, miss = uncached input + cache writes,
|
||||
/// `input_tokens` = the total prompt across all three.
|
||||
fn parse_anthropic_usage(usage: &Value) -> Usage {
|
||||
let field = |name: &str| {
|
||||
usage
|
||||
.get(name)
|
||||
.and_then(Value::as_u64)
|
||||
.and_then(|value| u32::try_from(value).ok())
|
||||
.unwrap_or(0)
|
||||
};
|
||||
let input_raw = field("input_tokens");
|
||||
let cache_creation = field("cache_creation_input_tokens");
|
||||
let cache_read = field("cache_read_input_tokens");
|
||||
let output = field("output_tokens");
|
||||
|
||||
Usage {
|
||||
input_tokens: input_raw
|
||||
.saturating_add(cache_creation)
|
||||
.saturating_add(cache_read),
|
||||
output_tokens: output,
|
||||
prompt_cache_hit_tokens: Some(cache_read),
|
||||
prompt_cache_miss_tokens: Some(input_raw.saturating_add(cache_creation)),
|
||||
reasoning_tokens: None,
|
||||
reasoning_replay_tokens: None,
|
||||
server_tool_use: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract `error.type` / `error.message` from an Anthropic error envelope
|
||||
/// (`{"type":"error","error":{"type":...,"message":...}}`), falling back to
|
||||
/// the raw body so nothing is swallowed.
|
||||
fn parse_anthropic_error_envelope(raw: &str) -> (String, String) {
|
||||
let Ok(value) = serde_json::from_str::<Value>(raw) else {
|
||||
return ("unknown".to_string(), raw.to_string());
|
||||
};
|
||||
let error = value.get("error").unwrap_or(&value);
|
||||
anthropic_error_fields(error)
|
||||
}
|
||||
|
||||
fn anthropic_error_fields(error: &Value) -> (String, String) {
|
||||
let error_type = error
|
||||
.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
let message = error
|
||||
.get("message")
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
.unwrap_or_else(|| error.to_string());
|
||||
(error_type, message)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::models::{CacheControl, Message, SystemBlock, SystemPrompt, Tool};
|
||||
|
||||
fn request_with(
|
||||
model: &str,
|
||||
reasoning_effort: Option<&str>,
|
||||
temperature: Option<f32>,
|
||||
top_p: Option<f32>,
|
||||
) -> MessageRequest {
|
||||
MessageRequest {
|
||||
model: model.to_string(),
|
||||
messages: vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "hello".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
}],
|
||||
max_tokens: 1024,
|
||||
system: Some(SystemPrompt::Blocks(vec![SystemBlock {
|
||||
block_type: "text".to_string(),
|
||||
text: "be helpful".to_string(),
|
||||
cache_control: Some(CacheControl {
|
||||
cache_type: "ephemeral".to_string(),
|
||||
}),
|
||||
}])),
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
metadata: None,
|
||||
thinking: None,
|
||||
reasoning_effort: reasoning_effort.map(str::to_string),
|
||||
stream: Some(true),
|
||||
temperature,
|
||||
top_p,
|
||||
}
|
||||
}
|
||||
|
||||
fn test_client() -> DeepSeekClient {
|
||||
let _ = rustls::crypto::ring::default_provider().install_default();
|
||||
let config = crate::config::Config {
|
||||
provider: Some("anthropic".to_string()),
|
||||
providers: Some(crate::config::ProvidersConfig {
|
||||
anthropic: crate::config::ProviderConfig {
|
||||
api_key: Some("test-key".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
..Default::default()
|
||||
}),
|
||||
..Default::default()
|
||||
};
|
||||
DeepSeekClient::new(&config).expect("anthropic client constructs")
|
||||
}
|
||||
|
||||
#[test]
fn body_keeps_native_cache_control_on_system_and_tools() {
    /// String value at a JSON pointer, if present.
    fn text_at<'a>(body: &'a Value, path: &str) -> Option<&'a str> {
        body.pointer(path).and_then(Value::as_str)
    }

    let read_file = Tool {
        tool_type: None,
        name: String::from("read_file"),
        description: String::from("Read a file"),
        input_schema: json!({"type": "object", "additionalProperties": false}),
        allowed_callers: None,
        defer_loading: None,
        input_examples: None,
        strict: Some(true),
        cache_control: None,
    };
    let mut request = request_with("claude-sonnet-4-6", Some("high"), None, None);
    request.tools = Some(vec![read_file]);

    let client = test_client();
    let body = client.build_anthropic_body(&request, true);

    assert_eq!(
        text_at(&body, "/system/0/cache_control/type"),
        Some("ephemeral"),
        "system cache_control must survive natively: {body}"
    );
    assert_eq!(
        body.pointer("/tools/0/strict").and_then(Value::as_bool),
        Some(true)
    );
    assert_eq!(
        text_at(&body, "/tools/0/cache_control/type"),
        Some("ephemeral"),
        "breakpoint 1 lands on the last tool: {body}"
    );
    // Breakpoint 2 lands on the latest user turn's last block.
    assert_eq!(
        text_at(&body, "/messages/0/content/0/cache_control/type"),
        Some("ephemeral")
    );
}
|
||||
|
||||
#[test]
fn body_maps_reasoning_effort_to_adaptive_thinking_and_effort() {
    let client = test_client();
    // Build a streaming body for `model` at the given reasoning effort.
    let body_for = |model: &str, effort: &str| {
        let request = request_with(model, Some(effort), None, None);
        client.build_anthropic_body(&request, true)
    };

    let sonnet_high = body_for("claude-sonnet-4-6", "high");
    assert_eq!(
        sonnet_high.pointer("/thinking/type").and_then(Value::as_str),
        Some("adaptive")
    );
    assert_eq!(
        sonnet_high
            .pointer("/output_config/effort")
            .and_then(Value::as_str),
        Some("high")
    );

    let opus_xhigh = body_for("claude-opus-4-8", "xhigh");
    assert_eq!(
        opus_xhigh
            .pointer("/output_config/effort")
            .and_then(Value::as_str),
        Some("max")
    );

    let body = body_for("claude-sonnet-4-6", "off");
    assert!(body.get("thinking").is_none(), "off omits thinking: {body}");
    assert!(body.get("output_config").is_none());

    // Haiku is not thinking-capable: no thinking, no effort.
    let body = body_for("claude-haiku-4-5", "high");
    assert!(body.get("thinking").is_none(), "{body}");
    assert!(body.get("output_config").is_none(), "{body}");
}
|
||||
|
||||
#[test]
fn body_drops_sampling_params_for_models_that_reject_them() {
    let client = test_client();
    // Both sampling knobs are always supplied; only the model varies.
    let body_for = |model: &str| {
        let request = request_with(model, None, Some(0.7), Some(0.9));
        client.build_anthropic_body(&request, true)
    };

    let body = body_for("claude-opus-4-8");
    assert!(body.get("temperature").is_none(), "{body}");
    assert!(body.get("top_p").is_none(), "{body}");

    // Older models accept ONE of temperature / top_p (temperature wins).
    let body = body_for("claude-sonnet-4-6");
    let sent_temperature = body.get("temperature").and_then(Value::as_f64);
    assert_eq!(sent_temperature, Some(f64::from(0.7f32)));
    assert!(body.get("top_p").is_none(), "never send both: {body}");
}
|
||||
|
||||
#[test]
fn body_replays_signed_thinking_and_drops_unsigned_placeholders() {
    let prompt = Message {
        role: String::from("user"),
        content: vec![ContentBlock::Text {
            text: String::from("do the thing"),
            cache_control: None,
        }],
    };
    // One signed thinking block, one unsigned placeholder, then a tool call.
    let assistant_turn = Message {
        role: String::from("assistant"),
        content: vec![
            ContentBlock::Thinking {
                thinking: String::from("signed reasoning"),
                signature: Some(String::from("sig-abc")),
            },
            ContentBlock::Thinking {
                thinking: String::from("(reasoning omitted)"),
                signature: None,
            },
            ContentBlock::ToolUse {
                id: String::from("toolu_1"),
                name: String::from("read_file"),
                input: json!({"path": "a.txt"}),
                caller: None,
            },
        ],
    };
    let tool_reply = Message {
        role: String::from("user"),
        content: vec![ContentBlock::ToolResult {
            tool_use_id: String::from("toolu_1"),
            content: String::from("contents"),
            is_error: None,
            content_blocks: None,
        }],
    };

    let mut request = request_with("claude-sonnet-4-6", None, None, None);
    request.messages = vec![prompt, assistant_turn, tool_reply];

    let client = test_client();
    let body = client.build_anthropic_body(&request, true);

    // The unsigned placeholder is gone, leaving two assistant blocks.
    let assistant = &body["messages"][1]["content"];
    assert_eq!(assistant.as_array().map(Vec::len), Some(2));
    assert_eq!(
        assistant[0]["signature"].as_str(),
        Some("sig-abc"),
        "signed thinking replays verbatim: {assistant}"
    );

    let tool_use = &assistant[1];
    assert_eq!(tool_use["type"].as_str(), Some("tool_use"));
    assert!(
        tool_use.get("caller").is_none(),
        "internal caller metadata must not reach the wire"
    );

    let reply_kind = body["messages"][2]["content"][0]["type"].as_str();
    assert_eq!(reply_kind, Some("tool_result"));
}
|
||||
|
||||
/// Seven candidate markers (five caller-marked user turns plus the two
/// placed breakpoints) must be trimmed to the cap, keeping the latest ones
/// and dropping the earliest.
#[test]
fn breakpoints_are_capped_at_four_dropping_earliest() {
    let client = test_client();
    let mut request = request_with("claude-sonnet-4-6", None, None, None);
    // Five caller-marked user turns + the two placed breakpoints.
    request.messages = (0..5)
        .map(|i| Message {
            role: "user".to_string(),
            content: vec![ContentBlock::Text {
                text: format!("turn {i}"),
                cache_control: Some(CacheControl {
                    cache_type: "ephemeral".to_string(),
                }),
            }],
        })
        .collect();

    let body = client.build_anthropic_body(&request, true);
    let mut count = 0;
    if body.pointer("/system/0/cache_control").is_some() {
        count += 1;
    }
    for message in body["messages"].as_array().unwrap() {
        for block in message["content"].as_array().unwrap() {
            if block.get("cache_control").is_some() {
                count += 1;
            }
        }
    }
    assert!(
        count <= MAX_CACHE_BREAKPOINTS,
        "breakpoints must be capped at {MAX_CACHE_BREAKPOINTS}, got {count}: {body}"
    );
    // The latest user turn keeps its marker (longest prefix coverage).
    assert!(
        body.pointer("/messages/4/content/0/cache_control")
            .is_some(),
        "{body}"
    );
    // With five marked user turns and a cap of four, the earliest user
    // turn's marker is always among those dropped.
    assert!(
        body.pointer("/messages/0/content/0/cache_control")
            .is_none(),
        "the earliest user marker must be dropped first: {body}"
    );
}
|
||||
|
||||
#[test]
fn sse_fixture_decodes_text_thinking_signature_and_tool_use() {
    use crate::models::{ContentBlockStart, Delta};

    // A full streamed turn: thinking (with signature), text, a tool call
    // whose arguments arrive in two fragments, a ping, then the closing
    // message_delta / message_stop pair.
    let fixture = [
        r#"{"type":"message_start","message":{"id":"msg_01","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-6","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":2045,"cache_read_input_tokens":18000,"output_tokens":1}}}"#,
        r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
        r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"Let me check"}}"#,
        r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"sig-xyz"}}"#,
        r#"{"type":"content_block_stop","index":0}"#,
        r#"{"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}"#,
        r#"{"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"Reading the file."}}"#,
        r#"{"type":"content_block_stop","index":1}"#,
        r#"{"type":"content_block_start","index":2,"content_block":{"type":"tool_use","id":"toolu_9","name":"read_file","input":{}}}"#,
        r#"{"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"{\"path\":"}}"#,
        r#"{"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"\"a.txt\"}"}}"#,
        r#"{"type":"content_block_stop","index":2}"#,
        r#"{"type":"ping"}"#,
        r#"{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":42}}"#,
        r#"{"type":"message_stop"}"#,
    ];

    let mut decoded: Vec<StreamEvent> = Vec::with_capacity(fixture.len());
    for payload in fixture {
        let event = convert_anthropic_sse_data(payload)
            .expect("known event")
            .expect("decodes");
        decoded.push(event);
    }

    // message_start usage normalized to the #2961 convention.
    let StreamEvent::MessageStart { message } = &decoded[0] else {
        panic!("expected MessageStart, got {:?}", decoded[0]);
    };
    let start_usage = &message.usage;
    assert_eq!(start_usage.input_tokens, 3 + 2045 + 18000);
    assert_eq!(start_usage.prompt_cache_hit_tokens, Some(18000));
    assert_eq!(start_usage.prompt_cache_miss_tokens, Some(3 + 2045));

    assert!(matches!(
        &decoded[1],
        StreamEvent::ContentBlockStart {
            content_block: ContentBlockStart::Thinking { .. },
            ..
        }
    ));
    assert!(matches!(
        &decoded[3],
        StreamEvent::ContentBlockDelta {
            delta: Delta::SignatureDelta { signature },
            ..
        } if signature == "sig-xyz"
    ));
    assert!(matches!(
        &decoded[6],
        StreamEvent::ContentBlockDelta {
            delta: Delta::TextDelta { text },
            ..
        } if text == "Reading the file."
    ));

    // Concatenate the tool-argument fragments in arrival order.
    let mut tool_json = String::new();
    decoded.iter().for_each(|event| {
        if let StreamEvent::ContentBlockDelta {
            delta: Delta::InputJsonDelta { partial_json },
            ..
        } = event
        {
            tool_json.push_str(partial_json);
        }
    });
    let tool_args: Value = serde_json::from_str(&tool_json).expect("accumulated tool args parse");
    assert_eq!(tool_args, json!({"path": "a.txt"}));

    assert!(matches!(&decoded[12], StreamEvent::Ping));

    let StreamEvent::MessageDelta { delta, usage } = &decoded[13] else {
        panic!("expected MessageDelta");
    };
    assert_eq!(delta.stop_reason.as_deref(), Some("tool_use"));
    assert_eq!(usage.as_ref().map(|u| u.output_tokens), Some(42));

    assert!(matches!(&decoded[14], StreamEvent::MessageStop));
}
|
||||
|
||||
#[test]
fn sse_error_event_and_unknown_events_are_handled() {
    // An `error` event decodes into StreamEvent::Error carrying the payload.
    let overloaded =
        r#"{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}"#;
    let decoded = convert_anthropic_sse_data(overloaded)
        .expect("error event decodes")
        .expect("error event is a StreamEvent");
    let StreamEvent::Error { error } = decoded else {
        panic!("expected StreamEvent::Error");
    };
    let (error_type, message) = anthropic_error_fields(&error);
    assert_eq!(error_type, "overloaded_error");
    assert_eq!(message, "Overloaded");

    // Event types outside the known set yield nothing.
    let unknown = convert_anthropic_sse_data(r#"{"type":"content_block_started_v2","index":0}"#);
    assert!(unknown.is_none(), "unknown event types are tolerated");

    // Whitespace-only payloads yield nothing either.
    let blank = convert_anthropic_sse_data("   ");
    assert!(blank.is_none());
}
|
||||
|
||||
#[test]
fn usage_mapping_handles_missing_cache_fields() {
    // With no cache fields, everything counts as uncached input.
    let payload = json!({"input_tokens": 10, "output_tokens": 5});
    let mapped = parse_anthropic_usage(&payload);

    assert_eq!(mapped.input_tokens, 10);
    assert_eq!(mapped.output_tokens, 5);
    assert_eq!(mapped.prompt_cache_hit_tokens, Some(0));
    assert_eq!(mapped.prompt_cache_miss_tokens, Some(10));
}
|
||||
|
||||
#[test]
fn error_envelope_parses_type_and_message() {
    // A well-formed envelope yields the nested error's type and message.
    let envelope = r#"{"type":"error","error":{"type":"rate_limit_error","message":"Too many requests"},"request_id":"req_1"}"#;
    let (error_type, message) = parse_anthropic_error_envelope(envelope);
    assert_eq!(error_type, "rate_limit_error");
    assert_eq!(message, "Too many requests");

    // A non-JSON body comes back verbatim under the "unknown" type.
    let (error_type, message) = parse_anthropic_error_envelope("upstream blew up");
    assert_eq!(error_type, "unknown");
    assert_eq!(message, "upstream blew up");
}
|
||||
|
||||
#[test]
fn messages_url_tolerates_v1_suffix() {
    // (configured base URL, expected messages endpoint)
    let cases = [
        (
            "https://api.anthropic.com",
            "https://api.anthropic.com/v1/messages",
        ),
        (
            "https://api.anthropic.com/",
            "https://api.anthropic.com/v1/messages",
        ),
        (
            "https://gateway.example/v1",
            "https://gateway.example/v1/messages",
        ),
    ];

    for (base_url, expected) in cases {
        assert_eq!(anthropic_messages_url(base_url), expected);
    }
}
|
||||
}
|
||||
@@ -1415,7 +1415,7 @@ fn build_chat_messages_with_reasoning(
|
||||
},
|
||||
}));
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => thinking_parts.push(thinking.clone()),
|
||||
ContentBlock::Thinking { thinking, .. } => thinking_parts.push(thinking.clone()),
|
||||
ContentBlock::ToolUse {
|
||||
id,
|
||||
name,
|
||||
@@ -2045,6 +2045,7 @@ pub(super) fn parse_chat_message(payload: &Value) -> Result<MessageResponse> {
|
||||
reasoning_field(message).filter(|reasoning| !reasoning.trim().is_empty())
|
||||
{
|
||||
content_blocks.push(ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: reasoning.to_string(),
|
||||
});
|
||||
}
|
||||
@@ -2143,7 +2144,7 @@ fn build_stream_events(response: &MessageResponse) -> Vec<StreamEvent> {
|
||||
}
|
||||
events.push(StreamEvent::ContentBlockStop { index });
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
ContentBlock::Thinking { thinking, .. } => {
|
||||
events.push(StreamEvent::ContentBlockStart {
|
||||
index,
|
||||
content_block: ContentBlockStart::Thinking {
|
||||
|
||||
@@ -401,9 +401,10 @@ impl DeepSeekClient {
|
||||
text,
|
||||
cache_control: None,
|
||||
},
|
||||
ContentBlockStart::Thinking { thinking } => {
|
||||
ContentBlock::Thinking { thinking }
|
||||
}
|
||||
ContentBlockStart::Thinking { thinking } => ContentBlock::Thinking {
|
||||
thinking,
|
||||
signature: None,
|
||||
},
|
||||
ContentBlockStart::ToolUse {
|
||||
id,
|
||||
name,
|
||||
@@ -433,8 +434,9 @@ impl DeepSeekClient {
|
||||
}
|
||||
}
|
||||
Delta::ThinkingDelta { thinking } => {
|
||||
if let Some(ContentBlock::Thinking { thinking: existing }) =
|
||||
response.content.get_mut(i)
|
||||
if let Some(ContentBlock::Thinking {
|
||||
thinking: existing, ..
|
||||
}) = response.content.get_mut(i)
|
||||
{
|
||||
existing.push_str(&thinking);
|
||||
}
|
||||
@@ -444,6 +446,10 @@ impl DeepSeekClient {
|
||||
buf.push_str(&partial_json);
|
||||
}
|
||||
}
|
||||
Delta::SignatureDelta { .. } => {
|
||||
// Anthropic-native signature deltas never occur on
|
||||
// the Responses bridge (#3014).
|
||||
}
|
||||
}
|
||||
}
|
||||
StreamEvent::ContentBlockStop { index } => {
|
||||
@@ -551,7 +557,7 @@ fn convert_messages_to_responses_input(request: &MessageRequest) -> Vec<Value> {
|
||||
"arguments": serde_json::to_string(input).unwrap_or_default(),
|
||||
}));
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
ContentBlock::Thinking { thinking, .. } => {
|
||||
items.push(json!({
|
||||
"type": "reasoning",
|
||||
"summary": [{
|
||||
|
||||
@@ -1477,10 +1477,12 @@ mod tests {
|
||||
#[test]
|
||||
fn config_command_provider_rejects_unknown_provider() {
|
||||
let mut app = create_test_app();
|
||||
let result = config_command(&mut app, Some("provider anthropic"));
|
||||
// "anthropic" became a real provider in #3014; probe with an id that
|
||||
// stays unknown.
|
||||
let result = config_command(&mut app, Some("provider not-a-provider"));
|
||||
assert!(result.is_error);
|
||||
let msg = result.message.unwrap();
|
||||
assert!(msg.contains("Unknown provider 'anthropic'"));
|
||||
assert!(msg.contains("Unknown provider 'not-a-provider'"));
|
||||
assert!(msg.contains("openrouter"));
|
||||
assert!(msg.contains("xiaomi-mimo"));
|
||||
}
|
||||
|
||||
@@ -140,7 +140,9 @@ mod tests {
|
||||
#[test]
|
||||
fn unknown_provider_returns_error() {
|
||||
let mut app = create_test_app();
|
||||
let result = provider(&mut app, Some("anthropic"));
|
||||
// "anthropic" became a real provider in #3014; probe with an id that
|
||||
// stays unknown.
|
||||
let result = provider(&mut app, Some("not-a-provider"));
|
||||
let msg = result.message.expect("expected error message");
|
||||
assert!(msg.contains("Unknown provider"));
|
||||
assert!(msg.contains("openrouter"));
|
||||
|
||||
@@ -607,7 +607,7 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi
|
||||
ContentBlock::Text { text, .. } => text.len() / 4,
|
||||
// Historical reasoning blocks are UI/session metadata for DeepSeek.
|
||||
// Only current-turn tool-call reasoning is sent back to the API.
|
||||
ContentBlock::Thinking { thinking } if include_thinking => thinking.len() / 4,
|
||||
ContentBlock::Thinking { thinking, .. } if include_thinking => thinking.len() / 4,
|
||||
ContentBlock::Thinking { .. } => 0,
|
||||
ContentBlock::ToolUse { input, .. } => serde_json::to_string(input)
|
||||
.map(|s| s.len() / 4)
|
||||
@@ -1958,6 +1958,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: thinking.clone(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
|
||||
@@ -163,6 +163,10 @@ pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[
|
||||
"deepseek/deepseek-v4-flash",
|
||||
];
|
||||
pub const OFFICIAL_DEEPSEEK_MODELS: &[&str] = &["deepseek-v4-pro", "deepseek-v4-flash"];
|
||||
pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6";
|
||||
pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8";
|
||||
pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5";
|
||||
pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com";
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
@@ -188,6 +192,7 @@ pub enum ApiProvider {
|
||||
Huggingface,
|
||||
Together,
|
||||
OpenaiCodex,
|
||||
Anthropic,
|
||||
}
|
||||
|
||||
impl ApiProvider {
|
||||
@@ -237,6 +242,7 @@ impl ApiProvider {
|
||||
"ollama" | "ollama-local" => Some(Self::Ollama),
|
||||
"huggingface" | "hugging-face" | "hugging_face" | "hf" => Some(Self::Huggingface),
|
||||
"together" | "together-ai" | "together_ai" => Some(Self::Together),
|
||||
"anthropic" | "claude" => Some(Self::Anthropic),
|
||||
"openai-codex" | "openai_codex" | "openaicodex" | "codex" | "chatgpt"
|
||||
| "chatgpt-codex" | "chatgpt_codex" | "chatgptcodex" => Some(Self::OpenaiCodex),
|
||||
_ => None,
|
||||
@@ -267,6 +273,7 @@ impl ApiProvider {
|
||||
Self::Huggingface => "huggingface",
|
||||
Self::Together => "together",
|
||||
Self::OpenaiCodex => "openai-codex",
|
||||
Self::Anthropic => "anthropic",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -295,6 +302,7 @@ impl ApiProvider {
|
||||
Self::Huggingface => "Hugging Face",
|
||||
Self::Together => "Together AI",
|
||||
Self::OpenaiCodex => "OpenAI Codex (ChatGPT)",
|
||||
Self::Anthropic => "Anthropic",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -322,6 +330,7 @@ impl ApiProvider {
|
||||
Self::Huggingface,
|
||||
Self::Together,
|
||||
Self::OpenaiCodex,
|
||||
Self::Anthropic,
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -378,6 +387,8 @@ pub struct ModelAliasDeprecation {
|
||||
pub enum RequestPayloadMode {
|
||||
/// Standard OpenAI-compatible `/v1/chat/completions` payload.
|
||||
ChatCompletions,
|
||||
/// Native Anthropic Messages API `/v1/messages` payload (#3014).
|
||||
AnthropicMessages,
|
||||
}
|
||||
|
||||
/// Resolve the provider capability for a given [`ApiProvider`] and resolved
|
||||
@@ -387,6 +398,23 @@ pub enum RequestPayloadMode {
|
||||
/// in the API payload (after normalization / provider-specific mapping).
|
||||
#[must_use]
|
||||
pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability {
|
||||
if matches!(provider, ApiProvider::Anthropic) {
|
||||
return ProviderCapability {
|
||||
provider,
|
||||
resolved_model: resolved_model.to_string(),
|
||||
// 200K is the conservative Anthropic floor; 4.6+ models resolve
|
||||
// their 1M windows from models.rs rows (#3014).
|
||||
context_window: crate::models::context_window_for_model(resolved_model)
|
||||
.unwrap_or(200_000),
|
||||
max_output: crate::models::max_output_tokens_for_model(resolved_model)
|
||||
.unwrap_or(64_000),
|
||||
thinking_supported: crate::models::model_supports_reasoning(resolved_model),
|
||||
cache_telemetry_supported: true,
|
||||
request_payload_mode: RequestPayloadMode::AnthropicMessages,
|
||||
alias_deprecation: None,
|
||||
};
|
||||
}
|
||||
|
||||
// #3023: Delete the Openai/Atlascloud/Moonshot early-return so these
|
||||
// providers use the generic model-based path below, which correctly
|
||||
// resolves context windows, output limits, and thinking support from
|
||||
@@ -819,6 +847,11 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati
|
||||
ApiProvider::Openai | ApiProvider::Atlascloud => OFFICIAL_DEEPSEEK_MODELS.to_vec(),
|
||||
ApiProvider::Together => vec![DEFAULT_TOGETHER_MODEL],
|
||||
ApiProvider::OpenaiCodex => vec![DEFAULT_OPENAI_CODEX_MODEL],
|
||||
ApiProvider::Anthropic => vec![
|
||||
ANTHROPIC_OPUS_MODEL,
|
||||
DEFAULT_ANTHROPIC_MODEL,
|
||||
ANTHROPIC_HAIKU_MODEL,
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1962,6 +1995,8 @@ pub struct ProvidersConfig {
|
||||
pub together: ProviderConfig,
|
||||
#[serde(default, alias = "openai-codex", alias = "codex", alias = "chatgpt")]
|
||||
pub openai_codex: ProviderConfig,
|
||||
#[serde(default, alias = "claude")]
|
||||
pub anthropic: ProviderConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
@@ -2127,6 +2162,7 @@ impl Config {
|
||||
ApiProvider::NvidiaNim => "providers.nvidia_nim",
|
||||
ApiProvider::Together => "providers.together",
|
||||
ApiProvider::OpenaiCodex => "providers.openai_codex",
|
||||
ApiProvider::Anthropic => "providers.anthropic",
|
||||
ApiProvider::Deepseek | ApiProvider::DeepseekCN => return,
|
||||
};
|
||||
tracing::warn!(
|
||||
@@ -2276,6 +2312,7 @@ impl Config {
|
||||
ApiProvider::Huggingface => &providers.huggingface,
|
||||
ApiProvider::Together => &providers.together,
|
||||
ApiProvider::OpenaiCodex => &providers.openai_codex,
|
||||
ApiProvider::Anthropic => &providers.anthropic,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2302,6 +2339,7 @@ impl Config {
|
||||
ApiProvider::Huggingface => &mut providers.huggingface,
|
||||
ApiProvider::Together => &mut providers.together,
|
||||
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2425,6 +2463,7 @@ impl Config {
|
||||
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_MODEL,
|
||||
ApiProvider::Together => DEFAULT_TOGETHER_MODEL,
|
||||
ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_MODEL,
|
||||
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_MODEL,
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
@@ -2448,6 +2487,7 @@ impl Config {
|
||||
.filter(|base| base.contains("integrate.api.nvidia.com"))
|
||||
.cloned(),
|
||||
ApiProvider::Openai
|
||||
| ApiProvider::Anthropic
|
||||
| ApiProvider::Atlascloud
|
||||
| ApiProvider::WanjieArk
|
||||
| ApiProvider::Openrouter
|
||||
@@ -2511,6 +2551,7 @@ impl Config {
|
||||
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
|
||||
ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL,
|
||||
ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL,
|
||||
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
|
||||
}
|
||||
.to_string()
|
||||
})
|
||||
@@ -2560,6 +2601,7 @@ impl Config {
|
||||
ApiProvider::Huggingface => "huggingface",
|
||||
ApiProvider::Together => "together",
|
||||
ApiProvider::OpenaiCodex => "openai_codex",
|
||||
ApiProvider::Anthropic => "anthropic",
|
||||
};
|
||||
|
||||
// 0. DeepSeek compatibility slot. The legacy top-level `api_key`
|
||||
@@ -2726,6 +2768,11 @@ impl Config {
|
||||
"Together AI API key not found. Run 'codewhale auth set --provider together', \
|
||||
set TOGETHER_API_KEY, or add [providers.together] api_key in ~/.codewhale/config.toml."
|
||||
),
|
||||
ApiProvider::Anthropic => anyhow::bail!(
|
||||
"Anthropic API key not found. Run 'codewhale auth set --provider anthropic', \
|
||||
set ANTHROPIC_API_KEY, or add [providers.anthropic] api_key in ~/.codewhale/config.toml. \
|
||||
Keys are created at https://platform.claude.com/."
|
||||
),
|
||||
ApiProvider::OpenaiCodex => anyhow::bail!(
|
||||
"OpenAI Codex OAuth credentials not found.\n\
|
||||
\n\
|
||||
@@ -3437,6 +3484,13 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
.openai
|
||||
.base_url = Some(value);
|
||||
}
|
||||
ApiProvider::Anthropic => {
|
||||
config
|
||||
.providers
|
||||
.get_or_insert_with(ProvidersConfig::default)
|
||||
.anthropic
|
||||
.base_url = Some(value);
|
||||
}
|
||||
ApiProvider::Openrouter => {
|
||||
config
|
||||
.providers
|
||||
@@ -3757,6 +3811,7 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
ApiProvider::Huggingface => &mut providers.huggingface,
|
||||
ApiProvider::Together => &mut providers.together,
|
||||
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
};
|
||||
let mut provider_headers = entry.http_headers.clone().unwrap_or_default();
|
||||
provider_headers.extend(headers);
|
||||
@@ -3953,6 +4008,7 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
ApiProvider::Huggingface => &mut providers.huggingface,
|
||||
ApiProvider::Together => &mut providers.together,
|
||||
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
};
|
||||
entry.model = Some(value);
|
||||
}
|
||||
@@ -4277,6 +4333,7 @@ fn default_base_url_for_provider(provider: ApiProvider) -> &'static str {
|
||||
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
|
||||
ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL,
|
||||
ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL,
|
||||
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4684,6 +4741,7 @@ fn merge_providers(
|
||||
deepseek_cn: merge_provider_config(base.deepseek_cn, override_cfg.deepseek_cn),
|
||||
nvidia_nim: merge_provider_config(base.nvidia_nim, override_cfg.nvidia_nim),
|
||||
openai: merge_provider_config(base.openai, override_cfg.openai),
|
||||
anthropic: merge_provider_config(base.anthropic, override_cfg.anthropic),
|
||||
atlascloud: merge_provider_config(base.atlascloud, override_cfg.atlascloud),
|
||||
wanjie_ark: merge_provider_config(base.wanjie_ark, override_cfg.wanjie_ark),
|
||||
openrouter: merge_provider_config(base.openrouter, override_cfg.openrouter),
|
||||
@@ -5152,6 +5210,9 @@ pub fn active_provider_has_env_api_key(config: &Config) -> bool {
|
||||
|| std::env::var("NVIDIA_NIM_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|
||||
}
|
||||
ApiProvider::Openai => std::env::var("OPENAI_API_KEY").is_ok_and(|k| !k.trim().is_empty()),
|
||||
ApiProvider::Anthropic => {
|
||||
std::env::var("ANTHROPIC_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|
||||
}
|
||||
ApiProvider::Atlascloud => {
|
||||
std::env::var("ATLASCLOUD_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|
||||
}
|
||||
@@ -5216,6 +5277,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
|
||||
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
|
||||
ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
|
||||
ApiProvider::Openai => "OPENAI_API_KEY",
|
||||
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
|
||||
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
|
||||
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY",
|
||||
ApiProvider::Openrouter => "OPENROUTER_API_KEY",
|
||||
@@ -5342,6 +5404,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
|
||||
}
|
||||
ApiProvider::NvidiaNim => "providers.nvidia_nim",
|
||||
ApiProvider::Openai => "providers.openai",
|
||||
ApiProvider::Anthropic => "providers.anthropic",
|
||||
ApiProvider::Atlascloud => "providers.atlascloud",
|
||||
ApiProvider::WanjieArk => "providers.wanjie_ark",
|
||||
ApiProvider::Openrouter => "providers.openrouter",
|
||||
@@ -5386,6 +5449,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
|
||||
}
|
||||
ApiProvider::NvidiaNim => "nvidia_nim",
|
||||
ApiProvider::Openai => "openai",
|
||||
ApiProvider::Anthropic => "anthropic",
|
||||
ApiProvider::Atlascloud => "atlascloud",
|
||||
ApiProvider::WanjieArk => "wanjie_ark",
|
||||
ApiProvider::Openrouter => "openrouter",
|
||||
@@ -5483,6 +5547,7 @@ fn provider_config_key(provider: ApiProvider) -> Result<&'static str> {
|
||||
}
|
||||
ApiProvider::NvidiaNim => Ok("nvidia_nim"),
|
||||
ApiProvider::Openai => Ok("openai"),
|
||||
ApiProvider::Anthropic => Ok("anthropic"),
|
||||
ApiProvider::Atlascloud => Ok("atlascloud"),
|
||||
ApiProvider::WanjieArk => Ok("wanjie_ark"),
|
||||
ApiProvider::Volcengine => Ok("volcengine"),
|
||||
|
||||
@@ -204,6 +204,7 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static
|
||||
}
|
||||
ApiProvider::NvidiaNim => Ok("nvidia_nim"),
|
||||
ApiProvider::Openai => Ok("openai"),
|
||||
ApiProvider::Anthropic => Ok("anthropic"),
|
||||
ApiProvider::Atlascloud => Ok("atlascloud"),
|
||||
ApiProvider::WanjieArk => Ok("wanjie_ark"),
|
||||
ApiProvider::Volcengine => Ok("volcengine"),
|
||||
|
||||
@@ -604,6 +604,7 @@ impl Engine {
|
||||
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
|
||||
ApiProvider::NvidiaNim => "NVIDIA_API_KEY/NVIDIA_NIM_API_KEY",
|
||||
ApiProvider::Openai => "OPENAI_API_KEY",
|
||||
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
|
||||
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
|
||||
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY/WANJIE_API_KEY/WANJIE_MAAS_API_KEY",
|
||||
ApiProvider::Volcengine => "VOLCENGINE_API_KEY/VOLCENGINE_ARK_API_KEY/ARK_API_KEY",
|
||||
|
||||
@@ -1680,6 +1680,7 @@ async fn session_update_preserves_reasoning_tool_only_turn() {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "Need a tool before answering.".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
|
||||
@@ -430,6 +430,9 @@ impl Engine {
|
||||
let mut current_text_raw = String::new();
|
||||
let mut current_text_visible = String::new();
|
||||
let mut current_thinking = String::new();
|
||||
// #3014: Anthropic signed-thinking signature for the current
|
||||
// thinking block; must be replayed verbatim in tool loops.
|
||||
let mut current_thinking_signature: Option<String> = None;
|
||||
let mut tool_uses: Vec<ToolUseState> = Vec::new();
|
||||
let mut usage = Usage {
|
||||
input_tokens: 0,
|
||||
@@ -757,6 +760,14 @@ impl Engine {
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Delta::SignatureDelta { signature } => {
|
||||
// #3014: capture (and concatenate, defensively)
|
||||
// the signed-thinking signature for replay.
|
||||
match current_thinking_signature.as_mut() {
|
||||
Some(existing) => existing.push_str(&signature),
|
||||
None => current_thinking_signature = Some(signature),
|
||||
}
|
||||
}
|
||||
Delta::InputJsonDelta { partial_json } => {
|
||||
if let Some(&tool_idx) = current_tool_indices.get(&index)
|
||||
&& let Some(tool_state) = tool_uses.get_mut(tool_idx)
|
||||
@@ -857,6 +868,14 @@ impl Engine {
|
||||
}
|
||||
}
|
||||
StreamEvent::MessageStop | StreamEvent::Ping => {}
|
||||
StreamEvent::Error { error } => {
|
||||
// #3014: Anthropic SSE error event. The adapter
|
||||
// surfaces fatal errors as stream Err items; this
|
||||
// defensive arm keeps any passed-through error
|
||||
// visible instead of silently dropped.
|
||||
crate::logging::warn(format!("Provider stream error event: {error}"));
|
||||
stream_errors += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -944,7 +963,10 @@ impl Engine {
|
||||
None
|
||||
};
|
||||
if let Some(thinking) = thinking_to_persist {
|
||||
content_blocks.push(ContentBlock::Thinking { thinking });
|
||||
content_blocks.push(ContentBlock::Thinking {
|
||||
thinking,
|
||||
signature: current_thinking_signature.clone(),
|
||||
});
|
||||
}
|
||||
let mut final_text = current_text_visible.clone();
|
||||
if tool_uses.is_empty() && tool_parser::has_tool_call_markers(&current_text_raw) {
|
||||
|
||||
@@ -2026,6 +2026,10 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
|
||||
"OPENAI_API_KEY",
|
||||
"codewhale auth set --provider openai --api-key \"...\"",
|
||||
),
|
||||
crate::config::ApiProvider::Anthropic => (
|
||||
"ANTHROPIC_API_KEY",
|
||||
"codewhale auth set --provider anthropic --api-key \"...\"",
|
||||
),
|
||||
crate::config::ApiProvider::Atlascloud => (
|
||||
"ATLASCLOUD_API_KEY",
|
||||
"codewhale auth set --provider atlascloud --api-key \"...\"",
|
||||
@@ -2100,6 +2104,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
|
||||
match config.api_provider() {
|
||||
crate::config::ApiProvider::NvidiaNim => "nvidia_nim",
|
||||
crate::config::ApiProvider::Openai => "openai",
|
||||
crate::config::ApiProvider::Anthropic => "anthropic",
|
||||
crate::config::ApiProvider::Atlascloud => "atlascloud",
|
||||
crate::config::ApiProvider::WanjieArk => "wanjie_ark",
|
||||
crate::config::ApiProvider::Volcengine => "volcengine",
|
||||
|
||||
@@ -468,7 +468,7 @@ fn message_response_text(response: &MessageResponse) -> String {
|
||||
ContentBlock::Text { text, .. } | ContentBlock::ToolResult { content: text, .. } => {
|
||||
append_router_text(&mut out, text);
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
ContentBlock::Thinking { thinking, .. } => {
|
||||
append_router_text(&mut out, thinking);
|
||||
}
|
||||
ContentBlock::ToolUse { name, .. } => {
|
||||
|
||||
@@ -91,7 +91,15 @@ pub enum ContentBlock {
|
||||
#[serde(rename = "image_url")]
|
||||
ImageUrl { image_url: ImageUrlContent },
|
||||
#[serde(rename = "thinking")]
|
||||
Thinking { thinking: String },
|
||||
Thinking {
|
||||
thinking: String,
|
||||
/// Anthropic signed-thinking signature (#3014). Only populated on the
|
||||
/// native Messages dialect and serde-skipped when absent so OpenAI
|
||||
/// dialects are unaffected. Anthropic rejects tool loops that drop or
|
||||
/// modify signed thinking blocks, so replay this verbatim.
|
||||
#[serde(skip_serializing_if = "Option::is_none", default)]
|
||||
signature: Option<String>,
|
||||
},
|
||||
#[serde(rename = "tool_use")]
|
||||
ToolUse {
|
||||
id: String,
|
||||
@@ -249,6 +257,9 @@ pub fn context_window_for_model(model: &str) -> Option<u32> {
|
||||
|
||||
fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
|
||||
match model_lower {
|
||||
// Anthropic 4.6+ models carry a 1M window; Haiku stays at 200K (#3014).
|
||||
"claude-opus-4-8" | "claude-sonnet-4-6" => Some(1_000_000),
|
||||
"claude-haiku-4-5" => Some(200_000),
|
||||
"trinity-mini" => Some(128_000),
|
||||
"arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "trinity-large-preview" => {
|
||||
Some(262_144)
|
||||
@@ -289,6 +300,8 @@ pub fn max_output_tokens_for_model(model: &str) -> Option<u32> {
|
||||
return Some(384_000);
|
||||
}
|
||||
match lower.as_str() {
|
||||
"claude-opus-4-8" => Some(128_000),
|
||||
"claude-sonnet-4-6" | "claude-haiku-4-5" => Some(64_000),
|
||||
"arcee-ai/trinity-large-thinking"
|
||||
| "trinity-large-thinking"
|
||||
| "moonshotai/kimi-k2.6"
|
||||
@@ -326,7 +339,9 @@ pub fn model_supports_reasoning(model: &str) -> bool {
|
||||
}
|
||||
matches!(
|
||||
lower.as_str(),
|
||||
"arcee-ai/trinity-large-thinking"
|
||||
"claude-opus-4-8"
|
||||
| "claude-sonnet-4-6"
|
||||
| "arcee-ai/trinity-large-thinking"
|
||||
| "trinity-large-thinking"
|
||||
| "google/gemma-4-31b-it"
|
||||
| "google/gemma-4-31b-it:free"
|
||||
@@ -441,6 +456,9 @@ pub enum StreamEvent {
|
||||
MessageStop,
|
||||
#[serde(rename = "ping")]
|
||||
Ping,
|
||||
/// Anthropic SSE error event (#3014).
|
||||
#[serde(rename = "error")]
|
||||
Error { error: serde_json::Value },
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
@@ -480,6 +498,10 @@ pub enum Delta {
|
||||
ThinkingDelta { thinking: String },
|
||||
#[serde(rename = "input_json_delta")]
|
||||
InputJsonDelta { partial_json: String },
|
||||
/// Anthropic signed-thinking signature delta (#3014); arrives at the end
|
||||
/// of a thinking block on the native Messages stream.
|
||||
#[serde(rename = "signature_delta")]
|
||||
SignatureDelta { signature: String },
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
|
||||
@@ -767,6 +767,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "let me think...".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
|
||||
@@ -346,7 +346,7 @@ fn compact_content_block(block: &ContentBlock) -> Value {
|
||||
"type": "text",
|
||||
"text": text,
|
||||
}),
|
||||
ContentBlock::Thinking { thinking } => json!({
|
||||
ContentBlock::Thinking { thinking, .. } => json!({
|
||||
"type": "thinking",
|
||||
"redacted": true,
|
||||
"chars": thinking.chars().count(),
|
||||
|
||||
@@ -952,6 +952,7 @@ mod tests {
|
||||
cache_control: None,
|
||||
},
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "skip".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
|
||||
@@ -5345,7 +5345,7 @@ fn message_response_text(blocks: &[ContentBlock]) -> String {
|
||||
}
|
||||
out.push_str(text);
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
ContentBlock::Thinking { thinking, .. } => {
|
||||
if !out.is_empty() {
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
@@ -171,6 +171,7 @@ mod tests {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
signature: None,
|
||||
thinking: "The user seems to be asking me to classify myself.".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
|
||||
@@ -638,7 +638,7 @@ pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => {
|
||||
ContentBlock::Thinking { thinking, .. } => {
|
||||
if let Some(HistoryCell::Thinking { content, .. }) = cells.last_mut() {
|
||||
if !content.is_empty() {
|
||||
content.push('\n');
|
||||
|
||||
@@ -104,6 +104,7 @@ impl ProviderPickerView {
|
||||
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
|
||||
ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
|
||||
ApiProvider::Openai => "OPENAI_API_KEY",
|
||||
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
|
||||
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
|
||||
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY",
|
||||
ApiProvider::Volcengine => "VOLCENGINE_API_KEY",
|
||||
@@ -512,7 +513,8 @@ mod tests {
|
||||
"Ollama",
|
||||
"Hugging Face",
|
||||
"Together AI",
|
||||
"OpenAI Codex (ChatGPT)"
|
||||
"OpenAI Codex (ChatGPT)",
|
||||
"Anthropic"
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -547,7 +549,7 @@ mod tests {
|
||||
let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config);
|
||||
|
||||
picker.handle_key(key(KeyCode::Up));
|
||||
assert_eq!(picker.selected_provider(), ApiProvider::OpenaiCodex);
|
||||
assert_eq!(picker.selected_provider(), ApiProvider::Anthropic);
|
||||
|
||||
picker.handle_key(key(KeyCode::Down));
|
||||
assert_eq!(picker.selected_provider(), ApiProvider::Deepseek);
|
||||
|
||||
@@ -4998,7 +4998,10 @@ fn push_assistant_message(
|
||||
) {
|
||||
let mut blocks = Vec::new();
|
||||
if let Some(thinking) = thinking {
|
||||
blocks.push(ContentBlock::Thinking { thinking });
|
||||
blocks.push(ContentBlock::Thinking {
|
||||
thinking,
|
||||
signature: None,
|
||||
});
|
||||
}
|
||||
if !text.is_empty() {
|
||||
blocks.push(ContentBlock::Text {
|
||||
@@ -7223,6 +7226,7 @@ fn render(f: &mut Frame, app: &mut App) {
|
||||
crate::config::ApiProvider::DeepseekCN => None,
|
||||
crate::config::ApiProvider::NvidiaNim => Some("NIM"),
|
||||
crate::config::ApiProvider::Openai => Some("OpenAI"),
|
||||
crate::config::ApiProvider::Anthropic => Some("Claude"),
|
||||
crate::config::ApiProvider::Atlascloud => Some("Atlas"),
|
||||
crate::config::ApiProvider::WanjieArk => Some("Wanjie"),
|
||||
crate::config::ApiProvider::Volcengine => Some("Volc"),
|
||||
@@ -8274,6 +8278,7 @@ async fn apply_provider_picker_api_key(
|
||||
ApiProvider::Huggingface => &mut providers.huggingface,
|
||||
ApiProvider::Together => &mut providers.together,
|
||||
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
};
|
||||
entry.api_key = Some(api_key);
|
||||
}
|
||||
@@ -8333,6 +8338,7 @@ fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider,
|
||||
ApiProvider::Huggingface => &mut providers.huggingface,
|
||||
ApiProvider::Together => &mut providers.together,
|
||||
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
};
|
||||
entry.auth_mode = Some(auth_mode);
|
||||
}
|
||||
|
||||
@@ -498,7 +498,7 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize {
|
||||
for block in &msg.content {
|
||||
match block {
|
||||
ContentBlock::Text { text, .. } => total += text.len(),
|
||||
ContentBlock::Thinking { thinking } => total += thinking.len(),
|
||||
ContentBlock::Thinking { thinking, .. } => total += thinking.len(),
|
||||
ContentBlock::ToolUse { input, .. } => total += input.to_string().len(),
|
||||
ContentBlock::ToolResult { content, .. } => total += content.len(),
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
|
||||
@@ -79,6 +79,7 @@ fn assistant_thinking(thinking: &str, text: &str) -> Message {
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
thinking: thinking.to_string(),
|
||||
signature: None,
|
||||
},
|
||||
ContentBlock::Text {
|
||||
text: text.to_string(),
|
||||
@@ -246,7 +247,7 @@ async fn reasoning_replay_required_on_subsequent_turn() {
|
||||
.content
|
||||
.iter()
|
||||
.find_map(|b| match b {
|
||||
ContentBlock::Thinking { thinking } => Some(thinking.clone()),
|
||||
ContentBlock::Thinking { thinking, .. } => Some(thinking.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.expect("Thinking block present");
|
||||
|
||||
@@ -32,6 +32,7 @@ fn assistant_thinking_tool_call(
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
thinking: thinking.to_string(),
|
||||
signature: None,
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
id: id.to_string(),
|
||||
|
||||
@@ -462,6 +462,9 @@ Remaining variables:
|
||||
- `ARCEE_API_KEY`
|
||||
- `ARCEE_BASE_URL`
|
||||
- `ARCEE_MODEL`
|
||||
- `ANTHROPIC_API_KEY`
|
||||
- `ANTHROPIC_BASE_URL`
|
||||
- `ANTHROPIC_MODEL`
|
||||
- `MOONSHOT_API_KEY` or `KIMI_API_KEY`
|
||||
- `MOONSHOT_BASE_URL` or `KIMI_BASE_URL`
|
||||
- `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, or `KIMI_MODEL`
|
||||
|
||||
+4
-2
@@ -30,8 +30,8 @@ The canonical provider IDs are:
|
||||
|
||||
`deepseek`, `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`,
|
||||
`openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`,
|
||||
`siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, `ollama`, and
|
||||
`huggingface`.
|
||||
`siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, `ollama`,
|
||||
`huggingface`, `together`, `openai-codex`, and `anthropic`.
|
||||
|
||||
Use any of these surfaces to select a provider:
|
||||
|
||||
@@ -137,6 +137,7 @@ endpoint.
|
||||
| `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible route. Org-prefixed model IDs pass through. |
|
||||
| `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. |
|
||||
| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. |
|
||||
| `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. |
|
||||
|
||||
### Hugging Face Provider vs MCP vs Hub
|
||||
|
||||
@@ -219,6 +220,7 @@ endpoint when the endpoint supports model listing.
|
||||
| `huggingface` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | no |
|
||||
| `together` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
|
||||
| `openai-codex` | `gpt-5.5` | yes | yes |
|
||||
| `anthropic` | `claude-opus-4-8`, `claude-sonnet-4-6`, `claude-haiku-4-5` | yes | yes for `claude-opus-4-8` and `claude-sonnet-4-6`; no for `claude-haiku-4-5` |
|
||||
|
||||
AtlasCloud keeps the same default model as the config layer and adds
|
||||
provider-scoped aliases for the Pro and Flash rows. Other AtlasCloud model IDs
|
||||
|
||||
Reference in New Issue
Block a user