Merge PR #3054 from Hmbown: native Anthropic Messages API adapter

feat(client): native Anthropic Messages API adapter — cache_control, thinking blocks, tool streaming (#3014)
This commit is contained in:
Hunter Bown
2026-06-10 22:46:01 -07:00
committed by GitHub
34 changed files with 1281 additions and 33 deletions
+12 -1
View File
@@ -20,7 +20,7 @@
# `api_key` / `base_url` are
# still read as DeepSeek defaults when `[providers.deepseek]` is absent
# (backward compatibility).
provider = "deepseek" # deepseek | deepseek-cn | nvidia-nim | openai | atlascloud | wanjie-ark | volcengine | openrouter | xiaomi-mimo | novita | fireworks | siliconflow | siliconflow-CN | arcee | moonshot | sglang | vllm | ollama | huggingface
provider = "deepseek" # deepseek | deepseek-cn | nvidia-nim | openai | atlascloud | wanjie-ark | volcengine | openrouter | xiaomi-mimo | novita | fireworks | siliconflow | siliconflow-CN | arcee | moonshot | sglang | vllm | ollama | huggingface | together | openai-codex | anthropic
api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty
base_url = "https://api.deepseek.com/beta"
# provider = "deepseek-cn" # legacy alias (official host is still https://api.deepseek.com)
@@ -440,6 +440,17 @@ max_subagents = 10 # optional (1-20)
# base_url = "https://chatgpt.com/backend-api"
# model = "gpt-5.5"
# ─────────────────────────────────────────────────────────────────────────────────
# Anthropic Provider (native Messages API)
# Talks to https://api.anthropic.com/v1/messages with x-api-key auth — not an
# OpenAI-compatible route. Models: claude-opus-4-8, claude-sonnet-4-6 (default),
# claude-haiku-4-5. Env vars: ANTHROPIC_API_KEY, ANTHROPIC_BASE_URL,
# ANTHROPIC_MODEL.
[providers.anthropic]
# api_key = "sk-ant-..."
# base_url = "https://api.anthropic.com"
# model = "claude-sonnet-4-6"
# ─────────────────────────────────────────────────────────────────────────────────
# Web Search Provider
# ─────────────────────────────────────────────────────────────────────────────────
+22
View File
@@ -607,6 +607,28 @@ impl Default for ModelRegistry {
supports_tools: true,
supports_reasoning: true,
},
// Anthropic native Messages API models (#3014)
ModelInfo {
id: "claude-opus-4-8".to_string(),
provider: ProviderKind::Anthropic,
aliases: vec!["opus".to_string(), "claude-opus".to_string()],
supports_tools: true,
supports_reasoning: true,
},
ModelInfo {
id: "claude-sonnet-4-6".to_string(),
provider: ProviderKind::Anthropic,
aliases: vec!["sonnet".to_string(), "claude-sonnet".to_string()],
supports_tools: true,
supports_reasoning: true,
},
ModelInfo {
id: "claude-haiku-4-5".to_string(),
provider: ProviderKind::Anthropic,
aliases: vec!["haiku".to_string(), "claude-haiku".to_string()],
supports_tools: true,
supports_reasoning: false,
},
// MiniMax 2.7 (OpenRouter)
ModelInfo {
id: "minimax/minimax-2.7".to_string(),
+2
View File
@@ -771,6 +771,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
ProviderKind::Huggingface => "huggingface",
ProviderKind::Together => "together",
ProviderKind::OpenaiCodex => "openai-codex",
ProviderKind::Anthropic => "anthropic",
}
}
@@ -895,6 +896,7 @@ fn provider_env_vars(provider: ProviderKind) -> &'static [&'static str] {
],
ProviderKind::Together => &["TOGETHER_API_KEY"],
ProviderKind::OpenaiCodex => &["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"],
ProviderKind::Anthropic => &["ANTHROPIC_API_KEY"],
}
}
+29 -3
View File
@@ -27,6 +27,8 @@ const DEFAULT_OPENAI_MODEL: &str = "deepseek-v4-pro";
const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com/beta";
const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1";
const DEFAULT_OPENAI_CODEX_MODEL: &str = "gpt-5.5";
const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6";
const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com";
const DEFAULT_OPENAI_CODEX_BASE_URL: &str = "https://chatgpt.com/backend-api";
const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
const DEFAULT_ATLASCLOUD_MODEL: &str = "deepseek-ai/deepseek-v4-flash";
@@ -152,10 +154,12 @@ pub enum ProviderKind {
alias = "chatgpt_codex"
)]
OpenaiCodex,
#[serde(alias = "claude")]
Anthropic,
}
impl ProviderKind {
pub const ALL: [Self; 20] = [
pub const ALL: [Self; 21] = [
Self::Deepseek,
Self::NvidiaNim,
Self::Openai,
@@ -176,6 +180,7 @@ impl ProviderKind {
Self::Huggingface,
Self::Together,
Self::OpenaiCodex,
Self::Anthropic,
];
#[must_use]
@@ -201,6 +206,7 @@ impl ProviderKind {
Self::Huggingface => "huggingface",
Self::Together => "together",
Self::OpenaiCodex => "openai-codex",
Self::Anthropic => "anthropic",
}
}
@@ -231,6 +237,7 @@ impl ProviderKind {
"ollama" | "ollama-local" => Some(Self::Ollama),
"huggingface" | "hugging-face" | "hugging_face" | "hf" => Some(Self::Huggingface),
"together" | "together-ai" | "together_ai" => Some(Self::Together),
"anthropic" | "claude" => Some(Self::Anthropic),
"openai-codex" | "openai_codex" | "openaicodex" | "codex" | "chatgpt"
| "chatgpt-codex" | "chatgpt_codex" | "chatgptcodex" => Some(Self::OpenaiCodex),
_ => None,
@@ -312,6 +319,8 @@ pub struct ProvidersToml {
alias = "chatgpt-codex"
)]
pub openai_codex: ProviderConfigToml,
#[serde(default)]
pub anthropic: ProviderConfigToml,
}
/// Sibling `permissions.toml` schema.
@@ -361,6 +370,7 @@ impl ProvidersToml {
ProviderKind::Huggingface => &self.huggingface,
ProviderKind::Together => &self.together,
ProviderKind::OpenaiCodex => &self.openai_codex,
ProviderKind::Anthropic => &self.anthropic,
}
}
@@ -385,6 +395,7 @@ impl ProvidersToml {
ProviderKind::Huggingface => &mut self.huggingface,
ProviderKind::Together => &mut self.together,
ProviderKind::OpenaiCodex => &mut self.openai_codex,
ProviderKind::Anthropic => &mut self.anthropic,
}
}
}
@@ -2022,6 +2033,7 @@ impl ConfigToml {
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL.to_string(),
ProviderKind::Together => DEFAULT_TOGETHER_BASE_URL.to_string(),
ProviderKind::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL.to_string(),
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL.to_string(),
})
};
// CLI flag wins outright. Otherwise: config-file → injected secrets/env.
@@ -2454,6 +2466,7 @@ fn default_model_for_provider(provider: ProviderKind) -> &'static str {
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_MODEL,
ProviderKind::Together => DEFAULT_TOGETHER_MODEL,
ProviderKind::OpenaiCodex => DEFAULT_OPENAI_CODEX_MODEL,
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_MODEL,
}
}
@@ -2479,6 +2492,7 @@ fn default_base_url_for_provider(provider: ProviderKind) -> &'static str {
ProviderKind::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
ProviderKind::Together => DEFAULT_TOGETHER_BASE_URL,
ProviderKind::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL,
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
}
}
@@ -3231,6 +3245,8 @@ struct EnvRuntimeOverrides {
together_model: Option<String>,
openai_codex_base_url: Option<String>,
openai_codex_model: Option<String>,
anthropic_base_url: Option<String>,
anthropic_model: Option<String>,
}
impl EnvRuntimeOverrides {
@@ -3394,6 +3410,12 @@ impl EnvRuntimeOverrides {
.or_else(|_| std::env::var("CODEX_MODEL"))
.ok()
.filter(|v| !v.trim().is_empty()),
anthropic_base_url: std::env::var("ANTHROPIC_BASE_URL")
.ok()
.filter(|v| !v.trim().is_empty()),
anthropic_model: std::env::var("ANTHROPIC_MODEL")
.ok()
.filter(|v| !v.trim().is_empty()),
}
}
@@ -3422,6 +3444,7 @@ impl EnvRuntimeOverrides {
ProviderKind::Huggingface => self.huggingface_base_url.clone(),
ProviderKind::Together => self.together_base_url.clone(),
ProviderKind::OpenaiCodex => self.openai_codex_base_url.clone(),
ProviderKind::Anthropic => self.anthropic_base_url.clone(),
}
}
@@ -3441,6 +3464,7 @@ impl EnvRuntimeOverrides {
ProviderKind::Huggingface => self.huggingface_model.clone(),
ProviderKind::Together => self.together_model.clone(),
ProviderKind::OpenaiCodex => self.openai_codex_model.clone(),
ProviderKind::Anthropic => self.anthropic_model.clone(),
_ => None,
}?;
@@ -5132,10 +5156,12 @@ unix_socket_path = "/tmp/cw-hooks.sock"
);
assert!(!provider.display_name().trim().is_empty());
assert!(!provider.env_vars().is_empty());
// OpenAI Codex (ChatGPT) speaks the Responses API; every other
// built-in provider is OpenAI-compatible Chat Completions.
// OpenAI Codex (ChatGPT) speaks the Responses API and Anthropic
// speaks the native Messages API; every other built-in provider
// is OpenAI-compatible Chat Completions.
let expected_wire = match kind {
ProviderKind::OpenaiCodex => provider::WireFormat::Responses,
ProviderKind::Anthropic => provider::WireFormat::AnthropicMessages,
_ => provider::WireFormat::ChatCompletions,
};
assert_eq!(provider.wire(), expected_wire);
+39 -1
View File
@@ -27,6 +27,8 @@ pub enum WireFormat {
ChatCompletions,
/// OpenAI Responses API (`/responses`).
Responses,
/// Native Anthropic Messages API (`/v1/messages`).
AnthropicMessages,
}
/// Static metadata for a built-in model provider.
@@ -320,6 +322,39 @@ impl Provider for OpenaiCodex {
}
}
/// Native Anthropic Messages API provider (#3014).
///
/// Unit struct carrying the static metadata for the Anthropic provider. Its
/// [`Provider::wire`] reports [`WireFormat::AnthropicMessages`] rather than an
/// OpenAI dialect.
pub struct Anthropic;
impl Provider for Anthropic {
// Identifies this entry as the Anthropic provider.
fn kind(&self) -> ProviderKind {
ProviderKind::Anthropic
}
// Human-readable provider name.
fn display_name(&self) -> &'static str {
"Anthropic"
}
// Base URL used when none is configured.
fn default_base_url(&self) -> &'static str {
crate::DEFAULT_ANTHROPIC_BASE_URL
}
// Model used when none is configured.
fn default_model(&self) -> &'static str {
crate::DEFAULT_ANTHROPIC_MODEL
}
// Environment variables associated with this provider.
fn env_vars(&self) -> &'static [&'static str] {
&["ANTHROPIC_API_KEY"]
}
// Key reported for this provider's config section.
fn provider_config_key(&self) -> &'static str {
"anthropic"
}
// Anthropic speaks its native Messages wire format.
fn wire(&self) -> WireFormat {
WireFormat::AnthropicMessages
}
}
static DEEPSEEK: Deepseek = Deepseek;
static NVIDIA_NIM: NvidiaNim = NvidiaNim;
static OPENAI: Openai = Openai;
@@ -340,8 +375,9 @@ static OLLAMA: Ollama = Ollama;
static HUGGINGFACE: Huggingface = Huggingface;
static TOGETHER: Together = Together;
static OPENAI_CODEX: OpenaiCodex = OpenaiCodex;
static ANTHROPIC: Anthropic = Anthropic;
static PROVIDER_REGISTRY: [&dyn Provider; 20] = [
static PROVIDER_REGISTRY: [&dyn Provider; 21] = [
&DEEPSEEK,
&NVIDIA_NIM,
&OPENAI,
@@ -362,6 +398,7 @@ static PROVIDER_REGISTRY: [&dyn Provider; 20] = [
&HUGGINGFACE,
&TOGETHER,
&OPENAI_CODEX,
&ANTHROPIC,
];
/// Return all built-in provider metadata entries in `ProviderKind::ALL` order.
@@ -410,5 +447,6 @@ pub fn provider_for_kind(kind: ProviderKind) -> &'static dyn Provider {
ProviderKind::Huggingface => &HUGGINGFACE,
ProviderKind::Together => &TOGETHER,
ProviderKind::OpenaiCodex => &OPENAI_CODEX,
ProviderKind::Anthropic => &ANTHROPIC,
}
}
+1
View File
@@ -754,6 +754,7 @@ pub fn env_for(name: &str) -> Option<String> {
"vllm" | "v-llm" => &["VLLM_API_KEY"],
"ollama" | "ollama-local" => &["OLLAMA_API_KEY"],
"openai" => &["OPENAI_API_KEY"],
"anthropic" | "claude" => &["ANTHROPIC_API_KEY"],
"atlascloud" | "atlas-cloud" | "atlas_cloud" | "atlas" => &["ATLASCLOUD_API_KEY"],
"volcengine" | "volcengine-ark" | "volcengine_ark" | "ark" | "volc-ark"
| "volcengineark" => &[
+45 -2
View File
@@ -717,7 +717,18 @@ fn build_default_headers(
let mut headers = HeaderMap::new();
headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
let api_key = api_key.trim();
let auth_header_name = if !api_key.is_empty()
if api_provider == ApiProvider::Anthropic {
// #3014: the Messages API authenticates with `x-api-key` (never
// `Authorization: Bearer`) and pins the wire contract via
// `anthropic-version`.
headers.insert(
HeaderName::from_static("anthropic-version"),
HeaderValue::from_static("2023-06-01"),
);
}
let auth_header_name = if !api_key.is_empty() && api_provider == ApiProvider::Anthropic {
Some(HeaderName::from_static("x-api-key"))
} else if !api_key.is_empty()
&& api_provider == ApiProvider::XiaomiMimo
&& (xiaomi_mimo_base_url_uses_token_plan(base_url)
|| xiaomi_mimo_api_key_uses_token_plan(api_key))
@@ -1141,6 +1152,9 @@ impl LlmClient for DeepSeekClient {
if self.api_provider == ApiProvider::OpenaiCodex {
return self.handle_responses_message(request).await;
}
if self.api_provider == ApiProvider::Anthropic {
return self.handle_anthropic_message(request).await;
}
self.create_message_chat(&request).await
}
@@ -1151,6 +1165,9 @@ impl LlmClient for DeepSeekClient {
if self.api_provider == ApiProvider::OpenaiCodex {
return self.handle_responses_stream(request).await;
}
if self.api_provider == ApiProvider::Anthropic {
return self.handle_anthropic_stream(request).await;
}
self.handle_chat_completion_stream(request).await
}
}
@@ -1260,6 +1277,11 @@ pub(super) fn apply_reasoning_effort(
// #3024: Ollama OpenAI-compat endpoint accepts think param.
body["think"] = json!(false);
}
ApiProvider::Anthropic => {
// #3014: thinking/effort shaping happens natively inside
// client/anthropic.rs (adaptive thinking + output_config),
// not via OpenAI-dialect fields.
}
ApiProvider::NvidiaNim => {
body["chat_template_kwargs"] = json!({
"thinking": false,
@@ -1327,6 +1349,11 @@ pub(super) fn apply_reasoning_effort(
// #3024: Ollama think param.
body["think"] = json!(true);
}
ApiProvider::Anthropic => {
// #3014: thinking/effort shaping happens natively inside
// client/anthropic.rs (adaptive thinking + output_config),
// not via OpenAI-dialect fields.
}
ApiProvider::NvidiaNim => {
body["chat_template_kwargs"] = json!({
"thinking": true,
@@ -1375,6 +1402,11 @@ pub(super) fn apply_reasoning_effort(
// #3024: Ollama think param.
body["think"] = json!(true);
}
ApiProvider::Anthropic => {
// #3014: thinking/effort shaping happens natively inside
// client/anthropic.rs (adaptive thinking + output_config),
// not via OpenAI-dialect fields.
}
ApiProvider::NvidiaNim => {
body["chat_template_kwargs"] = json!({
"thinking": true,
@@ -1500,6 +1532,7 @@ impl DeepSeekClient {
}
}
mod anthropic;
mod chat;
mod responses;
@@ -1867,6 +1900,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "plan".to_string(),
},
ContentBlock::Text {
@@ -1905,6 +1939,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "plan".to_string(),
},
ContentBlock::Text {
@@ -1954,6 +1989,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "Need to call a tool".to_string(),
},
ContentBlock::ToolUse {
@@ -2005,6 +2041,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "Need to call a tool".to_string(),
},
ContentBlock::ToolUse {
@@ -2075,6 +2112,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "Internal explanation plan".to_string(),
},
ContentBlock::Text {
@@ -2118,6 +2156,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "I should explain step by step.".to_string(),
},
ContentBlock::Text {
@@ -2760,7 +2799,7 @@ mod tests {
assert!(matches!(
response.content.first(),
Some(ContentBlock::Thinking { thinking }) if thinking == "thinking via NIM"
Some(ContentBlock::Thinking { thinking, .. }) if thinking == "thinking via NIM"
));
assert!(matches!(
response.content.get(1),
@@ -2902,6 +2941,7 @@ mod tests {
let message = Message {
role: "assistant".to_string(),
content: vec![ContentBlock::Thinking {
signature: None,
thinking: "plan".to_string(),
}],
};
@@ -3045,6 +3085,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "Need to inspect the directory".to_string(),
},
ContentBlock::ToolUse {
@@ -3085,6 +3126,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "Need to search".to_string(),
},
ContentBlock::ToolUse {
@@ -3174,6 +3216,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "Need to list files".to_string(),
},
ContentBlock::ToolUse {
+958
View File
@@ -0,0 +1,958 @@
//! Native Anthropic Messages API adapter (#3014).
//!
//! CodeWhale's internal wire types are already Anthropic-shaped (the harness
//! speaks Messages internally and translates *out* to OpenAI dialects), so
//! this adapter is mostly native serialization plus an SSE pass-through:
//! `StreamEvent` deserializes Anthropic's `message_start` /
//! `content_block_*` / `message_delta` / `message_stop` / `ping` events
//! directly. What the adapter adds on top:
//!
//! - request shaping: adaptive thinking + `output_config.effort` from
//! CodeWhale's `reasoning_effort` tiers, sampling-parameter rules for
//! models that reject them, and `cache_control` breakpoint placement
//! aligned with the prefix-zone model in `prefix_cache.rs`;
//! - usage normalization (#2961): `prompt_cache_hit_tokens` comes from
//! `cache_read_input_tokens`, `prompt_cache_miss_tokens` is `input_tokens`
//! plus `cache_creation_input_tokens`, and the normalized `input_tokens`
//! is the sum of all three (total prompt, the DeepSeek convention);
//! - signed-thinking handling: `signature_delta` is captured into
//! [`crate::models::Delta::SignatureDelta`] and assistant thinking blocks
//! replay verbatim (signature included); unsigned thinking blocks are
//! dropped from replay because the API rejects them.
//!
//! Modeled on `client/responses.rs` (separate file per dialect, no protocol
//! hacks in the shared paths).
use anyhow::{Context, Result};
use serde_json::{Value, json};
use crate::llm_client::StreamEventBox;
use crate::logging;
use crate::models::{ContentBlock, MessageRequest, MessageResponse, StreamEvent, Usage};
use super::{DeepSeekClient, ERROR_BODY_MAX_BYTES, bounded_error_text};
/// Maximum `cache_control` breakpoints Anthropic accepts per request.
///
/// `apply_anthropic_cache_breakpoints` strips markers beyond this count,
/// earliest in render order first.
const MAX_CACHE_BREAKPOINTS: usize = 4;
impl DeepSeekClient {
/// Build the native Messages API request body from a [`MessageRequest`].
///
/// `stream` is copied verbatim into the body's `stream` field. The returned
/// JSON object always carries `model`, `max_tokens`, `stream` and `messages`;
/// `system`, `tools`, `tool_choice`, `thinking`, `output_config`,
/// `temperature` and `top_p` are added only when the rules below apply.
/// Cache breakpoints are applied last, once every section has been rendered.
pub(super) fn build_anthropic_body(&self, request: &MessageRequest, stream: bool) -> Value {
let mut body = json!({
"model": request.model,
"max_tokens": request.max_tokens,
"stream": stream,
});
// System prompt: plain text is sent as a JSON string; the block form is
// sent as an array of text blocks, each keeping its caller-provided
// `cache_control` type.
if let Some(system) = request.system.as_ref() {
body["system"] = match system {
crate::models::SystemPrompt::Text(text) => json!(text),
crate::models::SystemPrompt::Blocks(blocks) => json!(
blocks
.iter()
.map(|block| {
let mut value = json!({
"type": "text",
"text": block.text,
});
if let Some(cache) = block.cache_control.as_ref() {
value["cache_control"] = json!({ "type": cache.cache_type });
}
value
})
.collect::<Vec<_>>()
),
};
}
// Messages for which `message_to_anthropic` returns `None` (no block
// survived conversion) are omitted from the array.
body["messages"] = json!(
request
.messages
.iter()
.filter_map(message_to_anthropic)
.collect::<Vec<_>>()
);
// The `tools` key is only emitted for a non-empty catalog; `strict` and
// `cache_control` are copied through when the tool sets them.
if let Some(tools) = request.tools.as_ref()
&& !tools.is_empty()
{
body["tools"] = json!(
tools
.iter()
.map(|tool| {
let mut value = json!({
"name": tool.name,
"description": tool.description,
"input_schema": tool.input_schema,
});
if let Some(strict) = tool.strict {
value["strict"] = json!(strict);
}
if let Some(cache) = tool.cache_control.as_ref() {
value["cache_control"] = json!({ "type": cache.cache_type });
}
value
})
.collect::<Vec<_>>()
);
}
if let Some(tool_choice) = request.tool_choice.as_ref() {
body["tool_choice"] = anthropic_tool_choice(tool_choice);
}
// Thinking + effort shaping. "off" omits thinking entirely; any other
// tier enables adaptive thinking, with `output_config.effort` only on
// models the capability matrix marks as thinking-capable.
let thinking_capable = crate::models::model_supports_reasoning(&request.model);
// Tier names are compared after trimming and ASCII-lowercasing.
let effort = request
.reasoning_effort
.as_deref()
.map(|raw| raw.trim().to_ascii_lowercase());
match effort.as_deref() {
Some("off" | "disabled" | "none" | "false") => {}
Some(level) if thinking_capable => {
body["thinking"] = json!({ "type": "adaptive" });
// Any tier name not listed explicitly falls back to "high".
let mapped = match level {
"low" | "minimal" => "low",
"medium" | "mid" => "medium",
"max" | "xhigh" | "highest" => "max",
_ => "high",
};
body["output_config"] = json!({ "effort": mapped });
}
// No tier requested: thinking-capable models still get adaptive
// thinking, but without an `output_config.effort` hint.
None if thinking_capable => {
body["thinking"] = json!({ "type": "adaptive" });
}
// Models that are not thinking-capable get neither field.
_ => {}
}
// Sampling parameters: Claude 4.7+ rejects temperature/top_p
// entirely; earlier models reject the two together. Send at most one
// (temperature wins), or neither for models that forbid them.
// NOTE(review): a caller-supplied temperature is still sent when
// `thinking` was enabled above — confirm the API accepts that combination.
if !anthropic_model_rejects_sampling(&request.model) {
if let Some(temperature) = request.temperature {
body["temperature"] = json!(temperature);
} else if let Some(top_p) = request.top_p {
body["top_p"] = json!(top_p);
}
}
// Runs last so breakpoint placement sees the fully rendered tools,
// system and messages sections.
apply_anthropic_cache_breakpoints(&mut body);
body
}
async fn send_anthropic_request(&self, body: &Value) -> Result<reqwest::Response> {
let url = anthropic_messages_url(&self.base_url);
self.wait_for_rate_limit().await;
let response = self
.http_client
.post(&url)
.header("Accept", "text/event-stream")
.json(body)
.send()
.await
.context("Anthropic Messages API request failed")?;
let status = response.status();
if !status.is_success() {
let raw = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
let (error_type, message) = parse_anthropic_error_envelope(&raw);
self.mark_request_failure(&format!("anthropic status={status}"))
.await;
anyhow::bail!("Anthropic API error (HTTP {status} {error_type}): {message}");
}
self.mark_request_success().await;
Ok(response)
}
/// Handle a streaming Messages API request.
///
/// Sends the request with `stream: true` and returns a stream that yields one
/// item per decoded SSE `data:` payload. The stream ends after
/// `message_stop`, after the first error (transport error, idle timeout, or
/// an in-band `error` event), or when the server closes the connection.
/// Payloads that fail to parse are logged and skipped.
///
/// # Errors
///
/// Returns an error up front when the request itself fails (see
/// `send_anthropic_request`); later failures are delivered as `Err` items
/// on the returned stream.
pub(super) async fn handle_anthropic_stream(
    &self,
    request: MessageRequest,
) -> Result<StreamEventBox> {
    let body = self.build_anthropic_body(&request, true);
    let response = self.send_anthropic_request(&body).await?;
    let stream_idle_timeout = self.stream_idle_timeout;
    let byte_stream = response.bytes_stream();

    let stream = async_stream::stream! {
        use futures_util::StreamExt;

        // Buffer raw bytes and decode only complete lines: a multi-byte
        // UTF-8 character may be split across two network chunks, and
        // decoding each chunk on its own would corrupt it. The newline
        // byte never occurs inside a multi-byte sequence, so splitting on
        // it is always safe.
        let mut buffer: Vec<u8> = Vec::new();
        tokio::pin!(byte_stream);

        loop {
            let chunk = match tokio::time::timeout(stream_idle_timeout, byte_stream.next()).await {
                Ok(Some(Ok(chunk))) => chunk,
                Ok(Some(Err(e))) => {
                    yield Err(anyhow::anyhow!("Stream read error: {e}"));
                    return;
                }
                Ok(None) => break,
                Err(_) => {
                    yield Err(anyhow::anyhow!("Stream idle timeout"));
                    return;
                }
            };
            buffer.extend_from_slice(&chunk);

            while let Some(line_end) = buffer.iter().position(|&byte| byte == b'\n') {
                // Drain the line (newline included) in place instead of
                // re-allocating the remainder of the buffer for every line.
                let raw_line: Vec<u8> = buffer.drain(..=line_end).collect();
                let line = String::from_utf8_lossy(&raw_line).trim().to_string();

                // `event:` lines are redundant (the data payload carries
                // `type`) and comment/heartbeat lines are ignorable. The
                // space after `data:` is optional in SSE framing; the
                // payload is trimmed again by the converter.
                let Some(data) = line.strip_prefix("data:") else {
                    continue;
                };

                match convert_anthropic_sse_data(data) {
                    Some(Ok(StreamEvent::Error { error })) => {
                        let (error_type, message) = anthropic_error_fields(&error);
                        yield Err(anyhow::anyhow!(
                            "Anthropic stream error ({error_type}): {message}"
                        ));
                        return;
                    }
                    Some(Ok(event)) => {
                        let is_stop = matches!(event, StreamEvent::MessageStop);
                        yield Ok(event);
                        if is_stop {
                            return;
                        }
                    }
                    Some(Err(e)) => {
                        logging::warn(format!("Failed to parse Anthropic SSE event: {e}"));
                    }
                    None => {}
                }
            }
        }
    };

    Ok(Box::pin(stream))
}
/// Handle a non-streaming Messages API request.
pub(super) async fn handle_anthropic_message(
&self,
request: MessageRequest,
) -> Result<MessageResponse> {
let body = self.build_anthropic_body(&request, false);
let response = self.send_anthropic_request(&body).await?;
let mut value: Value = response
.json()
.await
.context("Failed to parse Anthropic Messages response")?;
if let Some(usage) = value.get_mut("usage") {
*usage = json!(parse_anthropic_usage(usage));
}
serde_json::from_value(value).context("Failed to decode Anthropic Messages response")
}
}
/// Build the `/v1/messages` endpoint URL from a configured base URL.
///
/// Surrounding whitespace and trailing slashes are ignored. The base URL may
/// be the bare host, may already carry a `/v1` suffix, or may already be the
/// full `/v1/messages` endpoint; all three resolve to the same URL rather
/// than doubling the path.
fn anthropic_messages_url(base_url: &str) -> String {
    let trimmed = base_url.trim().trim_end_matches('/');
    if trimmed.ends_with("/v1/messages") {
        // Already the full endpoint: appending again would yield
        // `/v1/messages/v1/messages`.
        trimmed.to_string()
    } else if trimmed.ends_with("/v1") {
        format!("{trimmed}/messages")
    } else {
        format!("{trimmed}/v1/messages")
    }
}
/// Models that reject `temperature` / `top_p` outright (Claude 4.7+).
///
/// The check is a case-insensitive substring match of the model id against a
/// fixed list of markers.
fn anthropic_model_rejects_sampling(model: &str) -> bool {
    const SAMPLING_FREE_MARKERS: [&str; 4] = ["opus-4-7", "opus-4-8", "fable", "mythos"];

    let normalized = model.to_ascii_lowercase();
    SAMPLING_FREE_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
}
/// Convert the engine's `tool_choice` value (OpenAI-style string or object)
/// to the Anthropic object form.
fn anthropic_tool_choice(tool_choice: &Value) -> Value {
match tool_choice.as_str() {
Some("auto") => json!({ "type": "auto" }),
Some("none") => json!({ "type": "none" }),
Some("any" | "required") => json!({ "type": "any" }),
Some(name) => json!({ "type": "tool", "name": name }),
None => tool_choice.clone(),
}
}
/// Convert one internal message to the Anthropic wire shape. Returns `None`
/// when no blocks survive conversion (Anthropic rejects empty content).
fn message_to_anthropic(message: &crate::models::Message) -> Option<Value> {
let blocks: Vec<Value> = message
.content
.iter()
.filter_map(content_block_to_anthropic)
.collect();
if blocks.is_empty() {
return None;
}
Some(json!({ "role": message.role, "content": blocks }))
}
/// Convert one internal content block to its Anthropic wire form.
///
/// Returns `None` for blocks that must not be sent: thinking blocks without a
/// signature and the server-tool block variants.
fn content_block_to_anthropic(block: &ContentBlock) -> Option<Value> {
    let wire = match block {
        ContentBlock::Text {
            text,
            cache_control,
        } => {
            let mut text_block = json!({ "type": "text", "text": text });
            if let Some(cache) = cache_control {
                text_block["cache_control"] = json!({ "type": cache.cache_type });
            }
            text_block
        }
        ContentBlock::Thinking {
            thinking,
            signature,
        } => {
            // Anthropic rejects unsigned thinking blocks on replay (and the
            // DeepSeek-era "(reasoning omitted)" placeholders mean nothing to
            // it), so only signed blocks are replayed — verbatim, signature
            // included.
            let signature = signature.as_ref()?;
            json!({
                "type": "thinking",
                "thinking": thinking,
                "signature": signature,
            })
        }
        ContentBlock::ToolUse {
            id, name, input, ..
        } => json!({
            "type": "tool_use",
            "id": id,
            "name": name,
            "input": input,
        }),
        ContentBlock::ToolResult {
            tool_use_id,
            content,
            is_error,
            ..
        } => {
            let mut result_block = json!({
                "type": "tool_result",
                "tool_use_id": tool_use_id,
                "content": content,
            });
            // `is_error` is only emitted when the block carries a value.
            if let Some(flag) = is_error {
                result_block["is_error"] = json!(flag);
            }
            result_block
        }
        ContentBlock::ImageUrl { image_url } => json!({
            "type": "image",
            "source": { "type": "url", "url": image_url.url },
        }),
        // Server-tool block types are DeepSeek/internal concepts with no
        // Anthropic client-side wire equivalent.
        ContentBlock::ServerToolUse { .. }
        | ContentBlock::ToolSearchToolResult { .. }
        | ContentBlock::CodeExecutionToolResult { .. } => return None,
    };
    Some(wire)
}
/// Enforce the prefix-zone breakpoint policy (#3014):
/// 1. the last tool in the catalog (or, with no tools, the last system
///    block) — caches the immutable prefix;
/// 2. the last content block of the most recent user turn — caches the
///    append-only history.
///
/// Caller-provided breakpoints are preserved, but the total is capped at
/// [`MAX_CACHE_BREAKPOINTS`] by dropping the earliest markers first (the
/// latest markers cover the longest prefixes). "Earliest" follows render
/// order: tools, then system blocks, then message content blocks.
///
/// The cap is enforced in two safe passes (count, then strip) rather than by
/// collecting raw pointers into `body`.
fn apply_anthropic_cache_breakpoints(body: &mut Value) {
    /// Count the array elements of `nodes` that carry a `cache_control` key.
    fn marker_count(nodes: Option<&Value>) -> usize {
        nodes.and_then(Value::as_array).map_or(0, |items| {
            items
                .iter()
                .filter(|item| item.get("cache_control").is_some())
                .count()
        })
    }

    /// Remove `cache_control` from the elements of `nodes`, front to back,
    /// until `excess` markers have been dropped.
    fn strip_markers(nodes: Option<&mut Value>, excess: &mut usize) {
        let Some(items) = nodes.and_then(Value::as_array_mut) else {
            return;
        };
        for item in items {
            if *excess == 0 {
                return;
            }
            if let Some(map) = item.as_object_mut()
                && map.remove("cache_control").is_some()
            {
                *excess -= 1;
            }
        }
    }

    // Place breakpoint 1: prefer the last tool; otherwise last system block.
    let mut placed_prefix = false;
    if let Some(tools) = body.get_mut("tools").and_then(Value::as_array_mut)
        && let Some(last_tool) = tools.last_mut()
    {
        last_tool["cache_control"] = json!({ "type": "ephemeral" });
        placed_prefix = true;
    }
    if !placed_prefix
        && let Some(system) = body.get_mut("system").and_then(Value::as_array_mut)
        && let Some(last_system) = system.last_mut()
    {
        last_system["cache_control"] = json!({ "type": "ephemeral" });
    }

    // Place breakpoint 2: last content block of the latest user message.
    if let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut)
        && let Some(last_user) = messages
            .iter_mut()
            .rev()
            .find(|message| message.get("role").and_then(Value::as_str) == Some("user"))
        && let Some(last_block) = last_user
            .get_mut("content")
            .and_then(Value::as_array_mut)
            .and_then(|blocks| blocks.last_mut())
    {
        last_block["cache_control"] = json!({ "type": "ephemeral" });
    }

    // Pass 1: count markers in render order (tools → system → messages).
    let message_markers = body
        .get("messages")
        .and_then(Value::as_array)
        .map_or(0, |messages| {
            messages
                .iter()
                .map(|message| marker_count(message.get("content")))
                .sum::<usize>()
        });
    let total = marker_count(body.get("tools")) + marker_count(body.get("system")) + message_markers;

    let mut excess = total.saturating_sub(MAX_CACHE_BREAKPOINTS);
    if excess == 0 {
        return;
    }

    // Pass 2: drop the earliest extras, walking the same render order.
    strip_markers(body.get_mut("tools"), &mut excess);
    strip_markers(body.get_mut("system"), &mut excess);
    if let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut) {
        for message in messages {
            if excess == 0 {
                break;
            }
            strip_markers(message.get_mut("content"), &mut excess);
        }
    }
}
/// Convert one SSE `data:` payload into a [`StreamEvent`], normalizing usage
/// objects to the #2961 convention. Returns `None` for ignorable payloads.
fn convert_anthropic_sse_data(data: &str) -> Option<Result<StreamEvent>> {
let trimmed = data.trim();
if trimmed.is_empty() {
return None;
}
let mut value: Value = match serde_json::from_str(trimmed) {
Ok(value) => value,
Err(e) => return Some(Err(anyhow::anyhow!("invalid SSE JSON: {e}"))),
};
match value.get("type").and_then(Value::as_str) {
Some("message_start") => {
if let Some(usage) = value
.get_mut("message")
.and_then(|message| message.get_mut("usage"))
{
*usage = json!(parse_anthropic_usage(usage));
}
}
Some("message_delta") => {
if let Some(usage) = value.get_mut("usage") {
*usage = json!(parse_anthropic_usage(usage));
}
}
// Tolerate unknown event types (e.g. future additions) silently.
Some(known)
if !matches!(
known,
"message_start"
| "content_block_start"
| "content_block_delta"
| "content_block_stop"
| "message_delta"
| "message_stop"
| "ping"
| "error"
) =>
{
return None;
}
_ => {}
}
Some(serde_json::from_value(value).map_err(|e| anyhow::anyhow!("unrecognized SSE event: {e}")))
}
/// Map Anthropic's usage payload onto the normalized [`Usage`] convention
/// (#2961): hit = cache reads, miss = uncached input + cache writes,
/// `input_tokens` = the total prompt across all three.
///
/// A counter that is missing, not an unsigned integer, or too large for
/// `u32` is read as 0; sums saturate instead of overflowing.
fn parse_anthropic_usage(usage: &Value) -> Usage {
    /// Read one token counter from the usage object, defaulting to 0.
    fn token_count(usage: &Value, key: &str) -> u32 {
        match usage.get(key).and_then(Value::as_u64) {
            Some(raw) => u32::try_from(raw).unwrap_or(0),
            None => 0,
        }
    }

    let uncached_input = token_count(usage, "input_tokens");
    let cache_writes = token_count(usage, "cache_creation_input_tokens");
    let cache_reads = token_count(usage, "cache_read_input_tokens");
    let generated = token_count(usage, "output_tokens");

    let cache_miss = uncached_input.saturating_add(cache_writes);
    let total_prompt = cache_miss.saturating_add(cache_reads);

    Usage {
        input_tokens: total_prompt,
        output_tokens: generated,
        prompt_cache_hit_tokens: Some(cache_reads),
        prompt_cache_miss_tokens: Some(cache_miss),
        reasoning_tokens: None,
        reasoning_replay_tokens: None,
        server_tool_use: None,
    }
}
/// Extract `error.type` / `error.message` from an Anthropic error envelope
/// (`{"type":"error","error":{"type":...,"message":...}}`), falling back to
/// the raw body so nothing is swallowed.
fn parse_anthropic_error_envelope(raw: &str) -> (String, String) {
let Ok(value) = serde_json::from_str::<Value>(raw) else {
return ("unknown".to_string(), raw.to_string());
};
let error = value.get("error").unwrap_or(&value);
anthropic_error_fields(error)
}
fn anthropic_error_fields(error: &Value) -> (String, String) {
let error_type = error
.get("type")
.and_then(Value::as_str)
.unwrap_or("unknown")
.to_string();
let message = error
.get("message")
.and_then(Value::as_str)
.map(str::to_string)
.unwrap_or_else(|| error.to_string());
(error_type, message)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::models::{CacheControl, Message, SystemBlock, SystemPrompt, Tool};
/// Build a single-turn streaming request: one user message ("hello"), a
/// 1024-token output budget, and a system prompt made of one text block marked
/// `ephemeral`. Model, reasoning effort and sampling parameters come from the
/// caller; every other optional field is left unset.
fn request_with(
    model: &str,
    reasoning_effort: Option<&str>,
    temperature: Option<f32>,
    top_p: Option<f32>,
) -> MessageRequest {
    let user_turn = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: "hello".to_string(),
            cache_control: None,
        }],
    };
    let system_block = SystemBlock {
        block_type: "text".to_string(),
        text: "be helpful".to_string(),
        cache_control: Some(CacheControl {
            cache_type: "ephemeral".to_string(),
        }),
    };
    MessageRequest {
        model: model.to_string(),
        messages: vec![user_turn],
        max_tokens: 1024,
        system: Some(SystemPrompt::Blocks(vec![system_block])),
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        reasoning_effort: reasoning_effort.map(str::to_owned),
        stream: Some(true),
        temperature,
        top_p,
    }
}
/// Construct a client whose active provider is `anthropic`, authenticated
/// with a placeholder key.
fn test_client() -> DeepSeekClient {
    // The install result is ignored on purpose (presumably because another
    // test in the same process may already have installed a provider).
    let _ = rustls::crypto::ring::default_provider().install_default();
    let anthropic = crate::config::ProviderConfig {
        api_key: Some("test-key".to_string()),
        ..Default::default()
    };
    let providers = crate::config::ProvidersConfig {
        anthropic,
        ..Default::default()
    };
    let config = crate::config::Config {
        provider: Some("anthropic".to_string()),
        providers: Some(providers),
        ..Default::default()
    };
    DeepSeekClient::new(&config).expect("anthropic client constructs")
}
#[test]
fn body_keeps_native_cache_control_on_system_and_tools() {
    let client = test_client();
    // A strict tool that arrives without a cache marker of its own.
    let read_file = Tool {
        tool_type: None,
        name: "read_file".to_string(),
        description: "Read a file".to_string(),
        input_schema: json!({"type": "object", "additionalProperties": false}),
        allowed_callers: None,
        defer_loading: None,
        input_examples: None,
        strict: Some(true),
        cache_control: None,
    };
    let mut request = request_with("claude-sonnet-4-6", Some("high"), None, None);
    request.tools = Some(vec![read_file]);

    let body = client.build_anthropic_body(&request, true);
    // Looks up a `cache_control.type` string at the given JSON pointer.
    let cache_type_at = |pointer: &str| body.pointer(pointer).and_then(Value::as_str);

    assert_eq!(
        cache_type_at("/system/0/cache_control/type"),
        Some("ephemeral"),
        "system cache_control must survive natively: {body}"
    );
    assert_eq!(
        body.pointer("/tools/0/strict").and_then(Value::as_bool),
        Some(true)
    );
    assert_eq!(
        cache_type_at("/tools/0/cache_control/type"),
        Some("ephemeral"),
        "breakpoint 1 lands on the last tool: {body}"
    );
    // The second breakpoint is expected on the last block of the newest user turn.
    assert_eq!(
        cache_type_at("/messages/0/content/0/cache_control/type"),
        Some("ephemeral")
    );
}
#[test]
fn body_maps_reasoning_effort_to_adaptive_thinking_and_effort() {
    let client = test_client();
    // Builds a streaming body for `model` with only the reasoning effort set.
    let build = |model: &str, effort: &str| {
        client.build_anthropic_body(&request_with(model, Some(effort), None, None), true)
    };

    // "high" on Sonnet: adaptive thinking plus a matching effort level.
    let body = build("claude-sonnet-4-6", "high");
    assert_eq!(
        body.pointer("/thinking/type").and_then(Value::as_str),
        Some("adaptive")
    );
    assert_eq!(
        body.pointer("/output_config/effort")
            .and_then(Value::as_str),
        Some("high")
    );

    // "xhigh" on Opus is sent as "max".
    let body = build("claude-opus-4-8", "xhigh");
    assert_eq!(
        body.pointer("/output_config/effort")
            .and_then(Value::as_str),
        Some("max")
    );

    // "off" removes both the thinking and the effort settings.
    let body = build("claude-sonnet-4-6", "off");
    assert!(body.get("thinking").is_none(), "off omits thinking: {body}");
    assert!(body.get("output_config").is_none());

    // Haiku is treated as not thinking-capable: neither field is emitted.
    let body = build("claude-haiku-4-5", "high");
    assert!(body.get("thinking").is_none(), "{body}");
    assert!(body.get("output_config").is_none(), "{body}");
}
#[test]
fn body_drops_sampling_params_for_models_that_reject_them() {
    let client = test_client();
    // Every request here asks for both temperature 0.7 and top_p 0.9.
    let build = |model: &str| {
        client.build_anthropic_body(&request_with(model, None, Some(0.7), Some(0.9)), true)
    };

    // Opus: both sampling parameters are stripped from the body.
    let body = build("claude-opus-4-8");
    assert!(body.get("temperature").is_none(), "{body}");
    assert!(body.get("top_p").is_none(), "{body}");

    // Sonnet: only one of the two may be sent, and temperature takes priority.
    let body = build("claude-sonnet-4-6");
    assert_eq!(
        body.get("temperature").and_then(Value::as_f64),
        Some(f64::from(0.7f32))
    );
    assert!(body.get("top_p").is_none(), "never send both: {body}");
}
#[test]
fn body_replays_signed_thinking_and_drops_unsigned_placeholders() {
    let client = test_client();

    let opening_turn = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::Text {
            text: "do the thing".to_string(),
            cache_control: None,
        }],
    };
    // Assistant turn: one signed thinking block, one unsigned placeholder,
    // then the tool call.
    let assistant_turn = Message {
        role: "assistant".to_string(),
        content: vec![
            ContentBlock::Thinking {
                thinking: "signed reasoning".to_string(),
                signature: Some("sig-abc".to_string()),
            },
            ContentBlock::Thinking {
                thinking: "(reasoning omitted)".to_string(),
                signature: None,
            },
            ContentBlock::ToolUse {
                id: "toolu_1".to_string(),
                name: "read_file".to_string(),
                input: json!({"path": "a.txt"}),
                caller: None,
            },
        ],
    };
    let tool_result_turn = Message {
        role: "user".to_string(),
        content: vec![ContentBlock::ToolResult {
            tool_use_id: "toolu_1".to_string(),
            content: "contents".to_string(),
            is_error: None,
            content_blocks: None,
        }],
    };

    let mut request = request_with("claude-sonnet-4-6", None, None, None);
    request.messages = vec![opening_turn, assistant_turn, tool_result_turn];
    let body = client.build_anthropic_body(&request, true);

    // Only the signed thinking block and the tool call remain on the wire.
    let assistant = &body["messages"][1]["content"];
    assert_eq!(assistant.as_array().map(Vec::len), Some(2));
    assert_eq!(
        assistant[0]["signature"].as_str(),
        Some("sig-abc"),
        "signed thinking replays verbatim: {assistant}"
    );
    assert_eq!(assistant[1]["type"].as_str(), Some("tool_use"));
    assert!(
        assistant[1].get("caller").is_none(),
        "internal caller metadata must not reach the wire"
    );
    assert_eq!(
        body["messages"][2]["content"][0]["type"].as_str(),
        Some("tool_result")
    );
}
#[test]
fn breakpoints_are_capped_at_four_dropping_earliest() {
    let client = test_client();
    let mut request = request_with("claude-sonnet-4-6", None, None, None);
    // Five user turns that each arrive with their own cache marker, on top
    // of the breakpoints the adapter places itself.
    let caller_marked_turns: Vec<Message> = (0..5)
        .map(|i| {
            let text = format!("turn {i}");
            Message {
                role: "user".to_string(),
                content: vec![ContentBlock::Text {
                    text,
                    cache_control: Some(CacheControl {
                        cache_type: "ephemeral".to_string(),
                    }),
                }],
            }
        })
        .collect();
    request.messages = caller_marked_turns;
    let body = client.build_anthropic_body(&request, true);

    // Tally every marker that reached the wire: the system block first, then
    // each content block of each message.
    let mut count = 0;
    if body.pointer("/system/0/cache_control").is_some() {
        count += 1;
    }
    let marked_blocks = body["messages"]
        .as_array()
        .unwrap()
        .iter()
        .flat_map(|message| message["content"].as_array().unwrap())
        .filter(|block| block.get("cache_control").is_some());
    for _ in marked_blocks {
        count += 1;
    }
    assert!(
        count <= MAX_CACHE_BREAKPOINTS,
        "breakpoints must be capped at {MAX_CACHE_BREAKPOINTS}, got {count}: {body}"
    );
    // The newest user turn must still be marked (longest prefix coverage).
    assert!(
        body.pointer("/messages/4/content/0/cache_control")
            .is_some(),
        "{body}"
    );
}
// Feeds a fixed sequence of SSE data payloads through
// `convert_anthropic_sse_data` and checks the decoded `StreamEvent`s by
// position, so the order of the fixture entries below is significant.
#[test]
fn sse_fixture_decodes_text_thinking_signature_and_tool_use() {
use crate::models::{ContentBlockStart, Delta};
// Fixture layout by index:
//   0      message_start (carries the initial usage counters)
//   1..=4  thinking block: start, thinking delta, signature delta, stop
//   5..=7  text block: start, text delta, stop
//   8..=11 tool_use block: start, two partial-JSON deltas, stop
//   12     ping
//   13     message_delta (stop reason + output token count)
//   14     message_stop
let events = [
r#"{"type":"message_start","message":{"id":"msg_01","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-6","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":2045,"cache_read_input_tokens":18000,"output_tokens":1}}}"#,
r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#,
r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"Let me check"}}"#,
r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"sig-xyz"}}"#,
r#"{"type":"content_block_stop","index":0}"#,
r#"{"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}"#,
r#"{"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"Reading the file."}}"#,
r#"{"type":"content_block_stop","index":1}"#,
r#"{"type":"content_block_start","index":2,"content_block":{"type":"tool_use","id":"toolu_9","name":"read_file","input":{}}}"#,
r#"{"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"{\"path\":"}}"#,
r#"{"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"\"a.txt\"}"}}"#,
r#"{"type":"content_block_stop","index":2}"#,
r#"{"type":"ping"}"#,
r#"{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":42}}"#,
r#"{"type":"message_stop"}"#,
];
// Every fixture entry must be recognized (outer `expect`) and must decode
// without error (inner `expect`), so `decoded[i]` lines up with `events[i]`.
let decoded: Vec<StreamEvent> = events
.iter()
.map(|data| {
convert_anthropic_sse_data(data)
.expect("known event")
.expect("decodes")
})
.collect();
// message_start usage normalized to the #2961 convention.
let StreamEvent::MessageStart { message } = &decoded[0] else {
panic!("expected MessageStart, got {:?}", decoded[0]);
};
// Total prompt = uncached input + cache writes + cache reads.
assert_eq!(message.usage.input_tokens, 3 + 2045 + 18000);
assert_eq!(message.usage.prompt_cache_hit_tokens, Some(18000));
assert_eq!(message.usage.prompt_cache_miss_tokens, Some(3 + 2045));
// Index 1: the thinking block opens.
assert!(matches!(
&decoded[1],
StreamEvent::ContentBlockStart {
content_block: ContentBlockStart::Thinking { .. },
..
}
));
// Index 3: the signature delta carries the signature string unchanged.
assert!(matches!(
&decoded[3],
StreamEvent::ContentBlockDelta {
delta: Delta::SignatureDelta { signature },
..
} if signature == "sig-xyz"
));
// Index 6: the text delta of the second content block.
assert!(matches!(
&decoded[6],
StreamEvent::ContentBlockDelta {
delta: Delta::TextDelta { text },
..
} if text == "Reading the file."
));
// Concatenating the partial-JSON fragments in stream order must produce
// the complete tool arguments.
let mut tool_json = String::new();
for event in &decoded {
if let StreamEvent::ContentBlockDelta {
delta: Delta::InputJsonDelta { partial_json },
..
} = event
{
tool_json.push_str(partial_json);
}
}
assert_eq!(
serde_json::from_str::<Value>(&tool_json).expect("accumulated tool args parse"),
json!({"path": "a.txt"})
);
// Tail of the stream: ping, message_delta, message_stop.
assert!(matches!(&decoded[12], StreamEvent::Ping));
let StreamEvent::MessageDelta { delta, usage } = &decoded[13] else {
panic!("expected MessageDelta");
};
assert_eq!(delta.stop_reason.as_deref(), Some("tool_use"));
assert_eq!(usage.as_ref().map(|u| u.output_tokens), Some(42));
assert!(matches!(&decoded[14], StreamEvent::MessageStop));
}
#[test]
fn sse_error_event_and_unknown_events_are_handled() {
    // An `error` event decodes into `StreamEvent::Error` with its payload intact.
    let decoded = convert_anthropic_sse_data(
        r#"{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}"#,
    )
    .expect("error event decodes")
    .expect("error event is a StreamEvent");
    let payload = match decoded {
        StreamEvent::Error { error } => error,
        _ => panic!("expected StreamEvent::Error"),
    };
    assert_eq!(
        anthropic_error_fields(&payload),
        ("overloaded_error".to_string(), "Overloaded".to_string())
    );

    // Unrecognized event types and blank payloads are skipped, not errors.
    let unknown = convert_anthropic_sse_data(r#"{"type":"content_block_started_v2","index":0}"#);
    assert!(unknown.is_none(), "unknown event types are tolerated");
    assert!(convert_anthropic_sse_data(" ").is_none());
}
#[test]
fn usage_mapping_handles_missing_cache_fields() {
    // No cache counters in the payload: they must read as zero.
    let payload = json!({"input_tokens": 10, "output_tokens": 5});
    let mapped = parse_anthropic_usage(&payload);
    assert_eq!(mapped.input_tokens, 10);
    assert_eq!(mapped.output_tokens, 5);
    assert_eq!(mapped.prompt_cache_hit_tokens, Some(0));
    assert_eq!(mapped.prompt_cache_miss_tokens, Some(10));
}
#[test]
fn error_envelope_parses_type_and_message() {
    // Well-formed envelope: type and message come from the nested `error` object.
    let parsed = parse_anthropic_error_envelope(
        r#"{"type":"error","error":{"type":"rate_limit_error","message":"Too many requests"},"request_id":"req_1"}"#,
    );
    assert_eq!(
        parsed,
        (
            "rate_limit_error".to_string(),
            "Too many requests".to_string()
        )
    );

    // Non-JSON body: the raw text is returned as the message.
    let parsed = parse_anthropic_error_envelope("upstream blew up");
    assert_eq!(
        parsed,
        ("unknown".to_string(), "upstream blew up".to_string())
    );
}
#[test]
fn messages_url_tolerates_v1_suffix() {
    // (configured base URL, expected Messages endpoint)
    let cases = [
        (
            "https://api.anthropic.com",
            "https://api.anthropic.com/v1/messages",
        ),
        (
            "https://api.anthropic.com/",
            "https://api.anthropic.com/v1/messages",
        ),
        (
            "https://gateway.example/v1",
            "https://gateway.example/v1/messages",
        ),
    ];
    for (base_url, expected) in cases {
        assert_eq!(anthropic_messages_url(base_url), expected);
    }
}
}
+3 -2
View File
@@ -1415,7 +1415,7 @@ fn build_chat_messages_with_reasoning(
},
}));
}
ContentBlock::Thinking { thinking } => thinking_parts.push(thinking.clone()),
ContentBlock::Thinking { thinking, .. } => thinking_parts.push(thinking.clone()),
ContentBlock::ToolUse {
id,
name,
@@ -2045,6 +2045,7 @@ pub(super) fn parse_chat_message(payload: &Value) -> Result<MessageResponse> {
reasoning_field(message).filter(|reasoning| !reasoning.trim().is_empty())
{
content_blocks.push(ContentBlock::Thinking {
signature: None,
thinking: reasoning.to_string(),
});
}
@@ -2143,7 +2144,7 @@ fn build_stream_events(response: &MessageResponse) -> Vec<StreamEvent> {
}
events.push(StreamEvent::ContentBlockStop { index });
}
ContentBlock::Thinking { thinking } => {
ContentBlock::Thinking { thinking, .. } => {
events.push(StreamEvent::ContentBlockStart {
index,
content_block: ContentBlockStart::Thinking {
+12 -6
View File
@@ -401,9 +401,10 @@ impl DeepSeekClient {
text,
cache_control: None,
},
ContentBlockStart::Thinking { thinking } => {
ContentBlock::Thinking { thinking }
}
ContentBlockStart::Thinking { thinking } => ContentBlock::Thinking {
thinking,
signature: None,
},
ContentBlockStart::ToolUse {
id,
name,
@@ -433,8 +434,9 @@ impl DeepSeekClient {
}
}
Delta::ThinkingDelta { thinking } => {
if let Some(ContentBlock::Thinking { thinking: existing }) =
response.content.get_mut(i)
if let Some(ContentBlock::Thinking {
thinking: existing, ..
}) = response.content.get_mut(i)
{
existing.push_str(&thinking);
}
@@ -444,6 +446,10 @@ impl DeepSeekClient {
buf.push_str(&partial_json);
}
}
Delta::SignatureDelta { .. } => {
// Anthropic-native signature deltas never occur on
// the Responses bridge (#3014).
}
}
}
StreamEvent::ContentBlockStop { index } => {
@@ -551,7 +557,7 @@ fn convert_messages_to_responses_input(request: &MessageRequest) -> Vec<Value> {
"arguments": serde_json::to_string(input).unwrap_or_default(),
}));
}
ContentBlock::Thinking { thinking } => {
ContentBlock::Thinking { thinking, .. } => {
items.push(json!({
"type": "reasoning",
"summary": [{
+4 -2
View File
@@ -1477,10 +1477,12 @@ mod tests {
#[test]
fn config_command_provider_rejects_unknown_provider() {
let mut app = create_test_app();
let result = config_command(&mut app, Some("provider anthropic"));
// "anthropic" became a real provider in #3014; probe with an id that
// stays unknown.
let result = config_command(&mut app, Some("provider not-a-provider"));
assert!(result.is_error);
let msg = result.message.unwrap();
assert!(msg.contains("Unknown provider 'anthropic'"));
assert!(msg.contains("Unknown provider 'not-a-provider'"));
assert!(msg.contains("openrouter"));
assert!(msg.contains("xiaomi-mimo"));
}
+3 -1
View File
@@ -140,7 +140,9 @@ mod tests {
#[test]
fn unknown_provider_returns_error() {
let mut app = create_test_app();
let result = provider(&mut app, Some("anthropic"));
// "anthropic" became a real provider in #3014; probe with an id that
// stays unknown.
let result = provider(&mut app, Some("not-a-provider"));
let msg = result.message.expect("expected error message");
assert!(msg.contains("Unknown provider"));
assert!(msg.contains("openrouter"));
+2 -1
View File
@@ -607,7 +607,7 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi
ContentBlock::Text { text, .. } => text.len() / 4,
// Historical reasoning blocks are UI/session metadata for DeepSeek.
// Only current-turn tool-call reasoning is sent back to the API.
ContentBlock::Thinking { thinking } if include_thinking => thinking.len() / 4,
ContentBlock::Thinking { thinking, .. } if include_thinking => thinking.len() / 4,
ContentBlock::Thinking { .. } => 0,
ContentBlock::ToolUse { input, .. } => serde_json::to_string(input)
.map(|s| s.len() / 4)
@@ -1958,6 +1958,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: thinking.clone(),
},
ContentBlock::ToolUse {
+65
View File
@@ -163,6 +163,10 @@ pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[
"deepseek/deepseek-v4-flash",
];
pub const OFFICIAL_DEEPSEEK_MODELS: &[&str] = &["deepseek-v4-pro", "deepseek-v4-flash"];
pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6";
pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8";
pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5";
pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com";
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
@@ -188,6 +192,7 @@ pub enum ApiProvider {
Huggingface,
Together,
OpenaiCodex,
Anthropic,
}
impl ApiProvider {
@@ -237,6 +242,7 @@ impl ApiProvider {
"ollama" | "ollama-local" => Some(Self::Ollama),
"huggingface" | "hugging-face" | "hugging_face" | "hf" => Some(Self::Huggingface),
"together" | "together-ai" | "together_ai" => Some(Self::Together),
"anthropic" | "claude" => Some(Self::Anthropic),
"openai-codex" | "openai_codex" | "openaicodex" | "codex" | "chatgpt"
| "chatgpt-codex" | "chatgpt_codex" | "chatgptcodex" => Some(Self::OpenaiCodex),
_ => None,
@@ -267,6 +273,7 @@ impl ApiProvider {
Self::Huggingface => "huggingface",
Self::Together => "together",
Self::OpenaiCodex => "openai-codex",
Self::Anthropic => "anthropic",
}
}
@@ -295,6 +302,7 @@ impl ApiProvider {
Self::Huggingface => "Hugging Face",
Self::Together => "Together AI",
Self::OpenaiCodex => "OpenAI Codex (ChatGPT)",
Self::Anthropic => "Anthropic",
}
}
@@ -322,6 +330,7 @@ impl ApiProvider {
Self::Huggingface,
Self::Together,
Self::OpenaiCodex,
Self::Anthropic,
]
}
}
@@ -378,6 +387,8 @@ pub struct ModelAliasDeprecation {
pub enum RequestPayloadMode {
/// Standard OpenAI-compatible `/v1/chat/completions` payload.
ChatCompletions,
/// Native Anthropic Messages API `/v1/messages` payload (#3014).
AnthropicMessages,
}
/// Resolve the provider capability for a given [`ApiProvider`] and resolved
@@ -387,6 +398,23 @@ pub enum RequestPayloadMode {
/// in the API payload (after normalization / provider-specific mapping).
#[must_use]
pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability {
if matches!(provider, ApiProvider::Anthropic) {
return ProviderCapability {
provider,
resolved_model: resolved_model.to_string(),
// 200K is the conservative Anthropic floor; 4.6+ models resolve
// their 1M windows from models.rs rows (#3014).
context_window: crate::models::context_window_for_model(resolved_model)
.unwrap_or(200_000),
max_output: crate::models::max_output_tokens_for_model(resolved_model)
.unwrap_or(64_000),
thinking_supported: crate::models::model_supports_reasoning(resolved_model),
cache_telemetry_supported: true,
request_payload_mode: RequestPayloadMode::AnthropicMessages,
alias_deprecation: None,
};
}
// #3023: Delete the Openai/Atlascloud/Moonshot early-return so these
// providers use the generic model-based path below, which correctly
// resolves context windows, output limits, and thinking support from
@@ -819,6 +847,11 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati
ApiProvider::Openai | ApiProvider::Atlascloud => OFFICIAL_DEEPSEEK_MODELS.to_vec(),
ApiProvider::Together => vec![DEFAULT_TOGETHER_MODEL],
ApiProvider::OpenaiCodex => vec![DEFAULT_OPENAI_CODEX_MODEL],
ApiProvider::Anthropic => vec![
ANTHROPIC_OPUS_MODEL,
DEFAULT_ANTHROPIC_MODEL,
ANTHROPIC_HAIKU_MODEL,
],
}
}
@@ -1962,6 +1995,8 @@ pub struct ProvidersConfig {
pub together: ProviderConfig,
#[serde(default, alias = "openai-codex", alias = "codex", alias = "chatgpt")]
pub openai_codex: ProviderConfig,
#[serde(default, alias = "claude")]
pub anthropic: ProviderConfig,
}
#[derive(Debug, Clone, Deserialize, Default)]
@@ -2127,6 +2162,7 @@ impl Config {
ApiProvider::NvidiaNim => "providers.nvidia_nim",
ApiProvider::Together => "providers.together",
ApiProvider::OpenaiCodex => "providers.openai_codex",
ApiProvider::Anthropic => "providers.anthropic",
ApiProvider::Deepseek | ApiProvider::DeepseekCN => return,
};
tracing::warn!(
@@ -2276,6 +2312,7 @@ impl Config {
ApiProvider::Huggingface => &providers.huggingface,
ApiProvider::Together => &providers.together,
ApiProvider::OpenaiCodex => &providers.openai_codex,
ApiProvider::Anthropic => &providers.anthropic,
})
}
@@ -2302,6 +2339,7 @@ impl Config {
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
ApiProvider::Anthropic => &mut providers.anthropic,
}
}
@@ -2425,6 +2463,7 @@ impl Config {
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_MODEL,
ApiProvider::Together => DEFAULT_TOGETHER_MODEL,
ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_MODEL,
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_MODEL,
}
.to_string()
}
@@ -2448,6 +2487,7 @@ impl Config {
.filter(|base| base.contains("integrate.api.nvidia.com"))
.cloned(),
ApiProvider::Openai
| ApiProvider::Anthropic
| ApiProvider::Atlascloud
| ApiProvider::WanjieArk
| ApiProvider::Openrouter
@@ -2511,6 +2551,7 @@ impl Config {
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL,
ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL,
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
}
.to_string()
})
@@ -2560,6 +2601,7 @@ impl Config {
ApiProvider::Huggingface => "huggingface",
ApiProvider::Together => "together",
ApiProvider::OpenaiCodex => "openai_codex",
ApiProvider::Anthropic => "anthropic",
};
// 0. DeepSeek compatibility slot. The legacy top-level `api_key`
@@ -2726,6 +2768,11 @@ impl Config {
"Together AI API key not found. Run 'codewhale auth set --provider together', \
set TOGETHER_API_KEY, or add [providers.together] api_key in ~/.codewhale/config.toml."
),
ApiProvider::Anthropic => anyhow::bail!(
"Anthropic API key not found. Run 'codewhale auth set --provider anthropic', \
set ANTHROPIC_API_KEY, or add [providers.anthropic] api_key in ~/.codewhale/config.toml. \
Keys are created at https://platform.claude.com/."
),
ApiProvider::OpenaiCodex => anyhow::bail!(
"OpenAI Codex OAuth credentials not found.\n\
\n\
@@ -3437,6 +3484,13 @@ fn apply_env_overrides(config: &mut Config) {
.openai
.base_url = Some(value);
}
ApiProvider::Anthropic => {
config
.providers
.get_or_insert_with(ProvidersConfig::default)
.anthropic
.base_url = Some(value);
}
ApiProvider::Openrouter => {
config
.providers
@@ -3757,6 +3811,7 @@ fn apply_env_overrides(config: &mut Config) {
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
ApiProvider::Anthropic => &mut providers.anthropic,
};
let mut provider_headers = entry.http_headers.clone().unwrap_or_default();
provider_headers.extend(headers);
@@ -3953,6 +4008,7 @@ fn apply_env_overrides(config: &mut Config) {
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
ApiProvider::Anthropic => &mut providers.anthropic,
};
entry.model = Some(value);
}
@@ -4277,6 +4333,7 @@ fn default_base_url_for_provider(provider: ApiProvider) -> &'static str {
ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL,
ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL,
ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL,
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
}
}
@@ -4684,6 +4741,7 @@ fn merge_providers(
deepseek_cn: merge_provider_config(base.deepseek_cn, override_cfg.deepseek_cn),
nvidia_nim: merge_provider_config(base.nvidia_nim, override_cfg.nvidia_nim),
openai: merge_provider_config(base.openai, override_cfg.openai),
anthropic: merge_provider_config(base.anthropic, override_cfg.anthropic),
atlascloud: merge_provider_config(base.atlascloud, override_cfg.atlascloud),
wanjie_ark: merge_provider_config(base.wanjie_ark, override_cfg.wanjie_ark),
openrouter: merge_provider_config(base.openrouter, override_cfg.openrouter),
@@ -5152,6 +5210,9 @@ pub fn active_provider_has_env_api_key(config: &Config) -> bool {
|| std::env::var("NVIDIA_NIM_API_KEY").is_ok_and(|k| !k.trim().is_empty())
}
ApiProvider::Openai => std::env::var("OPENAI_API_KEY").is_ok_and(|k| !k.trim().is_empty()),
ApiProvider::Anthropic => {
std::env::var("ANTHROPIC_API_KEY").is_ok_and(|k| !k.trim().is_empty())
}
ApiProvider::Atlascloud => {
std::env::var("ATLASCLOUD_API_KEY").is_ok_and(|k| !k.trim().is_empty())
}
@@ -5216,6 +5277,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
ApiProvider::Openai => "OPENAI_API_KEY",
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY",
ApiProvider::Openrouter => "OPENROUTER_API_KEY",
@@ -5342,6 +5404,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
}
ApiProvider::NvidiaNim => "providers.nvidia_nim",
ApiProvider::Openai => "providers.openai",
ApiProvider::Anthropic => "providers.anthropic",
ApiProvider::Atlascloud => "providers.atlascloud",
ApiProvider::WanjieArk => "providers.wanjie_ark",
ApiProvider::Openrouter => "providers.openrouter",
@@ -5386,6 +5449,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
}
ApiProvider::NvidiaNim => "nvidia_nim",
ApiProvider::Openai => "openai",
ApiProvider::Anthropic => "anthropic",
ApiProvider::Atlascloud => "atlascloud",
ApiProvider::WanjieArk => "wanjie_ark",
ApiProvider::Openrouter => "openrouter",
@@ -5483,6 +5547,7 @@ fn provider_config_key(provider: ApiProvider) -> Result<&'static str> {
}
ApiProvider::NvidiaNim => Ok("nvidia_nim"),
ApiProvider::Openai => Ok("openai"),
ApiProvider::Anthropic => Ok("anthropic"),
ApiProvider::Atlascloud => Ok("atlascloud"),
ApiProvider::WanjieArk => Ok("wanjie_ark"),
ApiProvider::Volcengine => Ok("volcengine"),
+1
View File
@@ -204,6 +204,7 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static
}
ApiProvider::NvidiaNim => Ok("nvidia_nim"),
ApiProvider::Openai => Ok("openai"),
ApiProvider::Anthropic => Ok("anthropic"),
ApiProvider::Atlascloud => Ok("atlascloud"),
ApiProvider::WanjieArk => Ok("wanjie_ark"),
ApiProvider::Volcengine => Ok("volcengine"),
+1
View File
@@ -604,6 +604,7 @@ impl Engine {
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
ApiProvider::NvidiaNim => "NVIDIA_API_KEY/NVIDIA_NIM_API_KEY",
ApiProvider::Openai => "OPENAI_API_KEY",
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY/WANJIE_API_KEY/WANJIE_MAAS_API_KEY",
ApiProvider::Volcengine => "VOLCENGINE_API_KEY/VOLCENGINE_ARK_API_KEY/ARK_API_KEY",
+1
View File
@@ -1680,6 +1680,7 @@ async fn session_update_preserves_reasoning_tool_only_turn() {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "Need a tool before answering.".to_string(),
},
ContentBlock::ToolUse {
+23 -1
View File
@@ -430,6 +430,9 @@ impl Engine {
let mut current_text_raw = String::new();
let mut current_text_visible = String::new();
let mut current_thinking = String::new();
// #3014: Anthropic signed-thinking signature for the current
// thinking block; must be replayed verbatim in tool loops.
let mut current_thinking_signature: Option<String> = None;
let mut tool_uses: Vec<ToolUseState> = Vec::new();
let mut usage = Usage {
input_tokens: 0,
@@ -757,6 +760,14 @@ impl Engine {
.await;
}
}
Delta::SignatureDelta { signature } => {
// #3014: capture (and concatenate, defensively)
// the signed-thinking signature for replay.
match current_thinking_signature.as_mut() {
Some(existing) => existing.push_str(&signature),
None => current_thinking_signature = Some(signature),
}
}
Delta::InputJsonDelta { partial_json } => {
if let Some(&tool_idx) = current_tool_indices.get(&index)
&& let Some(tool_state) = tool_uses.get_mut(tool_idx)
@@ -857,6 +868,14 @@ impl Engine {
}
}
StreamEvent::MessageStop | StreamEvent::Ping => {}
StreamEvent::Error { error } => {
// #3014: Anthropic SSE error event. The adapter
// surfaces fatal errors as stream Err items; this
// defensive arm keeps any passed-through error
// visible instead of silently dropped.
crate::logging::warn(format!("Provider stream error event: {error}"));
stream_errors += 1;
}
}
}
@@ -944,7 +963,10 @@ impl Engine {
None
};
if let Some(thinking) = thinking_to_persist {
content_blocks.push(ContentBlock::Thinking { thinking });
content_blocks.push(ContentBlock::Thinking {
thinking,
signature: current_thinking_signature.clone(),
});
}
let mut final_text = current_text_visible.clone();
if tool_uses.is_empty() && tool_parser::has_tool_call_markers(&current_text_raw) {
+5
View File
@@ -2026,6 +2026,10 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
"OPENAI_API_KEY",
"codewhale auth set --provider openai --api-key \"...\"",
),
crate::config::ApiProvider::Anthropic => (
"ANTHROPIC_API_KEY",
"codewhale auth set --provider anthropic --api-key \"...\"",
),
crate::config::ApiProvider::Atlascloud => (
"ATLASCLOUD_API_KEY",
"codewhale auth set --provider atlascloud --api-key \"...\"",
@@ -2100,6 +2104,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
match config.api_provider() {
crate::config::ApiProvider::NvidiaNim => "nvidia_nim",
crate::config::ApiProvider::Openai => "openai",
crate::config::ApiProvider::Anthropic => "anthropic",
crate::config::ApiProvider::Atlascloud => "atlascloud",
crate::config::ApiProvider::WanjieArk => "wanjie_ark",
crate::config::ApiProvider::Volcengine => "volcengine",
+1 -1
View File
@@ -468,7 +468,7 @@ fn message_response_text(response: &MessageResponse) -> String {
ContentBlock::Text { text, .. } | ContentBlock::ToolResult { content: text, .. } => {
append_router_text(&mut out, text);
}
ContentBlock::Thinking { thinking } => {
ContentBlock::Thinking { thinking, .. } => {
append_router_text(&mut out, thinking);
}
ContentBlock::ToolUse { name, .. } => {
+24 -2
View File
@@ -91,7 +91,15 @@ pub enum ContentBlock {
#[serde(rename = "image_url")]
ImageUrl { image_url: ImageUrlContent },
#[serde(rename = "thinking")]
Thinking { thinking: String },
Thinking {
thinking: String,
/// Anthropic signed-thinking signature (#3014). Only populated on the
/// native Messages dialect and serde-skipped when absent so OpenAI
/// dialects are unaffected. Anthropic rejects tool loops that drop or
/// modify signed thinking blocks, so replay this verbatim.
#[serde(skip_serializing_if = "Option::is_none", default)]
signature: Option<String>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@@ -249,6 +257,9 @@ pub fn context_window_for_model(model: &str) -> Option<u32> {
fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
match model_lower {
// Anthropic 4.6+ models carry a 1M window; Haiku stays at 200K (#3014).
"claude-opus-4-8" | "claude-sonnet-4-6" => Some(1_000_000),
"claude-haiku-4-5" => Some(200_000),
"trinity-mini" => Some(128_000),
"arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "trinity-large-preview" => {
Some(262_144)
@@ -289,6 +300,8 @@ pub fn max_output_tokens_for_model(model: &str) -> Option<u32> {
return Some(384_000);
}
match lower.as_str() {
"claude-opus-4-8" => Some(128_000),
"claude-sonnet-4-6" | "claude-haiku-4-5" => Some(64_000),
"arcee-ai/trinity-large-thinking"
| "trinity-large-thinking"
| "moonshotai/kimi-k2.6"
@@ -326,7 +339,9 @@ pub fn model_supports_reasoning(model: &str) -> bool {
}
matches!(
lower.as_str(),
"arcee-ai/trinity-large-thinking"
"claude-opus-4-8"
| "claude-sonnet-4-6"
| "arcee-ai/trinity-large-thinking"
| "trinity-large-thinking"
| "google/gemma-4-31b-it"
| "google/gemma-4-31b-it:free"
@@ -441,6 +456,9 @@ pub enum StreamEvent {
MessageStop,
#[serde(rename = "ping")]
Ping,
/// Anthropic SSE error event (#3014).
#[serde(rename = "error")]
Error { error: serde_json::Value },
}
#[allow(dead_code)]
@@ -480,6 +498,10 @@ pub enum Delta {
ThinkingDelta { thinking: String },
#[serde(rename = "input_json_delta")]
InputJsonDelta { partial_json: String },
/// Anthropic signed-thinking signature delta (#3014); arrives at the end
/// of a thinking block on the native Messages stream.
#[serde(rename = "signature_delta")]
SignatureDelta { signature: String },
}
#[allow(dead_code)]
+1
View File
@@ -767,6 +767,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "let me think...".to_string(),
},
ContentBlock::Text {
+1 -1
View File
@@ -346,7 +346,7 @@ fn compact_content_block(block: &ContentBlock) -> Value {
"type": "text",
"text": text,
}),
ContentBlock::Thinking { thinking } => json!({
ContentBlock::Thinking { thinking, .. } => json!({
"type": "thinking",
"redacted": true,
"chars": thinking.chars().count(),
+1
View File
@@ -952,6 +952,7 @@ mod tests {
cache_control: None,
},
ContentBlock::Thinking {
signature: None,
thinking: "skip".to_string(),
},
ContentBlock::Text {
+1 -1
View File
@@ -5345,7 +5345,7 @@ fn message_response_text(blocks: &[ContentBlock]) -> String {
}
out.push_str(text);
}
ContentBlock::Thinking { thinking } => {
ContentBlock::Thinking { thinking, .. } => {
if !out.is_empty() {
out.push('\n');
}
+1
View File
@@ -171,6 +171,7 @@ mod tests {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
signature: None,
thinking: "The user seems to be asking me to classify myself.".to_string(),
},
ContentBlock::Text {
+1 -1
View File
@@ -638,7 +638,7 @@ pub fn history_cells_from_message(msg: &Message) -> Vec<HistoryCell> {
_ => {}
}
}
ContentBlock::Thinking { thinking } => {
ContentBlock::Thinking { thinking, .. } => {
if let Some(HistoryCell::Thinking { content, .. }) = cells.last_mut() {
if !content.is_empty() {
content.push('\n');
+4 -2
View File
@@ -104,6 +104,7 @@ impl ProviderPickerView {
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
ApiProvider::NvidiaNim => "NVIDIA_API_KEY",
ApiProvider::Openai => "OPENAI_API_KEY",
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY",
ApiProvider::Volcengine => "VOLCENGINE_API_KEY",
@@ -512,7 +513,8 @@ mod tests {
"Ollama",
"Hugging Face",
"Together AI",
"OpenAI Codex (ChatGPT)"
"OpenAI Codex (ChatGPT)",
"Anthropic"
]
);
}
@@ -547,7 +549,7 @@ mod tests {
let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config);
picker.handle_key(key(KeyCode::Up));
assert_eq!(picker.selected_provider(), ApiProvider::OpenaiCodex);
assert_eq!(picker.selected_provider(), ApiProvider::Anthropic);
picker.handle_key(key(KeyCode::Down));
assert_eq!(picker.selected_provider(), ApiProvider::Deepseek);
+7 -1
View File
@@ -4998,7 +4998,10 @@ fn push_assistant_message(
) {
let mut blocks = Vec::new();
if let Some(thinking) = thinking {
blocks.push(ContentBlock::Thinking { thinking });
blocks.push(ContentBlock::Thinking {
thinking,
signature: None,
});
}
if !text.is_empty() {
blocks.push(ContentBlock::Text {
@@ -7223,6 +7226,7 @@ fn render(f: &mut Frame, app: &mut App) {
crate::config::ApiProvider::DeepseekCN => None,
crate::config::ApiProvider::NvidiaNim => Some("NIM"),
crate::config::ApiProvider::Openai => Some("OpenAI"),
crate::config::ApiProvider::Anthropic => Some("Claude"),
crate::config::ApiProvider::Atlascloud => Some("Atlas"),
crate::config::ApiProvider::WanjieArk => Some("Wanjie"),
crate::config::ApiProvider::Volcengine => Some("Volc"),
@@ -8274,6 +8278,7 @@ async fn apply_provider_picker_api_key(
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
ApiProvider::Anthropic => &mut providers.anthropic,
};
entry.api_key = Some(api_key);
}
@@ -8333,6 +8338,7 @@ fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider,
ApiProvider::Huggingface => &mut providers.huggingface,
ApiProvider::Together => &mut providers.together,
ApiProvider::OpenaiCodex => &mut providers.openai_codex,
ApiProvider::Anthropic => &mut providers.anthropic,
};
entry.auth_mode = Some(auth_mode);
}
+1 -1
View File
@@ -498,7 +498,7 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize {
for block in &msg.content {
match block {
ContentBlock::Text { text, .. } => total += text.len(),
ContentBlock::Thinking { thinking } => total += thinking.len(),
ContentBlock::Thinking { thinking, .. } => total += thinking.len(),
ContentBlock::ToolUse { input, .. } => total += input.to_string().len(),
ContentBlock::ToolResult { content, .. } => total += content.len(),
ContentBlock::ServerToolUse { .. }
+2 -1
View File
@@ -79,6 +79,7 @@ fn assistant_thinking(thinking: &str, text: &str) -> Message {
content: vec![
ContentBlock::Thinking {
thinking: thinking.to_string(),
signature: None,
},
ContentBlock::Text {
text: text.to_string(),
@@ -246,7 +247,7 @@ async fn reasoning_replay_required_on_subsequent_turn() {
.content
.iter()
.find_map(|b| match b {
ContentBlock::Thinking { thinking } => Some(thinking.clone()),
ContentBlock::Thinking { thinking, .. } => Some(thinking.clone()),
_ => None,
})
.expect("Thinking block present");
@@ -32,6 +32,7 @@ fn assistant_thinking_tool_call(
content: vec![
ContentBlock::Thinking {
thinking: thinking.to_string(),
signature: None,
},
ContentBlock::ToolUse {
id: id.to_string(),
+3
View File
@@ -462,6 +462,9 @@ Remaining variables:
- `ARCEE_API_KEY`
- `ARCEE_BASE_URL`
- `ARCEE_MODEL`
- `ANTHROPIC_API_KEY`
- `ANTHROPIC_BASE_URL`
- `ANTHROPIC_MODEL`
- `MOONSHOT_API_KEY` or `KIMI_API_KEY`
- `MOONSHOT_BASE_URL` or `KIMI_BASE_URL`
- `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, or `KIMI_MODEL`
+4 -2
View File
@@ -30,8 +30,8 @@ The canonical provider IDs are:
`deepseek`, `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`,
`openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`,
`siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, `ollama`, and
`huggingface`.
`siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, `ollama`,
`huggingface`, `together`, `openai-codex`, and `anthropic`.
Use any of these surfaces to select a provider:
@@ -137,6 +137,7 @@ endpoint.
| `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible route. Org-prefixed model IDs pass through. |
| `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. |
| `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. |
| `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Supports prompt caching via `cache_control` breakpoints, adaptive thinking and `output_config.effort`, verbatim replay of signed thinking blocks, and cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. |
### Hugging Face Provider vs MCP vs Hub
@@ -219,6 +220,7 @@ endpoint when the endpoint supports model listing.
| `huggingface` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | no |
| `together` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
| `openai-codex` | `gpt-5.5` | yes | yes |
| `anthropic` | `claude-opus-4-8`, `claude-sonnet-4-6`, `claude-haiku-4-5` | yes | yes for `claude-opus-4-8` and `claude-sonnet-4-6`; no for `claude-haiku-4-5` |
AtlasCloud keeps the same default model as the config layer and adds
provider-scoped aliases for the Pro and Flash rows. Other AtlasCloud model IDs