diff --git a/config.example.toml b/config.example.toml index 0a66c369..6a0ed0af 100644 --- a/config.example.toml +++ b/config.example.toml @@ -20,7 +20,7 @@ # `api_key` / `base_url` are # still read as DeepSeek defaults when `[providers.deepseek]` is absent # (backward compatibility). -provider = "deepseek" # deepseek | deepseek-cn | nvidia-nim | openai | atlascloud | wanjie-ark | volcengine | openrouter | xiaomi-mimo | novita | fireworks | siliconflow | siliconflow-CN | arcee | moonshot | sglang | vllm | ollama | huggingface +provider = "deepseek" # deepseek | deepseek-cn | nvidia-nim | openai | atlascloud | wanjie-ark | volcengine | openrouter | xiaomi-mimo | novita | fireworks | siliconflow | siliconflow-CN | arcee | moonshot | sglang | vllm | ollama | huggingface | together | openai-codex | anthropic api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty base_url = "https://api.deepseek.com/beta" # provider = "deepseek-cn" # legacy alias (official host is still https://api.deepseek.com) @@ -440,6 +440,17 @@ max_subagents = 10 # optional (1-20) # base_url = "https://chatgpt.com/backend-api" # model = "gpt-5.5" +# ───────────────────────────────────────────────────────────────────────────────── +# Anthropic Provider (native Messages API) +# Talks to https://api.anthropic.com/v1/messages with x-api-key auth — not an +# OpenAI-compatible route. Models: claude-opus-4-8, claude-sonnet-4-6 (default), +# claude-haiku-4-5. Env vars: ANTHROPIC_API_KEY, ANTHROPIC_BASE_URL, +# ANTHROPIC_MODEL. +[providers.anthropic] +# api_key = "sk-ant-..." 
+# base_url = "https://api.anthropic.com" +# model = "claude-sonnet-4-6" + # ───────────────────────────────────────────────────────────────────────────────── # Web Search Provider # ───────────────────────────────────────────────────────────────────────────────── diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index a721781e..93748360 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -767,6 +767,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str { ProviderKind::Huggingface => "huggingface", ProviderKind::Together => "together", ProviderKind::OpenaiCodex => "openai-codex", + ProviderKind::Anthropic => "anthropic", } } @@ -865,6 +866,7 @@ fn provider_env_vars(provider: ProviderKind) -> &'static [&'static str] { ], ProviderKind::Together => &["TOGETHER_API_KEY"], ProviderKind::OpenaiCodex => &["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"], + ProviderKind::Anthropic => &["ANTHROPIC_API_KEY"], } } diff --git a/crates/secrets/src/lib.rs b/crates/secrets/src/lib.rs index d9826b45..53e4012a 100644 --- a/crates/secrets/src/lib.rs +++ b/crates/secrets/src/lib.rs @@ -754,6 +754,7 @@ pub fn env_for(name: &str) -> Option { "vllm" | "v-llm" => &["VLLM_API_KEY"], "ollama" | "ollama-local" => &["OLLAMA_API_KEY"], "openai" => &["OPENAI_API_KEY"], + "anthropic" | "claude" => &["ANTHROPIC_API_KEY"], "atlascloud" | "atlas-cloud" | "atlas_cloud" | "atlas" => &["ATLASCLOUD_API_KEY"], "volcengine" | "volcengine-ark" | "volcengine_ark" | "ark" | "volc-ark" | "volcengineark" => &[ diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index f7eb50b8..545b3f28 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -717,7 +717,18 @@ fn build_default_headers( let mut headers = HeaderMap::new(); headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json")); let api_key = api_key.trim(); - let auth_header_name = if !api_key.is_empty() + if api_provider == ApiProvider::Anthropic { + // #3014: the Messages API 
authenticates with `x-api-key` (never + // `Authorization: Bearer`) and pins the wire contract via + // `anthropic-version`. + headers.insert( + HeaderName::from_static("anthropic-version"), + HeaderValue::from_static("2023-06-01"), + ); + } + let auth_header_name = if !api_key.is_empty() && api_provider == ApiProvider::Anthropic { + Some(HeaderName::from_static("x-api-key")) + } else if !api_key.is_empty() && api_provider == ApiProvider::XiaomiMimo && (xiaomi_mimo_base_url_uses_token_plan(base_url) || xiaomi_mimo_api_key_uses_token_plan(api_key)) @@ -1141,6 +1152,9 @@ impl LlmClient for DeepSeekClient { if self.api_provider == ApiProvider::OpenaiCodex { return self.handle_responses_message(request).await; } + if self.api_provider == ApiProvider::Anthropic { + return self.handle_anthropic_message(request).await; + } self.create_message_chat(&request).await } @@ -1151,6 +1165,9 @@ impl LlmClient for DeepSeekClient { if self.api_provider == ApiProvider::OpenaiCodex { return self.handle_responses_stream(request).await; } + if self.api_provider == ApiProvider::Anthropic { + return self.handle_anthropic_stream(request).await; + } self.handle_chat_completion_stream(request).await } } @@ -1254,6 +1271,11 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Huggingface | ApiProvider::Moonshot | ApiProvider::Ollama => {} + ApiProvider::Anthropic => { + // #3014: thinking/effort shaping happens natively inside + // client/anthropic.rs (adaptive thinking + output_config), + // not via OpenAI-dialect fields. + } ApiProvider::NvidiaNim => { body["chat_template_kwargs"] = json!({ "thinking": false, @@ -1317,6 +1339,11 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Moonshot | ApiProvider::Ollama | ApiProvider::OpenaiCodex => {} + ApiProvider::Anthropic => { + // #3014: thinking/effort shaping happens natively inside + // client/anthropic.rs (adaptive thinking + output_config), + // not via OpenAI-dialect fields. 
+ } ApiProvider::NvidiaNim => { body["chat_template_kwargs"] = json!({ "thinking": true, @@ -1361,6 +1388,11 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Moonshot | ApiProvider::Ollama | ApiProvider::OpenaiCodex => {} + ApiProvider::Anthropic => { + // #3014: thinking/effort shaping happens natively inside + // client/anthropic.rs (adaptive thinking + output_config), + // not via OpenAI-dialect fields. + } ApiProvider::NvidiaNim => { body["chat_template_kwargs"] = json!({ "thinking": true, @@ -1486,6 +1518,7 @@ impl DeepSeekClient { } } +mod anthropic; mod chat; mod responses; @@ -1853,6 +1886,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "plan".to_string(), }, ContentBlock::Text { @@ -1891,6 +1925,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "plan".to_string(), }, ContentBlock::Text { @@ -1940,6 +1975,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "Need to call a tool".to_string(), }, ContentBlock::ToolUse { @@ -1991,6 +2027,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "Need to call a tool".to_string(), }, ContentBlock::ToolUse { @@ -2061,6 +2098,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "Internal explanation plan".to_string(), }, ContentBlock::Text { @@ -2104,6 +2142,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "I should explain step by step.".to_string(), }, ContentBlock::Text { @@ -2706,7 +2745,7 @@ mod tests { assert!(matches!( response.content.first(), - Some(ContentBlock::Thinking { thinking }) if thinking == "thinking via NIM" + Some(ContentBlock::Thinking { thinking, .. 
}) if thinking == "thinking via NIM" )); assert!(matches!( response.content.get(1), @@ -2848,6 +2887,7 @@ mod tests { let message = Message { role: "assistant".to_string(), content: vec![ContentBlock::Thinking { + signature: None, thinking: "plan".to_string(), }], }; @@ -2991,6 +3031,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "Need to inspect the directory".to_string(), }, ContentBlock::ToolUse { @@ -3031,6 +3072,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "Need to search".to_string(), }, ContentBlock::ToolUse { @@ -3120,6 +3162,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "Need to list files".to_string(), }, ContentBlock::ToolUse { diff --git a/crates/tui/src/client/anthropic.rs b/crates/tui/src/client/anthropic.rs new file mode 100644 index 00000000..0511791d --- /dev/null +++ b/crates/tui/src/client/anthropic.rs @@ -0,0 +1,958 @@ +//! Native Anthropic Messages API adapter (#3014). +//! +//! CodeWhale's internal wire types are already Anthropic-shaped (the harness +//! speaks Messages internally and translates *out* to OpenAI dialects), so +//! this adapter is mostly native serialization plus an SSE pass-through: +//! `StreamEvent` deserializes Anthropic's `message_start` / +//! `content_block_*` / `message_delta` / `message_stop` / `ping` events +//! directly. What the adapter adds on top: +//! +//! - request shaping: adaptive thinking + `output_config.effort` from +//! CodeWhale's `reasoning_effort` tiers, sampling-parameter rules for +//! models that reject them, and `cache_control` breakpoint placement +//! aligned with the prefix-zone model in `prefix_cache.rs`; +//! - usage normalization (#2961): `prompt_cache_hit_tokens` comes from +//! `cache_read_input_tokens`, `prompt_cache_miss_tokens` is `input_tokens` +//! 
plus `cache_creation_input_tokens`, and the normalized `input_tokens` +//! is the sum of all three (total prompt, the DeepSeek convention); +//! - signed-thinking handling: `signature_delta` is captured into +//! [`crate::models::Delta::SignatureDelta`] and assistant thinking blocks +//! replay verbatim (signature included); unsigned thinking blocks are +//! dropped from replay because the API rejects them. +//! +//! Modeled on `client/responses.rs` (separate file per dialect, no protocol +//! hacks in the shared paths). + +use anyhow::{Context, Result}; +use serde_json::{Value, json}; + +use crate::llm_client::StreamEventBox; +use crate::logging; +use crate::models::{ContentBlock, MessageRequest, MessageResponse, StreamEvent, Usage}; + +use super::{DeepSeekClient, ERROR_BODY_MAX_BYTES, bounded_error_text}; + +/// Maximum `cache_control` breakpoints Anthropic accepts per request. +const MAX_CACHE_BREAKPOINTS: usize = 4; + +impl DeepSeekClient { + /// Build the native Messages API request body from a [`MessageRequest`]. 
+ pub(super) fn build_anthropic_body(&self, request: &MessageRequest, stream: bool) -> Value { + let mut body = json!({ + "model": request.model, + "max_tokens": request.max_tokens, + "stream": stream, + }); + + if let Some(system) = request.system.as_ref() { + body["system"] = match system { + crate::models::SystemPrompt::Text(text) => json!(text), + crate::models::SystemPrompt::Blocks(blocks) => json!( + blocks + .iter() + .map(|block| { + let mut value = json!({ + "type": "text", + "text": block.text, + }); + if let Some(cache) = block.cache_control.as_ref() { + value["cache_control"] = json!({ "type": cache.cache_type }); + } + value + }) + .collect::>() + ), + }; + } + + body["messages"] = json!( + request + .messages + .iter() + .filter_map(message_to_anthropic) + .collect::>() + ); + + if let Some(tools) = request.tools.as_ref() + && !tools.is_empty() + { + body["tools"] = json!( + tools + .iter() + .map(|tool| { + let mut value = json!({ + "name": tool.name, + "description": tool.description, + "input_schema": tool.input_schema, + }); + if let Some(strict) = tool.strict { + value["strict"] = json!(strict); + } + if let Some(cache) = tool.cache_control.as_ref() { + value["cache_control"] = json!({ "type": cache.cache_type }); + } + value + }) + .collect::>() + ); + } + + if let Some(tool_choice) = request.tool_choice.as_ref() { + body["tool_choice"] = anthropic_tool_choice(tool_choice); + } + + // Thinking + effort shaping. "off" omits thinking entirely; any other + // tier enables adaptive thinking, with `output_config.effort` only on + // models the capability matrix marks as thinking-capable. 
+ let thinking_capable = crate::models::model_supports_reasoning(&request.model); + let effort = request + .reasoning_effort + .as_deref() + .map(|raw| raw.trim().to_ascii_lowercase()); + match effort.as_deref() { + Some("off" | "disabled" | "none" | "false") => {} + Some(level) if thinking_capable => { + body["thinking"] = json!({ "type": "adaptive" }); + let mapped = match level { + "low" | "minimal" => "low", + "medium" | "mid" => "medium", + "max" | "xhigh" | "highest" => "max", + _ => "high", + }; + body["output_config"] = json!({ "effort": mapped }); + } + None if thinking_capable => { + body["thinking"] = json!({ "type": "adaptive" }); + } + _ => {} + } + + // Sampling parameters: Claude 4.7+ rejects temperature/top_p + // entirely; earlier models reject the two together. Send at most one + // (temperature wins), or neither for models that forbid them. + if !anthropic_model_rejects_sampling(&request.model) { + if let Some(temperature) = request.temperature { + body["temperature"] = json!(temperature); + } else if let Some(top_p) = request.top_p { + body["top_p"] = json!(top_p); + } + } + + apply_anthropic_cache_breakpoints(&mut body); + body + } + + async fn send_anthropic_request(&self, body: &Value) -> Result { + let url = anthropic_messages_url(&self.base_url); + self.wait_for_rate_limit().await; + let response = self + .http_client + .post(&url) + .header("Accept", "text/event-stream") + .json(body) + .send() + .await + .context("Anthropic Messages API request failed")?; + + let status = response.status(); + if !status.is_success() { + let raw = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await; + let (error_type, message) = parse_anthropic_error_envelope(&raw); + self.mark_request_failure(&format!("anthropic status={status}")) + .await; + anyhow::bail!("Anthropic API error (HTTP {status} {error_type}): {message}"); + } + self.mark_request_success().await; + Ok(response) + } + + /// Handle a streaming Messages API request. 
+ pub(super) async fn handle_anthropic_stream( + &self, + request: MessageRequest, + ) -> Result { + let body = self.build_anthropic_body(&request, true); + let response = self.send_anthropic_request(&body).await?; + + let stream_idle_timeout = self.stream_idle_timeout; + let byte_stream = response.bytes_stream(); + + let stream = async_stream::stream! { + use futures_util::StreamExt; + + let mut buffer = String::new(); + tokio::pin!(byte_stream); + + loop { + let chunk = match tokio::time::timeout(stream_idle_timeout, byte_stream.next()).await { + Ok(Some(Ok(chunk))) => chunk, + Ok(Some(Err(e))) => { + yield Err(anyhow::anyhow!("Stream read error: {e}")); + return; + } + Ok(None) => break, + Err(_) => { + yield Err(anyhow::anyhow!("Stream idle timeout")); + return; + } + }; + + buffer.push_str(&String::from_utf8_lossy(&chunk)); + + while let Some(line_end) = buffer.find('\n') { + let line = buffer[..line_end].trim().to_string(); + buffer = buffer[line_end + 1..].to_string(); + + // `event:` lines are redundant (the data payload carries + // `type`) and comment/heartbeat lines are ignorable. + let Some(data) = line.strip_prefix("data: ") else { + continue; + }; + + match convert_anthropic_sse_data(data) { + Some(Ok(StreamEvent::Error { error })) => { + let (error_type, message) = anthropic_error_fields(&error); + yield Err(anyhow::anyhow!( + "Anthropic stream error ({error_type}): {message}" + )); + return; + } + Some(Ok(event)) => { + let is_stop = matches!(event, StreamEvent::MessageStop); + yield Ok(event); + if is_stop { + return; + } + } + Some(Err(e)) => { + logging::warn(format!("Failed to parse Anthropic SSE event: {e}")); + } + None => {} + } + } + } + }; + + Ok(Box::pin(stream)) + } + + /// Handle a non-streaming Messages API request. 
pub(super) async fn handle_anthropic_message(
    &self,
    request: MessageRequest,
) -> Result<MessageResponse> {
    let body = self.build_anthropic_body(&request, false);
    let response = self.send_anthropic_request(&body).await?;
    let mut value: Value = response
        .json()
        .await
        .context("Failed to parse Anthropic Messages response")?;
    // Rewrite the provider's usage object into the normalized `Usage` shape
    // before decoding the whole response.
    if let Some(usage) = value.get_mut("usage") {
        *usage = json!(parse_anthropic_usage(usage));
    }
    serde_json::from_value(value).context("Failed to decode Anthropic Messages response")
}
}

/// Build the `/v1/messages` endpoint URL, tolerating base URLs that already
/// carry a `/v1` suffix.
fn anthropic_messages_url(base_url: &str) -> String {
    let trimmed = base_url.trim_end_matches('/');
    if trimmed.ends_with("/v1") {
        format!("{trimmed}/messages")
    } else {
        format!("{trimmed}/v1/messages")
    }
}

/// Models that reject `temperature` / `top_p` outright (Claude 4.7+).
///
/// Matching is a case-insensitive substring test on the model id.
fn anthropic_model_rejects_sampling(model: &str) -> bool {
    let lower = model.to_ascii_lowercase();
    lower.contains("opus-4-7")
        || lower.contains("opus-4-8")
        || lower.contains("fable")
        || lower.contains("mythos")
}

/// Convert the engine's `tool_choice` value (OpenAI-style string or object)
/// to the Anthropic object form.
///
/// - `"auto"` / `"none"` map to the same-named Anthropic types;
///   `"any"` and `"required"` both map to `{"type": "any"}`.
/// - Any other string is treated as a tool name.
/// - The OpenAI object form `{"type": "function", "function": {"name": N}}`
///   becomes `{"type": "tool", "name": N}`.
/// - Every other value is passed through unchanged.
fn anthropic_tool_choice(tool_choice: &Value) -> Value {
    if let Some(choice) = tool_choice.as_str() {
        return match choice {
            "auto" => json!({ "type": "auto" }),
            "none" => json!({ "type": "none" }),
            "any" | "required" => json!({ "type": "any" }),
            name => json!({ "type": "tool", "name": name }),
        };
    }

    // "function" is never a native Anthropic `tool_choice` type, so this
    // rewrite cannot affect a value that is already in Anthropic form.
    let is_openai_function = tool_choice.get("type").and_then(Value::as_str) == Some("function");
    let function_name = tool_choice
        .pointer("/function/name")
        .and_then(Value::as_str);
    match (is_openai_function, function_name) {
        (true, Some(name)) => json!({ "type": "tool", "name": name }),
        _ => tool_choice.clone(),
    }
}

/// Convert one internal message to the Anthropic wire shape. Returns `None`
/// when no blocks survive conversion (Anthropic rejects empty content).
+fn message_to_anthropic(message: &crate::models::Message) -> Option { + let blocks: Vec = message + .content + .iter() + .filter_map(content_block_to_anthropic) + .collect(); + if blocks.is_empty() { + return None; + } + Some(json!({ "role": message.role, "content": blocks })) +} + +fn content_block_to_anthropic(block: &ContentBlock) -> Option { + match block { + ContentBlock::Text { + text, + cache_control, + } => { + let mut value = json!({ "type": "text", "text": text }); + if let Some(cache) = cache_control { + value["cache_control"] = json!({ "type": cache.cache_type }); + } + Some(value) + } + ContentBlock::Thinking { + thinking, + signature, + } => { + // Anthropic rejects unsigned thinking blocks on replay (and the + // DeepSeek-era "(reasoning omitted)" placeholders mean nothing to + // it), so only signed blocks are replayed — verbatim, signature + // included. + signature.as_ref().map(|signature| { + json!({ + "type": "thinking", + "thinking": thinking, + "signature": signature, + }) + }) + } + ContentBlock::ToolUse { + id, name, input, .. + } => Some(json!({ + "type": "tool_use", + "id": id, + "name": name, + "input": input, + })), + ContentBlock::ToolResult { + tool_use_id, + content, + is_error, + .. + } => { + let mut value = json!({ + "type": "tool_result", + "tool_use_id": tool_use_id, + "content": content, + }); + if let Some(is_error) = is_error { + value["is_error"] = json!(is_error); + } + Some(value) + } + ContentBlock::ImageUrl { image_url } => Some(json!({ + "type": "image", + "source": { "type": "url", "url": image_url.url }, + })), + // Server-tool block types are DeepSeek/internal concepts with no + // Anthropic client-side wire equivalent. + ContentBlock::ServerToolUse { .. } + | ContentBlock::ToolSearchToolResult { .. } + | ContentBlock::CodeExecutionToolResult { .. } => None, + } +} + +/// Enforce the prefix-zone breakpoint policy (#3014): +/// 1. 
the last tool in the catalog (or, with no tools, the last system +/// block) — caches the immutable prefix; +/// 2. the last content block of the most recent user turn — caches the +/// append-only history. +/// +/// Caller-provided breakpoints are preserved, but the total is capped at +/// [`MAX_CACHE_BREAKPOINTS`] by dropping the earliest markers first (the +/// latest markers cover the longest prefixes). +fn apply_anthropic_cache_breakpoints(body: &mut Value) { + // Place breakpoint 1: prefer the last tool; otherwise last system block. + let mut placed_prefix = false; + if let Some(tools) = body.get_mut("tools").and_then(Value::as_array_mut) + && let Some(last) = tools.last_mut() + { + last["cache_control"] = json!({ "type": "ephemeral" }); + placed_prefix = true; + } + if !placed_prefix + && let Some(system) = body.get_mut("system").and_then(Value::as_array_mut) + && let Some(last) = system.last_mut() + { + last["cache_control"] = json!({ "type": "ephemeral" }); + } + + // Place breakpoint 2: last content block of the latest user message. + if let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut) + && let Some(last_user) = messages + .iter_mut() + .rev() + .find(|message| message.get("role").and_then(Value::as_str) == Some("user")) + && let Some(last_block) = last_user + .get_mut("content") + .and_then(Value::as_array_mut) + .and_then(|blocks| blocks.last_mut()) + { + last_block["cache_control"] = json!({ "type": "ephemeral" }); + } + + // Cap at MAX_CACHE_BREAKPOINTS in render order (tools → system → + // messages), dropping the earliest extras. 
+ let mut marked: Vec<*mut Value> = Vec::new(); + let collect = |value: Option<&mut Value>| { + let Some(array) = value.and_then(Value::as_array_mut) else { + return Vec::new(); + }; + array + .iter_mut() + .filter(|item| item.get("cache_control").is_some()) + .map(|item| item as *mut Value) + .collect::>() + }; + marked.extend(collect(body.get_mut("tools"))); + marked.extend(collect(body.get_mut("system"))); + if let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut) { + for message in messages.iter_mut() { + if let Some(blocks) = message.get_mut("content").and_then(Value::as_array_mut) { + marked.extend( + blocks + .iter_mut() + .filter(|block| block.get("cache_control").is_some()) + .map(|block| block as *mut Value), + ); + } + } + } + if marked.len() > MAX_CACHE_BREAKPOINTS { + let excess = marked.len() - MAX_CACHE_BREAKPOINTS; + for pointer in marked.into_iter().take(excess) { + // SAFETY: the pointers were collected from `body`, which is + // exclusively borrowed for the duration of this function, and + // each pointer targets a distinct JSON node. + unsafe { + if let Some(map) = (*pointer).as_object_mut() { + map.remove("cache_control"); + } + } + } + } +} + +/// Convert one SSE `data:` payload into a [`StreamEvent`], normalizing usage +/// objects to the #2961 convention. Returns `None` for ignorable payloads. 
+fn convert_anthropic_sse_data(data: &str) -> Option> { + let trimmed = data.trim(); + if trimmed.is_empty() { + return None; + } + let mut value: Value = match serde_json::from_str(trimmed) { + Ok(value) => value, + Err(e) => return Some(Err(anyhow::anyhow!("invalid SSE JSON: {e}"))), + }; + + match value.get("type").and_then(Value::as_str) { + Some("message_start") => { + if let Some(usage) = value + .get_mut("message") + .and_then(|message| message.get_mut("usage")) + { + *usage = json!(parse_anthropic_usage(usage)); + } + } + Some("message_delta") => { + if let Some(usage) = value.get_mut("usage") { + *usage = json!(parse_anthropic_usage(usage)); + } + } + // Tolerate unknown event types (e.g. future additions) silently. + Some(known) + if !matches!( + known, + "message_start" + | "content_block_start" + | "content_block_delta" + | "content_block_stop" + | "message_delta" + | "message_stop" + | "ping" + | "error" + ) => + { + return None; + } + _ => {} + } + + Some(serde_json::from_value(value).map_err(|e| anyhow::anyhow!("unrecognized SSE event: {e}"))) +} + +/// Map Anthropic's usage payload onto the normalized [`Usage`] convention +/// (#2961): hit = cache reads, miss = uncached input + cache writes, +/// `input_tokens` = the total prompt across all three. 
+fn parse_anthropic_usage(usage: &Value) -> Usage { + let field = |name: &str| { + usage + .get(name) + .and_then(Value::as_u64) + .and_then(|value| u32::try_from(value).ok()) + .unwrap_or(0) + }; + let input_raw = field("input_tokens"); + let cache_creation = field("cache_creation_input_tokens"); + let cache_read = field("cache_read_input_tokens"); + let output = field("output_tokens"); + + Usage { + input_tokens: input_raw + .saturating_add(cache_creation) + .saturating_add(cache_read), + output_tokens: output, + prompt_cache_hit_tokens: Some(cache_read), + prompt_cache_miss_tokens: Some(input_raw.saturating_add(cache_creation)), + reasoning_tokens: None, + reasoning_replay_tokens: None, + server_tool_use: None, + } +} + +/// Extract `error.type` / `error.message` from an Anthropic error envelope +/// (`{"type":"error","error":{"type":...,"message":...}}`), falling back to +/// the raw body so nothing is swallowed. +fn parse_anthropic_error_envelope(raw: &str) -> (String, String) { + let Ok(value) = serde_json::from_str::(raw) else { + return ("unknown".to_string(), raw.to_string()); + }; + let error = value.get("error").unwrap_or(&value); + anthropic_error_fields(error) +} + +fn anthropic_error_fields(error: &Value) -> (String, String) { + let error_type = error + .get("type") + .and_then(Value::as_str) + .unwrap_or("unknown") + .to_string(); + let message = error + .get("message") + .and_then(Value::as_str) + .map(str::to_string) + .unwrap_or_else(|| error.to_string()); + (error_type, message) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::models::{CacheControl, Message, SystemBlock, SystemPrompt, Tool}; + + fn request_with( + model: &str, + reasoning_effort: Option<&str>, + temperature: Option, + top_p: Option, + ) -> MessageRequest { + MessageRequest { + model: model.to_string(), + messages: vec![Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "hello".to_string(), + cache_control: None, + }], + }], + 
max_tokens: 1024, + system: Some(SystemPrompt::Blocks(vec![SystemBlock { + block_type: "text".to_string(), + text: "be helpful".to_string(), + cache_control: Some(CacheControl { + cache_type: "ephemeral".to_string(), + }), + }])), + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: reasoning_effort.map(str::to_string), + stream: Some(true), + temperature, + top_p, + } + } + + fn test_client() -> DeepSeekClient { + let _ = rustls::crypto::ring::default_provider().install_default(); + let config = crate::config::Config { + provider: Some("anthropic".to_string()), + providers: Some(crate::config::ProvidersConfig { + anthropic: crate::config::ProviderConfig { + api_key: Some("test-key".to_string()), + ..Default::default() + }, + ..Default::default() + }), + ..Default::default() + }; + DeepSeekClient::new(&config).expect("anthropic client constructs") + } + + #[test] + fn body_keeps_native_cache_control_on_system_and_tools() { + let client = test_client(); + let mut request = request_with("claude-sonnet-4-6", Some("high"), None, None); + request.tools = Some(vec![Tool { + tool_type: None, + name: "read_file".to_string(), + description: "Read a file".to_string(), + input_schema: json!({"type": "object", "additionalProperties": false}), + allowed_callers: None, + defer_loading: None, + input_examples: None, + strict: Some(true), + cache_control: None, + }]); + + let body = client.build_anthropic_body(&request, true); + + assert_eq!( + body.pointer("/system/0/cache_control/type") + .and_then(Value::as_str), + Some("ephemeral"), + "system cache_control must survive natively: {body}" + ); + assert_eq!( + body.pointer("/tools/0/strict").and_then(Value::as_bool), + Some(true) + ); + assert_eq!( + body.pointer("/tools/0/cache_control/type") + .and_then(Value::as_str), + Some("ephemeral"), + "breakpoint 1 lands on the last tool: {body}" + ); + // Breakpoint 2 lands on the latest user turn's last block. 
+ assert_eq!( + body.pointer("/messages/0/content/0/cache_control/type") + .and_then(Value::as_str), + Some("ephemeral") + ); + } + + #[test] + fn body_maps_reasoning_effort_to_adaptive_thinking_and_effort() { + let client = test_client(); + + let body = client.build_anthropic_body( + &request_with("claude-sonnet-4-6", Some("high"), None, None), + true, + ); + assert_eq!( + body.pointer("/thinking/type").and_then(Value::as_str), + Some("adaptive") + ); + assert_eq!( + body.pointer("/output_config/effort") + .and_then(Value::as_str), + Some("high") + ); + + let body = client.build_anthropic_body( + &request_with("claude-opus-4-8", Some("xhigh"), None, None), + true, + ); + assert_eq!( + body.pointer("/output_config/effort") + .and_then(Value::as_str), + Some("max") + ); + + let body = client.build_anthropic_body( + &request_with("claude-sonnet-4-6", Some("off"), None, None), + true, + ); + assert!(body.get("thinking").is_none(), "off omits thinking: {body}"); + assert!(body.get("output_config").is_none()); + + // Haiku is not thinking-capable: no thinking, no effort. + let body = client.build_anthropic_body( + &request_with("claude-haiku-4-5", Some("high"), None, None), + true, + ); + assert!(body.get("thinking").is_none(), "{body}"); + assert!(body.get("output_config").is_none(), "{body}"); + } + + #[test] + fn body_drops_sampling_params_for_models_that_reject_them() { + let client = test_client(); + + let body = client.build_anthropic_body( + &request_with("claude-opus-4-8", None, Some(0.7), Some(0.9)), + true, + ); + assert!(body.get("temperature").is_none(), "{body}"); + assert!(body.get("top_p").is_none(), "{body}"); + + // Older models accept ONE of temperature / top_p (temperature wins). 
+ let body = client.build_anthropic_body( + &request_with("claude-sonnet-4-6", None, Some(0.7), Some(0.9)), + true, + ); + assert_eq!( + body.get("temperature").and_then(Value::as_f64), + Some(f64::from(0.7f32)) + ); + assert!(body.get("top_p").is_none(), "never send both: {body}"); + } + + #[test] + fn body_replays_signed_thinking_and_drops_unsigned_placeholders() { + let client = test_client(); + let mut request = request_with("claude-sonnet-4-6", None, None, None); + request.messages = vec![ + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "do the thing".to_string(), + cache_control: None, + }], + }, + Message { + role: "assistant".to_string(), + content: vec![ + ContentBlock::Thinking { + thinking: "signed reasoning".to_string(), + signature: Some("sig-abc".to_string()), + }, + ContentBlock::Thinking { + thinking: "(reasoning omitted)".to_string(), + signature: None, + }, + ContentBlock::ToolUse { + id: "toolu_1".to_string(), + name: "read_file".to_string(), + input: json!({"path": "a.txt"}), + caller: None, + }, + ], + }, + Message { + role: "user".to_string(), + content: vec![ContentBlock::ToolResult { + tool_use_id: "toolu_1".to_string(), + content: "contents".to_string(), + is_error: None, + content_blocks: None, + }], + }, + ]; + + let body = client.build_anthropic_body(&request, true); + let assistant = &body["messages"][1]["content"]; + assert_eq!(assistant.as_array().map(Vec::len), Some(2)); + assert_eq!( + assistant[0]["signature"].as_str(), + Some("sig-abc"), + "signed thinking replays verbatim: {assistant}" + ); + assert_eq!(assistant[1]["type"].as_str(), Some("tool_use")); + assert!( + assistant[1].get("caller").is_none(), + "internal caller metadata must not reach the wire" + ); + assert_eq!( + body["messages"][2]["content"][0]["type"].as_str(), + Some("tool_result") + ); + } + + #[test] + fn breakpoints_are_capped_at_four_dropping_earliest() { + let client = test_client(); + let mut request = 
request_with("claude-sonnet-4-6", None, None, None); + // Five caller-marked user turns + the two placed breakpoints. + request.messages = (0..5) + .map(|i| Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: format!("turn {i}"), + cache_control: Some(CacheControl { + cache_type: "ephemeral".to_string(), + }), + }], + }) + .collect(); + + let body = client.build_anthropic_body(&request, true); + let mut count = 0; + if body.pointer("/system/0/cache_control").is_some() { + count += 1; + } + for message in body["messages"].as_array().unwrap() { + for block in message["content"].as_array().unwrap() { + if block.get("cache_control").is_some() { + count += 1; + } + } + } + assert!( + count <= MAX_CACHE_BREAKPOINTS, + "breakpoints must be capped at {MAX_CACHE_BREAKPOINTS}, got {count}: {body}" + ); + // The latest user turn keeps its marker (longest prefix coverage). + assert!( + body.pointer("/messages/4/content/0/cache_control") + .is_some(), + "{body}" + ); + } + + #[test] + fn sse_fixture_decodes_text_thinking_signature_and_tool_use() { + use crate::models::{ContentBlockStart, Delta}; + + let events = [ + r#"{"type":"message_start","message":{"id":"msg_01","type":"message","role":"assistant","content":[],"model":"claude-sonnet-4-6","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":2045,"cache_read_input_tokens":18000,"output_tokens":1}}}"#, + r#"{"type":"content_block_start","index":0,"content_block":{"type":"thinking","thinking":""}}"#, + r#"{"type":"content_block_delta","index":0,"delta":{"type":"thinking_delta","thinking":"Let me check"}}"#, + r#"{"type":"content_block_delta","index":0,"delta":{"type":"signature_delta","signature":"sig-xyz"}}"#, + r#"{"type":"content_block_stop","index":0}"#, + r#"{"type":"content_block_start","index":1,"content_block":{"type":"text","text":""}}"#, + r#"{"type":"content_block_delta","index":1,"delta":{"type":"text_delta","text":"Reading the file."}}"#, + 
r#"{"type":"content_block_stop","index":1}"#, + r#"{"type":"content_block_start","index":2,"content_block":{"type":"tool_use","id":"toolu_9","name":"read_file","input":{}}}"#, + r#"{"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"{\"path\":"}}"#, + r#"{"type":"content_block_delta","index":2,"delta":{"type":"input_json_delta","partial_json":"\"a.txt\"}"}}"#, + r#"{"type":"content_block_stop","index":2}"#, + r#"{"type":"ping"}"#, + r#"{"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"output_tokens":42}}"#, + r#"{"type":"message_stop"}"#, + ]; + + let decoded: Vec = events + .iter() + .map(|data| { + convert_anthropic_sse_data(data) + .expect("known event") + .expect("decodes") + }) + .collect(); + + // message_start usage normalized to the #2961 convention. + let StreamEvent::MessageStart { message } = &decoded[0] else { + panic!("expected MessageStart, got {:?}", decoded[0]); + }; + assert_eq!(message.usage.input_tokens, 3 + 2045 + 18000); + assert_eq!(message.usage.prompt_cache_hit_tokens, Some(18000)); + assert_eq!(message.usage.prompt_cache_miss_tokens, Some(3 + 2045)); + + assert!(matches!( + &decoded[1], + StreamEvent::ContentBlockStart { + content_block: ContentBlockStart::Thinking { .. }, + .. + } + )); + assert!(matches!( + &decoded[3], + StreamEvent::ContentBlockDelta { + delta: Delta::SignatureDelta { signature }, + .. + } if signature == "sig-xyz" + )); + assert!(matches!( + &decoded[6], + StreamEvent::ContentBlockDelta { + delta: Delta::TextDelta { text }, + .. + } if text == "Reading the file." + )); + let mut tool_json = String::new(); + for event in &decoded { + if let StreamEvent::ContentBlockDelta { + delta: Delta::InputJsonDelta { partial_json }, + .. 
+ } = event + { + tool_json.push_str(partial_json); + } + } + assert_eq!( + serde_json::from_str::(&tool_json).expect("accumulated tool args parse"), + json!({"path": "a.txt"}) + ); + assert!(matches!(&decoded[12], StreamEvent::Ping)); + let StreamEvent::MessageDelta { delta, usage } = &decoded[13] else { + panic!("expected MessageDelta"); + }; + assert_eq!(delta.stop_reason.as_deref(), Some("tool_use")); + assert_eq!(usage.as_ref().map(|u| u.output_tokens), Some(42)); + assert!(matches!(&decoded[14], StreamEvent::MessageStop)); + } + + #[test] + fn sse_error_event_and_unknown_events_are_handled() { + let error = convert_anthropic_sse_data( + r#"{"type":"error","error":{"type":"overloaded_error","message":"Overloaded"}}"#, + ) + .expect("error event decodes") + .expect("error event is a StreamEvent"); + let StreamEvent::Error { error } = error else { + panic!("expected StreamEvent::Error"); + }; + let (error_type, message) = anthropic_error_fields(&error); + assert_eq!(error_type, "overloaded_error"); + assert_eq!(message, "Overloaded"); + + assert!( + convert_anthropic_sse_data(r#"{"type":"content_block_started_v2","index":0}"#) + .is_none(), + "unknown event types are tolerated" + ); + assert!(convert_anthropic_sse_data(" ").is_none()); + } + + #[test] + fn usage_mapping_handles_missing_cache_fields() { + let usage = parse_anthropic_usage(&json!({"input_tokens": 10, "output_tokens": 5})); + assert_eq!(usage.input_tokens, 10); + assert_eq!(usage.output_tokens, 5); + assert_eq!(usage.prompt_cache_hit_tokens, Some(0)); + assert_eq!(usage.prompt_cache_miss_tokens, Some(10)); + } + + #[test] + fn error_envelope_parses_type_and_message() { + let (error_type, message) = parse_anthropic_error_envelope( + r#"{"type":"error","error":{"type":"rate_limit_error","message":"Too many requests"},"request_id":"req_1"}"#, + ); + assert_eq!(error_type, "rate_limit_error"); + assert_eq!(message, "Too many requests"); + + let (error_type, message) = 
parse_anthropic_error_envelope("upstream blew up"); + assert_eq!(error_type, "unknown"); + assert_eq!(message, "upstream blew up"); + } + + #[test] + fn messages_url_tolerates_v1_suffix() { + assert_eq!( + anthropic_messages_url("https://api.anthropic.com"), + "https://api.anthropic.com/v1/messages" + ); + assert_eq!( + anthropic_messages_url("https://api.anthropic.com/"), + "https://api.anthropic.com/v1/messages" + ); + assert_eq!( + anthropic_messages_url("https://gateway.example/v1"), + "https://gateway.example/v1/messages" + ); + } +} diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 93b08714..5bf8e65b 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -1415,7 +1415,7 @@ fn build_chat_messages_with_reasoning( }, })); } - ContentBlock::Thinking { thinking } => thinking_parts.push(thinking.clone()), + ContentBlock::Thinking { thinking, .. } => thinking_parts.push(thinking.clone()), ContentBlock::ToolUse { id, name, @@ -2044,6 +2044,7 @@ pub(super) fn parse_chat_message(payload: &Value) -> Result { reasoning_field(message).filter(|reasoning| !reasoning.trim().is_empty()) { content_blocks.push(ContentBlock::Thinking { + signature: None, thinking: reasoning.to_string(), }); } @@ -2142,7 +2143,7 @@ fn build_stream_events(response: &MessageResponse) -> Vec { } events.push(StreamEvent::ContentBlockStop { index }); } - ContentBlock::Thinking { thinking } => { + ContentBlock::Thinking { thinking, .. 
} => { events.push(StreamEvent::ContentBlockStart { index, content_block: ContentBlockStart::Thinking { diff --git a/crates/tui/src/client/responses.rs b/crates/tui/src/client/responses.rs index 5c3b80a1..069bf769 100644 --- a/crates/tui/src/client/responses.rs +++ b/crates/tui/src/client/responses.rs @@ -408,9 +408,10 @@ impl DeepSeekClient { text, cache_control: None, }, - ContentBlockStart::Thinking { thinking } => { - ContentBlock::Thinking { thinking } - } + ContentBlockStart::Thinking { thinking } => ContentBlock::Thinking { + thinking, + signature: None, + }, ContentBlockStart::ToolUse { id, name, @@ -440,8 +441,9 @@ impl DeepSeekClient { } } Delta::ThinkingDelta { thinking } => { - if let Some(ContentBlock::Thinking { thinking: existing }) = - response.content.get_mut(i) + if let Some(ContentBlock::Thinking { + thinking: existing, .. + }) = response.content.get_mut(i) { existing.push_str(&thinking); } @@ -451,6 +453,10 @@ impl DeepSeekClient { buf.push_str(&partial_json); } } + Delta::SignatureDelta { .. } => { + // Anthropic-native signature deltas never occur on + // the Responses bridge (#3014). + } } } StreamEvent::ContentBlockStop { index } => { @@ -538,7 +544,7 @@ fn convert_messages_to_responses_input(request: &MessageRequest) -> Vec { "arguments": serde_json::to_string(input).unwrap_or_default(), })); } - ContentBlock::Thinking { thinking } => { + ContentBlock::Thinking { thinking, .. } => { items.push(json!({ "type": "reasoning", "summary": [{ diff --git a/crates/tui/src/commands/config.rs b/crates/tui/src/commands/config.rs index 28c53268..10b0d1d5 100644 --- a/crates/tui/src/commands/config.rs +++ b/crates/tui/src/commands/config.rs @@ -1477,10 +1477,12 @@ mod tests { #[test] fn config_command_provider_rejects_unknown_provider() { let mut app = create_test_app(); - let result = config_command(&mut app, Some("provider anthropic")); + // "anthropic" became a real provider in #3014; probe with an id that + // stays unknown. 
+ let result = config_command(&mut app, Some("provider not-a-provider")); assert!(result.is_error); let msg = result.message.unwrap(); - assert!(msg.contains("Unknown provider 'anthropic'")); + assert!(msg.contains("Unknown provider 'not-a-provider'")); assert!(msg.contains("openrouter")); assert!(msg.contains("xiaomi-mimo")); } diff --git a/crates/tui/src/commands/provider.rs b/crates/tui/src/commands/provider.rs index 911e6299..e5b002c9 100644 --- a/crates/tui/src/commands/provider.rs +++ b/crates/tui/src/commands/provider.rs @@ -140,7 +140,9 @@ mod tests { #[test] fn unknown_provider_returns_error() { let mut app = create_test_app(); - let result = provider(&mut app, Some("anthropic")); + // "anthropic" became a real provider in #3014; probe with an id that + // stays unknown. + let result = provider(&mut app, Some("not-a-provider")); let msg = result.message.expect("expected error message"); assert!(msg.contains("Unknown provider")); assert!(msg.contains("openrouter")); diff --git a/crates/tui/src/compaction.rs b/crates/tui/src/compaction.rs index 72c360cc..87611604 100644 --- a/crates/tui/src/compaction.rs +++ b/crates/tui/src/compaction.rs @@ -607,7 +607,7 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi ContentBlock::Text { text, .. } => text.len() / 4, // Historical reasoning blocks are UI/session metadata for DeepSeek. // Only current-turn tool-call reasoning is sent back to the API. - ContentBlock::Thinking { thinking } if include_thinking => thinking.len() / 4, + ContentBlock::Thinking { thinking, .. } if include_thinking => thinking.len() / 4, ContentBlock::Thinking { .. } => 0, ContentBlock::ToolUse { input, .. 
} => serde_json::to_string(input) .map(|s| s.len() / 4) @@ -1958,6 +1958,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: thinking.clone(), }, ContentBlock::ToolUse { diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 7f1245e1..aae5c5a3 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -163,6 +163,10 @@ pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[ "deepseek/deepseek-v4-flash", ]; pub const OFFICIAL_DEEPSEEK_MODELS: &[&str] = &["deepseek-v4-pro", "deepseek-v4-flash"]; +pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6"; +pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8"; +pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5"; +pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com"; #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "snake_case")] @@ -383,6 +387,8 @@ pub struct ModelAliasDeprecation { pub enum RequestPayloadMode { /// Standard OpenAI-compatible `/v1/chat/completions` payload. ChatCompletions, + /// Native Anthropic Messages API `/v1/messages` payload (#3014). + AnthropicMessages, } /// Resolve the provider capability for a given [`ApiProvider`] and resolved @@ -392,6 +398,23 @@ pub enum RequestPayloadMode { /// in the API payload (after normalization / provider-specific mapping). #[must_use] pub fn provider_capability(provider: ApiProvider, resolved_model: &str) -> ProviderCapability { + if matches!(provider, ApiProvider::Anthropic) { + return ProviderCapability { + provider, + resolved_model: resolved_model.to_string(), + // 200K is the conservative Anthropic floor; 4.6+ models resolve + // their 1M windows from models.rs rows (#3014). 
+ context_window: crate::models::context_window_for_model(resolved_model) + .unwrap_or(200_000), + max_output: crate::models::max_output_tokens_for_model(resolved_model) + .unwrap_or(64_000), + thinking_supported: crate::models::model_supports_reasoning(resolved_model), + cache_telemetry_supported: true, + request_payload_mode: RequestPayloadMode::AnthropicMessages, + alias_deprecation: None, + }; + } + if matches!( provider, ApiProvider::Openai | ApiProvider::Atlascloud | ApiProvider::Moonshot @@ -831,6 +854,11 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati ApiProvider::Openai | ApiProvider::Atlascloud => OFFICIAL_DEEPSEEK_MODELS.to_vec(), ApiProvider::Together => vec![DEFAULT_TOGETHER_MODEL], ApiProvider::OpenaiCodex => vec![DEFAULT_OPENAI_CODEX_MODEL], + ApiProvider::Anthropic => vec![ + ANTHROPIC_OPUS_MODEL, + DEFAULT_ANTHROPIC_MODEL, + ANTHROPIC_HAIKU_MODEL, + ], } } @@ -1974,6 +2002,8 @@ pub struct ProvidersConfig { pub together: ProviderConfig, #[serde(default, alias = "openai-codex", alias = "codex", alias = "chatgpt")] pub openai_codex: ProviderConfig, + #[serde(default, alias = "claude")] + pub anthropic: ProviderConfig, } #[derive(Debug, Clone, Deserialize, Default)] @@ -2139,6 +2169,7 @@ impl Config { ApiProvider::NvidiaNim => "providers.nvidia_nim", ApiProvider::Together => "providers.together", ApiProvider::OpenaiCodex => "providers.openai_codex", + ApiProvider::Anthropic => "providers.anthropic", ApiProvider::Deepseek | ApiProvider::DeepseekCN => return, }; tracing::warn!( @@ -2288,6 +2319,7 @@ impl Config { ApiProvider::Huggingface => &providers.huggingface, ApiProvider::Together => &providers.together, ApiProvider::OpenaiCodex => &providers.openai_codex, + ApiProvider::Anthropic => &providers.anthropic, }) } @@ -2314,6 +2346,7 @@ impl Config { ApiProvider::Huggingface => &mut providers.huggingface, ApiProvider::Together => &mut providers.together, ApiProvider::OpenaiCodex => &mut providers.openai_codex, + 
ApiProvider::Anthropic => &mut providers.anthropic, } } @@ -2437,6 +2470,7 @@ impl Config { ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_MODEL, ApiProvider::Together => DEFAULT_TOGETHER_MODEL, ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_MODEL, + ApiProvider::Anthropic => DEFAULT_ANTHROPIC_MODEL, } .to_string() } @@ -2460,6 +2494,7 @@ impl Config { .filter(|base| base.contains("integrate.api.nvidia.com")) .cloned(), ApiProvider::Openai + | ApiProvider::Anthropic | ApiProvider::Atlascloud | ApiProvider::WanjieArk | ApiProvider::Openrouter @@ -2523,6 +2558,7 @@ impl Config { ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL, ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL, ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL, + ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL, } .to_string() }) @@ -2572,6 +2608,7 @@ impl Config { ApiProvider::Huggingface => "huggingface", ApiProvider::Together => "together", ApiProvider::OpenaiCodex => "openai_codex", + ApiProvider::Anthropic => "anthropic", }; // 0. DeepSeek compatibility slot. The legacy top-level `api_key` @@ -2738,6 +2775,11 @@ impl Config { "Together AI API key not found. Run 'codewhale auth set --provider together', \ set TOGETHER_API_KEY, or add [providers.together] api_key in ~/.codewhale/config.toml." ), + ApiProvider::Anthropic => anyhow::bail!( + "Anthropic API key not found. Run 'codewhale auth set --provider anthropic', \ + set ANTHROPIC_API_KEY, or add [providers.anthropic] api_key in ~/.codewhale/config.toml. \ + Keys are created at https://platform.claude.com/." 
+ ), ApiProvider::OpenaiCodex => anyhow::bail!( "OpenAI Codex OAuth credentials not found.\n\ \n\ @@ -3449,6 +3491,13 @@ fn apply_env_overrides(config: &mut Config) { .openai .base_url = Some(value); } + ApiProvider::Anthropic => { + config + .providers + .get_or_insert_with(ProvidersConfig::default) + .anthropic + .base_url = Some(value); + } ApiProvider::Openrouter => { config .providers @@ -3769,6 +3818,7 @@ fn apply_env_overrides(config: &mut Config) { ApiProvider::Huggingface => &mut providers.huggingface, ApiProvider::Together => &mut providers.together, ApiProvider::OpenaiCodex => &mut providers.openai_codex, + ApiProvider::Anthropic => &mut providers.anthropic, }; let mut provider_headers = entry.http_headers.clone().unwrap_or_default(); provider_headers.extend(headers); @@ -3965,6 +4015,7 @@ fn apply_env_overrides(config: &mut Config) { ApiProvider::Huggingface => &mut providers.huggingface, ApiProvider::Together => &mut providers.together, ApiProvider::OpenaiCodex => &mut providers.openai_codex, + ApiProvider::Anthropic => &mut providers.anthropic, }; entry.model = Some(value); } @@ -4289,6 +4340,7 @@ fn default_base_url_for_provider(provider: ApiProvider) -> &'static str { ApiProvider::Huggingface => DEFAULT_HUGGINGFACE_BASE_URL, ApiProvider::Together => DEFAULT_TOGETHER_BASE_URL, ApiProvider::OpenaiCodex => DEFAULT_OPENAI_CODEX_BASE_URL, + ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL, } } @@ -4696,6 +4748,7 @@ fn merge_providers( deepseek_cn: merge_provider_config(base.deepseek_cn, override_cfg.deepseek_cn), nvidia_nim: merge_provider_config(base.nvidia_nim, override_cfg.nvidia_nim), openai: merge_provider_config(base.openai, override_cfg.openai), + anthropic: merge_provider_config(base.anthropic, override_cfg.anthropic), atlascloud: merge_provider_config(base.atlascloud, override_cfg.atlascloud), wanjie_ark: merge_provider_config(base.wanjie_ark, override_cfg.wanjie_ark), openrouter: merge_provider_config(base.openrouter, 
override_cfg.openrouter), @@ -5164,6 +5217,9 @@ pub fn active_provider_has_env_api_key(config: &Config) -> bool { || std::env::var("NVIDIA_NIM_API_KEY").is_ok_and(|k| !k.trim().is_empty()) } ApiProvider::Openai => std::env::var("OPENAI_API_KEY").is_ok_and(|k| !k.trim().is_empty()), + ApiProvider::Anthropic => { + std::env::var("ANTHROPIC_API_KEY").is_ok_and(|k| !k.trim().is_empty()) + } ApiProvider::Atlascloud => { std::env::var("ATLASCLOUD_API_KEY").is_ok_and(|k| !k.trim().is_empty()) } @@ -5228,6 +5284,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool { ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY", ApiProvider::NvidiaNim => "NVIDIA_API_KEY", ApiProvider::Openai => "OPENAI_API_KEY", + ApiProvider::Anthropic => "ANTHROPIC_API_KEY", ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY", ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY", ApiProvider::Openrouter => "OPENROUTER_API_KEY", @@ -5354,6 +5411,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result } ApiProvider::NvidiaNim => "providers.nvidia_nim", ApiProvider::Openai => "providers.openai", + ApiProvider::Anthropic => "providers.anthropic", ApiProvider::Atlascloud => "providers.atlascloud", ApiProvider::WanjieArk => "providers.wanjie_ark", ApiProvider::Openrouter => "providers.openrouter", @@ -5398,6 +5456,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result } ApiProvider::NvidiaNim => "nvidia_nim", ApiProvider::Openai => "openai", + ApiProvider::Anthropic => "anthropic", ApiProvider::Atlascloud => "atlascloud", ApiProvider::WanjieArk => "wanjie_ark", ApiProvider::Openrouter => "openrouter", @@ -5495,6 +5554,7 @@ fn provider_config_key(provider: ApiProvider) -> Result<&'static str> { } ApiProvider::NvidiaNim => Ok("nvidia_nim"), ApiProvider::Openai => Ok("openai"), + ApiProvider::Anthropic => Ok("anthropic"), ApiProvider::Atlascloud => Ok("atlascloud"), ApiProvider::WanjieArk => Ok("wanjie_ark"), ApiProvider::Volcengine 
=> Ok("volcengine"), diff --git a/crates/tui/src/config_persistence.rs b/crates/tui/src/config_persistence.rs index eb1cf012..900e0e62 100644 --- a/crates/tui/src/config_persistence.rs +++ b/crates/tui/src/config_persistence.rs @@ -204,6 +204,7 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static } ApiProvider::NvidiaNim => Ok("nvidia_nim"), ApiProvider::Openai => Ok("openai"), + ApiProvider::Anthropic => Ok("anthropic"), ApiProvider::Atlascloud => Ok("atlascloud"), ApiProvider::WanjieArk => Ok("wanjie_ark"), ApiProvider::Volcengine => Ok("volcengine"), diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index ba9a2f88..0341ef3e 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -600,6 +600,7 @@ impl Engine { ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY", ApiProvider::NvidiaNim => "NVIDIA_API_KEY/NVIDIA_NIM_API_KEY", ApiProvider::Openai => "OPENAI_API_KEY", + ApiProvider::Anthropic => "ANTHROPIC_API_KEY", ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY", ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY/WANJIE_API_KEY/WANJIE_MAAS_API_KEY", ApiProvider::Volcengine => "VOLCENGINE_API_KEY/VOLCENGINE_ARK_API_KEY/ARK_API_KEY", diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 95e752be..1f84cbaf 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -1614,6 +1614,7 @@ async fn session_update_preserves_reasoning_tool_only_turn() { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "Need a tool before answering.".to_string(), }, ContentBlock::ToolUse { diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index afb1a9b0..1bc6d1d1 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -430,6 +430,9 @@ impl Engine { let mut current_text_raw = String::new(); 
let mut current_text_visible = String::new(); let mut current_thinking = String::new(); + // #3014: Anthropic signed-thinking signature for the current + // thinking block; must be replayed verbatim in tool loops. + let mut current_thinking_signature: Option = None; let mut tool_uses: Vec = Vec::new(); let mut usage = Usage { input_tokens: 0, @@ -757,6 +760,14 @@ impl Engine { .await; } } + Delta::SignatureDelta { signature } => { + // #3014: capture (and concatenate, defensively) + // the signed-thinking signature for replay. + match current_thinking_signature.as_mut() { + Some(existing) => existing.push_str(&signature), + None => current_thinking_signature = Some(signature), + } + } Delta::InputJsonDelta { partial_json } => { if let Some(&tool_idx) = current_tool_indices.get(&index) && let Some(tool_state) = tool_uses.get_mut(tool_idx) @@ -857,6 +868,14 @@ impl Engine { } } StreamEvent::MessageStop | StreamEvent::Ping => {} + StreamEvent::Error { error } => { + // #3014: Anthropic SSE error event. The adapter + // surfaces fatal errors as stream Err items; this + // defensive arm keeps any passed-through error + // visible instead of silently dropped. 
+ crate::logging::warn(format!("Provider stream error event: {error}")); + stream_errors += 1; + } } } @@ -944,7 +963,10 @@ impl Engine { None }; if let Some(thinking) = thinking_to_persist { - content_blocks.push(ContentBlock::Thinking { thinking }); + content_blocks.push(ContentBlock::Thinking { + thinking, + signature: current_thinking_signature.clone(), + }); } let mut final_text = current_text_visible.clone(); if tool_uses.is_empty() && tool_parser::has_tool_call_markers(¤t_text_raw) { diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 8ac5f8f6..32801f57 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -1990,6 +1990,10 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { "OPENAI_API_KEY", "codewhale auth set --provider openai --api-key \"...\"", ), + crate::config::ApiProvider::Anthropic => ( + "ANTHROPIC_API_KEY", + "codewhale auth set --provider anthropic --api-key \"...\"", + ), crate::config::ApiProvider::Atlascloud => ( "ATLASCLOUD_API_KEY", "codewhale auth set --provider atlascloud --api-key \"...\"", @@ -2064,6 +2068,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { match config.api_provider() { crate::config::ApiProvider::NvidiaNim => "nvidia_nim", crate::config::ApiProvider::Openai => "openai", + crate::config::ApiProvider::Anthropic => "anthropic", crate::config::ApiProvider::Atlascloud => "atlascloud", crate::config::ApiProvider::WanjieArk => "wanjie_ark", crate::config::ApiProvider::Volcengine => "volcengine", diff --git a/crates/tui/src/model_routing.rs b/crates/tui/src/model_routing.rs index 9e9b483e..af3980b6 100644 --- a/crates/tui/src/model_routing.rs +++ b/crates/tui/src/model_routing.rs @@ -334,7 +334,7 @@ fn message_response_text(response: &MessageResponse) -> String { ContentBlock::Text { text, .. } | ContentBlock::ToolResult { content: text, .. 
} => { append_router_text(&mut out, text); } - ContentBlock::Thinking { thinking } => { + ContentBlock::Thinking { thinking, .. } => { append_router_text(&mut out, thinking); } ContentBlock::ToolUse { name, .. } => { diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index 80849c94..ed091521 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -91,7 +91,15 @@ pub enum ContentBlock { #[serde(rename = "image_url")] ImageUrl { image_url: ImageUrlContent }, #[serde(rename = "thinking")] - Thinking { thinking: String }, + Thinking { + thinking: String, + /// Anthropic signed-thinking signature (#3014). Only populated on the + /// native Messages dialect and serde-skipped when absent so OpenAI + /// dialects are unaffected. Anthropic rejects tool loops that drop or + /// modify signed thinking blocks, so replay this verbatim. + #[serde(skip_serializing_if = "Option::is_none", default)] + signature: Option, + }, #[serde(rename = "tool_use")] ToolUse { id: String, @@ -249,6 +257,9 @@ pub fn context_window_for_model(model: &str) -> Option { fn known_context_window_for_model(model_lower: &str) -> Option { match model_lower { + // Anthropic 4.6+ models carry a 1M window; Haiku stays at 200K (#3014). 
+ "claude-opus-4-8" | "claude-sonnet-4-6" => Some(1_000_000), + "claude-haiku-4-5" => Some(200_000), "trinity-mini" => Some(128_000), "arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "trinity-large-preview" => { Some(262_144) @@ -285,6 +296,8 @@ pub fn max_output_tokens_for_model(model: &str) -> Option { return Some(384_000); } match lower.as_str() { + "claude-opus-4-8" => Some(128_000), + "claude-sonnet-4-6" | "claude-haiku-4-5" => Some(64_000), "arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "moonshotai/kimi-k2.6" => { Some(262_144) } @@ -314,7 +327,9 @@ pub fn model_supports_reasoning(model: &str) -> bool { } matches!( lower.as_str(), - "arcee-ai/trinity-large-thinking" + "claude-opus-4-8" + | "claude-sonnet-4-6" + | "arcee-ai/trinity-large-thinking" | "trinity-large-thinking" | "google/gemma-4-31b-it" | "google/gemma-4-31b-it:free" @@ -426,6 +441,9 @@ pub enum StreamEvent { MessageStop, #[serde(rename = "ping")] Ping, + /// Anthropic SSE error event (#3014). + #[serde(rename = "error")] + Error { error: serde_json::Value }, } #[allow(dead_code)] @@ -465,6 +483,10 @@ pub enum Delta { ThinkingDelta { thinking: String }, #[serde(rename = "input_json_delta")] InputJsonDelta { partial_json: String }, + /// Anthropic signed-thinking signature delta (#3014); arrives at the end + /// of a thinking block on the native Messages stream. 
+ #[serde(rename = "signature_delta")] + SignatureDelta { signature: String }, } #[allow(dead_code)] diff --git a/crates/tui/src/purge.rs b/crates/tui/src/purge.rs index 9a423404..82052b14 100644 --- a/crates/tui/src/purge.rs +++ b/crates/tui/src/purge.rs @@ -767,6 +767,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "let me think...".to_string(), }, ContentBlock::Text { diff --git a/crates/tui/src/rlm/session.rs b/crates/tui/src/rlm/session.rs index 551e1bc5..3f303bf8 100644 --- a/crates/tui/src/rlm/session.rs +++ b/crates/tui/src/rlm/session.rs @@ -346,7 +346,7 @@ fn compact_content_block(block: &ContentBlock) -> Value { "type": "text", "text": text, }), - ContentBlock::Thinking { thinking } => json!({ + ContentBlock::Thinking { thinking, .. } => json!({ "type": "thinking", "redacted": true, "chars": thinking.chars().count(), diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs index 7ade29e4..3eb698ae 100644 --- a/crates/tui/src/rlm/turn.rs +++ b/crates/tui/src/rlm/turn.rs @@ -952,6 +952,7 @@ mod tests { cache_control: None, }, ContentBlock::Thinking { + signature: None, thinking: "skip".to_string(), }, ContentBlock::Text { diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 61265544..2dd84378 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -5249,7 +5249,7 @@ fn message_response_text(blocks: &[ContentBlock]) -> String { } out.push_str(text); } - ContentBlock::Thinking { thinking } => { + ContentBlock::Thinking { thinking, .. 
} => { if !out.is_empty() { out.push('\n'); } diff --git a/crates/tui/src/tui/auto_router.rs b/crates/tui/src/tui/auto_router.rs index 4d5414b9..b7a65140 100644 --- a/crates/tui/src/tui/auto_router.rs +++ b/crates/tui/src/tui/auto_router.rs @@ -171,6 +171,7 @@ mod tests { role: "assistant".to_string(), content: vec![ ContentBlock::Thinking { + signature: None, thinking: "The user seems to be asking me to classify myself.".to_string(), }, ContentBlock::Text { diff --git a/crates/tui/src/tui/history.rs b/crates/tui/src/tui/history.rs index 9ca7fb54..623f9484 100644 --- a/crates/tui/src/tui/history.rs +++ b/crates/tui/src/tui/history.rs @@ -638,7 +638,7 @@ pub fn history_cells_from_message(msg: &Message) -> Vec { _ => {} } } - ContentBlock::Thinking { thinking } => { + ContentBlock::Thinking { thinking, .. } => { if let Some(HistoryCell::Thinking { content, .. }) = cells.last_mut() { if !content.is_empty() { content.push('\n'); diff --git a/crates/tui/src/tui/provider_picker.rs b/crates/tui/src/tui/provider_picker.rs index 8b51455a..6887e84f 100644 --- a/crates/tui/src/tui/provider_picker.rs +++ b/crates/tui/src/tui/provider_picker.rs @@ -104,6 +104,7 @@ impl ProviderPickerView { ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY", ApiProvider::NvidiaNim => "NVIDIA_API_KEY", ApiProvider::Openai => "OPENAI_API_KEY", + ApiProvider::Anthropic => "ANTHROPIC_API_KEY", ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY", ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY", ApiProvider::Volcengine => "VOLCENGINE_API_KEY", @@ -512,7 +513,8 @@ mod tests { "Ollama", "Hugging Face", "Together AI", - "OpenAI Codex (ChatGPT)" + "OpenAI Codex (ChatGPT)", + "Anthropic" ] ); } @@ -547,7 +549,7 @@ mod tests { let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config); picker.handle_key(key(KeyCode::Up)); - assert_eq!(picker.selected_provider(), ApiProvider::OpenaiCodex); + assert_eq!(picker.selected_provider(), ApiProvider::Anthropic); 
picker.handle_key(key(KeyCode::Down)); assert_eq!(picker.selected_provider(), ApiProvider::Deepseek); diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 09c13443..9c873f92 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -4944,7 +4944,10 @@ fn push_assistant_message( ) { let mut blocks = Vec::new(); if let Some(thinking) = thinking { - blocks.push(ContentBlock::Thinking { thinking }); + blocks.push(ContentBlock::Thinking { + thinking, + signature: None, + }); } if !text.is_empty() { blocks.push(ContentBlock::Text { @@ -7164,6 +7167,7 @@ fn render(f: &mut Frame, app: &mut App) { crate::config::ApiProvider::DeepseekCN => None, crate::config::ApiProvider::NvidiaNim => Some("NIM"), crate::config::ApiProvider::Openai => Some("OpenAI"), + crate::config::ApiProvider::Anthropic => Some("Claude"), crate::config::ApiProvider::Atlascloud => Some("Atlas"), crate::config::ApiProvider::WanjieArk => Some("Wanjie"), crate::config::ApiProvider::Volcengine => Some("Volc"), @@ -8224,6 +8228,7 @@ async fn apply_provider_picker_api_key( ApiProvider::Huggingface => &mut providers.huggingface, ApiProvider::Together => &mut providers.together, ApiProvider::OpenaiCodex => &mut providers.openai_codex, + ApiProvider::Anthropic => &mut providers.anthropic, }; entry.api_key = Some(api_key); } @@ -8283,6 +8288,7 @@ fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider, ApiProvider::Huggingface => &mut providers.huggingface, ApiProvider::Together => &mut providers.together, ApiProvider::OpenaiCodex => &mut providers.openai_codex, + ApiProvider::Anthropic => &mut providers.anthropic, }; entry.auth_mode = Some(auth_mode); } diff --git a/crates/tui/src/utils.rs b/crates/tui/src/utils.rs index 8fdf019d..b7f99baf 100644 --- a/crates/tui/src/utils.rs +++ b/crates/tui/src/utils.rs @@ -498,7 +498,7 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize { for block in &msg.content { match block { ContentBlock::Text { text, .. 
} => total += text.len(), - ContentBlock::Thinking { thinking } => total += thinking.len(), + ContentBlock::Thinking { thinking, .. } => total += thinking.len(), ContentBlock::ToolUse { input, .. } => total += input.to_string().len(), ContentBlock::ToolResult { content, .. } => total += content.len(), ContentBlock::ServerToolUse { .. } diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f0e0338f..4b3f830f 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -462,6 +462,9 @@ Remaining variables: - `ARCEE_API_KEY` - `ARCEE_BASE_URL` - `ARCEE_MODEL` +- `ANTHROPIC_API_KEY` +- `ANTHROPIC_BASE_URL` +- `ANTHROPIC_MODEL` - `MOONSHOT_API_KEY` or `KIMI_API_KEY` - `MOONSHOT_BASE_URL` or `KIMI_BASE_URL` - `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, or `KIMI_MODEL` diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md index 89556be3..dc224534 100644 --- a/docs/PROVIDERS.md +++ b/docs/PROVIDERS.md @@ -30,8 +30,8 @@ The canonical provider IDs are: `deepseek`, `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`, `openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`, -`siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, `ollama`, and -`huggingface`. +`siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, `ollama`, +`huggingface`, `together`, `openai-codex`, and `anthropic`. Use any of these surfaces to select a provider: @@ -137,6 +137,7 @@ endpoint. | `huggingface` | `[providers.huggingface]` | `HUGGINGFACE_API_KEY`, `HF_TOKEN` | `HUGGINGFACE_BASE_URL`; default `https://router.huggingface.co/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Hugging Face Inference Providers OpenAI-compatible route. Org-prefixed model IDs pass through. | | `together` | `[providers.together]` | `TOGETHER_API_KEY` | `TOGETHER_BASE_URL`; default `https://api.together.xyz/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Together AI OpenAI-compatible route. `TOGETHER_MODEL` is accepted. 
Model aliases `deepseek-v4-pro` and `deepseek-v4-flash` normalize to Together's org-prefixed IDs. | | `openai-codex` | `[providers.openai_codex]` | OAuth via `codex login` (`~/.codex/auth.json`); env override `OPENAI_CODEX_ACCESS_TOKEN`, `CODEX_ACCESS_TOKEN` | `OPENAI_CODEX_BASE_URL`/`CODEX_BASE_URL`; default `https://chatgpt.com/backend-api` | `gpt-5.5` | **Experimental.** Reuses your existing ChatGPT/Codex CLI OAuth login and talks to the OpenAI Responses API at `/codex/responses`. The access token is read and refreshed from `~/.codex/auth.json`; no API key is stored. `OPENAI_CODEX_MODEL`/`CODEX_MODEL` and `OPENAI_CODEX_ACCOUNT_ID`/`CODEX_ACCOUNT_ID` are accepted. | +| `anthropic` | `[providers.anthropic]` | `ANTHROPIC_API_KEY` | `ANTHROPIC_BASE_URL`; default `https://api.anthropic.com` | `claude-opus-4-8`, `claude-sonnet-4-6` (default), `claude-haiku-4-5` | Native Anthropic Messages API route (`/v1/messages`, `x-api-key` + `anthropic-version: 2023-06-01`) — not OpenAI-compatible. Prompt caching via `cache_control` breakpoints, adaptive thinking + `output_config.effort`, signed thinking blocks replayed verbatim, cache telemetry normalized per #2961. `ANTHROPIC_MODEL` is accepted. | ### Hugging Face Provider vs MCP vs Hub @@ -219,6 +220,7 @@ endpoint when the endpoint supports model listing. | `huggingface` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | no | | `together` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes | | `openai-codex` | `gpt-5.5` | yes | yes | +| `anthropic` | `claude-opus-4-8`, `claude-sonnet-4-6`, `claude-haiku-4-5` | yes | yes for `claude-opus-4-8` and `claude-sonnet-4-6`; no for `claude-haiku-4-5` | AtlasCloud keeps the same default model as the config layer and adds provider-scoped aliases for the Pro and Flash rows. Other AtlasCloud model IDs