feat(client): remove dead responses_api_proxy module and EXPERIMENTAL_RESPONSES_API_ENV plumbing
Deletes crates/tui/src/responses_api_proxy/ (443 LOC), client/responses.rs (406 LOC), and removes the ResponsesApiProxy CLI command, the EXPERIMENTAL_RESPONSES_API_ENV env var plumbing, chat_fallback_counter, use_chat_completions, RESPONSES_RECOVERY_INTERVAL, and the RequestPayloadMode::ResponsesApi variant. The experimental Responses API path was never instantiated and had no documented users; removing it simplifies the client surface for the upcoming --anthropic-wire flag. Closes #723
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,406 +0,0 @@
|
||||
//! Responses API helpers for the experimental DeepSeek endpoint.
|
||||
//!
|
||||
//! Gated behind `DEEPSEEK_EXPERIMENTAL_RESPONSES_API`. Normal traffic uses
|
||||
//! chat completions via `crate::client::chat`.
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use serde_json::{Value, json};
|
||||
|
||||
use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, Tool, ToolCaller};
|
||||
|
||||
use super::{
|
||||
DeepSeekClient, ERROR_BODY_MAX_BYTES, api_url, apply_reasoning_effort, bounded_error_text,
|
||||
from_api_tool_name, parse_usage, system_to_instructions, to_api_tool_name,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct ResponsesFallback {
|
||||
pub(super) status: u16,
|
||||
pub(super) body: String,
|
||||
}
|
||||
|
||||
impl DeepSeekClient {
|
||||
pub(super) async fn create_message_responses(
|
||||
&self,
|
||||
request: &MessageRequest,
|
||||
) -> Result<Result<MessageResponse, ResponsesFallback>> {
|
||||
let mut body = json!({
|
||||
"model": request.model,
|
||||
"input": build_responses_input(&request.messages),
|
||||
"store": false,
|
||||
"max_output_tokens": request.max_tokens,
|
||||
});
|
||||
|
||||
if let Some(instructions) = system_to_instructions(request.system.clone()) {
|
||||
body["instructions"] = json!(instructions);
|
||||
}
|
||||
if let Some(temperature) = request.temperature {
|
||||
body["temperature"] = json!(temperature);
|
||||
}
|
||||
if let Some(top_p) = request.top_p {
|
||||
body["top_p"] = json!(top_p);
|
||||
}
|
||||
if let Some(tools) = request.tools.as_ref() {
|
||||
body["tools"] = json!(tools.iter().map(tool_to_responses).collect::<Vec<_>>());
|
||||
}
|
||||
if let Some(choice) = request.tool_choice.as_ref() {
|
||||
body["tool_choice"] = choice.clone();
|
||||
}
|
||||
apply_reasoning_effort(
|
||||
&mut body,
|
||||
request.reasoning_effort.as_deref(),
|
||||
self.api_provider,
|
||||
);
|
||||
|
||||
let url = api_url(&self.base_url, "responses");
|
||||
let response = self
|
||||
.send_with_retry(|| self.http_client.post(&url).json(&body))
|
||||
.await?;
|
||||
|
||||
let status = response.status();
|
||||
|
||||
if status.as_u16() == 404 || status.as_u16() == 405 {
|
||||
let body = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
|
||||
return Ok(Err(ResponsesFallback {
|
||||
status: status.as_u16(),
|
||||
body,
|
||||
}));
|
||||
}
|
||||
|
||||
if !status.is_success() {
|
||||
let error_text = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
|
||||
anyhow::bail!("Failed to call DeepSeek Responses API: HTTP {status}: {error_text}");
|
||||
}
|
||||
|
||||
let response_text = response.text().await.unwrap_or_default();
|
||||
let value: Value =
|
||||
serde_json::from_str(&response_text).context("Failed to parse Responses API JSON")?;
|
||||
let message = parse_responses_message(&value)?;
|
||||
Ok(Ok(message))
|
||||
}
|
||||
}
|
||||
|
||||
fn build_responses_input(messages: &[Message]) -> Vec<Value> {
|
||||
let mut items = Vec::new();
|
||||
|
||||
for message in messages {
|
||||
let role = message.role.as_str();
|
||||
let text_type = if role == "user" {
|
||||
"input_text"
|
||||
} else {
|
||||
"output_text"
|
||||
};
|
||||
|
||||
for block in &message.content {
|
||||
match block {
|
||||
ContentBlock::Text { text, .. } => {
|
||||
items.push(json!({
|
||||
"type": "message",
|
||||
"role": role,
|
||||
"content": [{
|
||||
"type": text_type,
|
||||
"text": text,
|
||||
}]
|
||||
}));
|
||||
}
|
||||
ContentBlock::ToolUse {
|
||||
id,
|
||||
name,
|
||||
input,
|
||||
caller,
|
||||
} => {
|
||||
let args = serde_json::to_string(input).unwrap_or_else(|_| input.to_string());
|
||||
let mut item = json!({
|
||||
"type": "function_call",
|
||||
"call_id": id,
|
||||
"name": to_api_tool_name(name),
|
||||
"arguments": args,
|
||||
});
|
||||
if let Some(caller) = caller {
|
||||
item["caller"] = json!({
|
||||
"type": caller.caller_type,
|
||||
"tool_id": caller.tool_id,
|
||||
});
|
||||
}
|
||||
items.push(item);
|
||||
}
|
||||
ContentBlock::ToolResult {
|
||||
tool_use_id,
|
||||
content,
|
||||
is_error,
|
||||
..
|
||||
} => {
|
||||
let mut item = json!({
|
||||
"type": "function_call_output",
|
||||
"call_id": tool_use_id,
|
||||
"output": content,
|
||||
});
|
||||
if let Some(is_error) = is_error {
|
||||
item["is_error"] = json!(is_error);
|
||||
}
|
||||
items.push(item);
|
||||
}
|
||||
ContentBlock::Thinking { .. } => {}
|
||||
ContentBlock::ServerToolUse { id, name, input } => {
|
||||
items.push(json!({
|
||||
"type": "server_tool_use",
|
||||
"id": id,
|
||||
"name": name,
|
||||
"input": input,
|
||||
}));
|
||||
}
|
||||
ContentBlock::ToolSearchToolResult {
|
||||
tool_use_id,
|
||||
content,
|
||||
} => {
|
||||
items.push(json!({
|
||||
"type": "tool_search_tool_result",
|
||||
"tool_use_id": tool_use_id,
|
||||
"content": content,
|
||||
}));
|
||||
}
|
||||
ContentBlock::CodeExecutionToolResult {
|
||||
tool_use_id,
|
||||
content,
|
||||
} => {
|
||||
items.push(json!({
|
||||
"type": "code_execution_tool_result",
|
||||
"tool_use_id": tool_use_id,
|
||||
"content": content,
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
items
|
||||
}
|
||||
|
||||
fn tool_to_responses(tool: &Tool) -> Value {
|
||||
let tool_type = tool.tool_type.as_deref().unwrap_or("function");
|
||||
let mut value = if tool_type == "function" {
|
||||
json!({
|
||||
"type": "function",
|
||||
"name": to_api_tool_name(&tool.name),
|
||||
"description": tool.description,
|
||||
"parameters": tool.input_schema,
|
||||
})
|
||||
} else if tool_type == "code_execution_20250825" {
|
||||
json!({
|
||||
"type": tool_type,
|
||||
"name": to_api_tool_name(&tool.name),
|
||||
})
|
||||
} else {
|
||||
json!({
|
||||
"type": tool_type,
|
||||
"name": to_api_tool_name(&tool.name),
|
||||
"description": tool.description,
|
||||
"input_schema": tool.input_schema,
|
||||
})
|
||||
};
|
||||
|
||||
if let Some(allowed_callers) = &tool.allowed_callers {
|
||||
value["allowed_callers"] = json!(allowed_callers);
|
||||
}
|
||||
if let Some(defer_loading) = tool.defer_loading {
|
||||
value["defer_loading"] = json!(defer_loading);
|
||||
}
|
||||
if let Some(input_examples) = &tool.input_examples {
|
||||
value["input_examples"] = json!(input_examples);
|
||||
}
|
||||
if let Some(strict) = tool.strict {
|
||||
value["strict"] = json!(strict);
|
||||
}
|
||||
value
|
||||
}
|
||||
|
||||
fn parse_responses_message(payload: &Value) -> Result<MessageResponse> {
|
||||
let id = payload
|
||||
.get("id")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("response")
|
||||
.to_string();
|
||||
let model = payload
|
||||
.get("model")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("unknown")
|
||||
.to_string();
|
||||
|
||||
let usage = parse_usage(payload.get("usage"));
|
||||
let mut content = Vec::new();
|
||||
|
||||
if let Some(output) = payload.get("output").and_then(Value::as_array) {
|
||||
for item in output {
|
||||
let item_type = item.get("type").and_then(Value::as_str).unwrap_or("");
|
||||
match item_type {
|
||||
"message" => {
|
||||
if let Some(role) = item.get("role").and_then(Value::as_str)
|
||||
&& role != "assistant"
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if let Some(content_items) = item.get("content").and_then(Value::as_array) {
|
||||
for content_item in content_items {
|
||||
let content_type = content_item
|
||||
.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("output_text");
|
||||
if content_type != "output_text" && content_type != "text" {
|
||||
continue;
|
||||
}
|
||||
if let Some(text) = content_item.get("text").and_then(Value::as_str)
|
||||
&& !text.trim().is_empty()
|
||||
{
|
||||
content.push(ContentBlock::Text {
|
||||
text: text.to_string(),
|
||||
cache_control: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"function_call" => {
|
||||
let call_id = item
|
||||
.get("call_id")
|
||||
.or_else(|| item.get("id"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("tool_call")
|
||||
.to_string();
|
||||
let name = item
|
||||
.get("name")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("tool")
|
||||
.to_string();
|
||||
let input = match item.get("arguments") {
|
||||
Some(Value::String(raw)) => {
|
||||
serde_json::from_str(raw).unwrap_or_else(|_| Value::String(raw.clone()))
|
||||
}
|
||||
Some(other) => other.clone(),
|
||||
None => Value::Null,
|
||||
};
|
||||
let caller = item.get("caller").and_then(|v| {
|
||||
v.get("type")
|
||||
.and_then(Value::as_str)
|
||||
.map(|caller_type| ToolCaller {
|
||||
caller_type: caller_type.to_string(),
|
||||
tool_id: v
|
||||
.get("tool_id")
|
||||
.and_then(Value::as_str)
|
||||
.map(std::string::ToString::to_string),
|
||||
})
|
||||
});
|
||||
content.push(ContentBlock::ToolUse {
|
||||
id: call_id,
|
||||
name: from_api_tool_name(&name),
|
||||
input,
|
||||
caller,
|
||||
});
|
||||
}
|
||||
"function_call_output" => {
|
||||
let tool_use_id = item
|
||||
.get("call_id")
|
||||
.or_else(|| item.get("tool_use_id"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("tool_call")
|
||||
.to_string();
|
||||
let content_text = item
|
||||
.get("output")
|
||||
.or_else(|| item.get("content"))
|
||||
.map(|v| {
|
||||
if let Some(s) = v.as_str() {
|
||||
s.to_string()
|
||||
} else {
|
||||
v.to_string()
|
||||
}
|
||||
})
|
||||
.unwrap_or_default();
|
||||
let is_error = item.get("is_error").and_then(Value::as_bool);
|
||||
content.push(ContentBlock::ToolResult {
|
||||
tool_use_id,
|
||||
content: content_text,
|
||||
is_error,
|
||||
content_blocks: None,
|
||||
});
|
||||
}
|
||||
"server_tool_use" => {
|
||||
let id = item
|
||||
.get("id")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("server_tool")
|
||||
.to_string();
|
||||
let name = item
|
||||
.get("name")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("server_tool")
|
||||
.to_string();
|
||||
let input = item.get("input").cloned().unwrap_or(Value::Null);
|
||||
content.push(ContentBlock::ServerToolUse { id, name, input });
|
||||
}
|
||||
"tool_search_tool_result" => {
|
||||
let tool_use_id = item
|
||||
.get("tool_use_id")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("tool_search")
|
||||
.to_string();
|
||||
let content_value = item.get("content").cloned().unwrap_or(Value::Null);
|
||||
content.push(ContentBlock::ToolSearchToolResult {
|
||||
tool_use_id,
|
||||
content: content_value,
|
||||
});
|
||||
}
|
||||
"code_execution_tool_result" => {
|
||||
let tool_use_id = item
|
||||
.get("tool_use_id")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("code_execution")
|
||||
.to_string();
|
||||
let content_value = item.get("content").cloned().unwrap_or(Value::Null);
|
||||
content.push(ContentBlock::CodeExecutionToolResult {
|
||||
tool_use_id,
|
||||
content: content_value,
|
||||
});
|
||||
}
|
||||
"reasoning" => {
|
||||
if let Some(summary) = item.get("summary").and_then(Value::as_array) {
|
||||
let summary_text = summary
|
||||
.iter()
|
||||
.filter_map(|s| s.get("text").and_then(Value::as_str))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
if !summary_text.trim().is_empty() {
|
||||
content.push(ContentBlock::Thinking {
|
||||
thinking: summary_text,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if content.is_empty()
|
||||
&& let Some(text) = payload.get("output_text").and_then(Value::as_str)
|
||||
&& !text.trim().is_empty()
|
||||
{
|
||||
content.push(ContentBlock::Text {
|
||||
text: text.to_string(),
|
||||
cache_control: None,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(MessageResponse {
|
||||
id,
|
||||
r#type: "message".to_string(),
|
||||
role: "assistant".to_string(),
|
||||
content,
|
||||
model,
|
||||
stop_reason: None,
|
||||
stop_sequence: None,
|
||||
container: payload
|
||||
.get("container")
|
||||
.cloned()
|
||||
.and_then(|v| serde_json::from_value(v).ok()),
|
||||
usage,
|
||||
})
|
||||
}
|
||||
@@ -144,8 +144,6 @@ pub struct ProviderCapability {
|
||||
pub enum RequestPayloadMode {
|
||||
/// Standard OpenAI-compatible `/v1/chat/completions` payload.
|
||||
ChatCompletions,
|
||||
/// Anthropic-style Responses API (DeepSeek experimental).
|
||||
ResponsesApi,
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -48,7 +48,6 @@ mod project_context;
|
||||
mod project_doc;
|
||||
mod prompts;
|
||||
pub mod repl;
|
||||
mod responses_api_proxy;
|
||||
mod retry_status;
|
||||
pub mod rlm;
|
||||
mod runtime_api;
|
||||
@@ -243,9 +242,6 @@ enum Commands {
|
||||
#[arg(long = "last", default_value_t = false, conflicts_with = "session_id")]
|
||||
last: bool,
|
||||
},
|
||||
/// Internal: run the responses API proxy.
|
||||
#[command(hide = true)]
|
||||
ResponsesApiProxy(responses_api_proxy::Args),
|
||||
}
|
||||
|
||||
#[derive(Args, Debug, Clone)]
|
||||
@@ -726,10 +722,6 @@ async fn main() -> Result<()> {
|
||||
let new_session_id = fork_session(session_id, last)?;
|
||||
run_interactive(&cli, &config, Some(new_session_id), None).await
|
||||
}
|
||||
Commands::ResponsesApiProxy(args) => {
|
||||
responses_api_proxy::run_main(args)?;
|
||||
Ok(())
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,226 +0,0 @@
|
||||
use std::fs::{self, File};
|
||||
use std::io::Write;
|
||||
use std::net::{SocketAddr, TcpListener};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use clap::Parser;
|
||||
use reqwest::Url;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{AUTHORIZATION, HOST, HeaderMap, HeaderName, HeaderValue};
|
||||
use serde::Serialize;
|
||||
use tiny_http::{Header, Method, Request, Response, Server, StatusCode};
|
||||
|
||||
mod read_api_key;
|
||||
use read_api_key::read_auth_header_from_stdin;
|
||||
|
||||
/// CLI arguments for the proxy.
|
||||
#[derive(Debug, Clone, Parser)]
|
||||
#[command(
|
||||
name = "responses-api-proxy",
|
||||
about = "Minimal DeepSeek responses proxy"
|
||||
)]
|
||||
pub struct Args {
|
||||
/// Port to listen on. If not set, an ephemeral port is used.
|
||||
#[arg(long)]
|
||||
pub port: Option<u16>,
|
||||
|
||||
/// Path to a JSON file to write startup info (single line). Includes `{"port": 12345}`.
|
||||
#[arg(long, value_name = "FILE")]
|
||||
pub server_info: Option<PathBuf>,
|
||||
|
||||
/// Enable HTTP shutdown endpoint at GET /shutdown
|
||||
#[arg(long)]
|
||||
pub http_shutdown: bool,
|
||||
|
||||
/// Absolute URL the proxy should forward requests to.
|
||||
#[arg(long, default_value = "https://api.deepseek.com/v1/responses")]
|
||||
pub upstream_url: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct ServerInfo {
|
||||
port: u16,
|
||||
pid: u32,
|
||||
}
|
||||
|
||||
struct ForwardConfig {
|
||||
upstream_url: Url,
|
||||
host_header: HeaderValue,
|
||||
}
|
||||
|
||||
/// Entry point for the proxy server.
|
||||
pub fn run_main(args: Args) -> Result<()> {
|
||||
let auth_header = read_auth_header_from_stdin()?;
|
||||
|
||||
let upstream_url = Url::parse(&args.upstream_url).context("parsing --upstream-url")?;
|
||||
let host = match (upstream_url.host_str(), upstream_url.port()) {
|
||||
(Some(host), Some(port)) => format!("{host}:{port}"),
|
||||
(Some(host), None) => host.to_string(),
|
||||
_ => return Err(anyhow!("upstream URL must include a host")),
|
||||
};
|
||||
let host_header =
|
||||
HeaderValue::from_str(&host).context("constructing Host header from upstream URL")?;
|
||||
|
||||
let forward_config = Arc::new(ForwardConfig {
|
||||
upstream_url,
|
||||
host_header,
|
||||
});
|
||||
|
||||
let (listener, bound_addr) = bind_listener(args.port)?;
|
||||
if let Some(path) = args.server_info.as_ref() {
|
||||
write_server_info(path, bound_addr.port())?;
|
||||
}
|
||||
let server = Server::from_listener(listener, None)
|
||||
.map_err(|err| anyhow!("creating HTTP server: {err}"))?;
|
||||
let client = Arc::new(
|
||||
Client::builder()
|
||||
// Disable reqwest's 30s default so long-lived response streams keep flowing.
|
||||
.timeout(None::<Duration>)
|
||||
.build()
|
||||
.context("building reqwest client")?,
|
||||
);
|
||||
|
||||
eprintln!("responses-api-proxy listening on {bound_addr}");
|
||||
|
||||
let http_shutdown = args.http_shutdown;
|
||||
for request in server.incoming_requests() {
|
||||
let client = client.clone();
|
||||
let forward_config = forward_config.clone();
|
||||
std::thread::spawn(move || {
|
||||
if http_shutdown && request.method() == &Method::Get && request.url() == "/shutdown" {
|
||||
let _ = request.respond(Response::new_empty(StatusCode(200)));
|
||||
std::process::exit(0);
|
||||
}
|
||||
|
||||
if let Err(e) = forward_request(&client, auth_header, &forward_config, request) {
|
||||
eprintln!("forwarding error: {e}");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Err(anyhow!("server stopped unexpectedly"))
|
||||
}
|
||||
|
||||
fn bind_listener(port: Option<u16>) -> Result<(TcpListener, SocketAddr)> {
|
||||
let addr = SocketAddr::from(([127, 0, 0, 1], port.unwrap_or(0)));
|
||||
let listener = TcpListener::bind(addr).with_context(|| format!("failed to bind {addr}"))?;
|
||||
let bound = listener.local_addr().context("failed to read local_addr")?;
|
||||
Ok((listener, bound))
|
||||
}
|
||||
|
||||
fn write_server_info(path: &Path, port: u16) -> Result<()> {
|
||||
if let Some(parent) = path.parent()
|
||||
&& !parent.as_os_str().is_empty()
|
||||
{
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
let info = ServerInfo {
|
||||
port,
|
||||
pid: std::process::id(),
|
||||
};
|
||||
let mut data = serde_json::to_string(&info)?;
|
||||
data.push('\n');
|
||||
let mut f = File::create(path)?;
|
||||
f.write_all(data.as_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn forward_request(
|
||||
client: &Client,
|
||||
auth_header: &'static str,
|
||||
config: &ForwardConfig,
|
||||
mut req: Request,
|
||||
) -> Result<()> {
|
||||
// Only allow POST /v1/responses exactly, no query string.
|
||||
let method = req.method().clone();
|
||||
let url_path = req.url().to_string();
|
||||
let allow = method == Method::Post && url_path == "/v1/responses";
|
||||
|
||||
if !allow {
|
||||
let resp = Response::new_empty(StatusCode(403));
|
||||
let _ = req.respond(resp);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Read request body
|
||||
let mut body = Vec::new();
|
||||
let mut reader = req.as_reader();
|
||||
std::io::Read::read_to_end(&mut reader, &mut body)?;
|
||||
|
||||
// Build headers for upstream, forwarding everything from the incoming
|
||||
// request except Authorization (we replace it below).
|
||||
let mut headers = HeaderMap::new();
|
||||
for header in req.headers() {
|
||||
let name_ascii = header.field.as_str();
|
||||
let lower = name_ascii.to_ascii_lowercase();
|
||||
if lower.as_str() == "authorization" || lower.as_str() == "host" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let header_name = match HeaderName::from_bytes(lower.as_bytes()) {
|
||||
Ok(name) => name,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if let Ok(value) = HeaderValue::from_bytes(header.value.as_bytes()) {
|
||||
headers.append(header_name, value);
|
||||
}
|
||||
}
|
||||
|
||||
// As part of our effort to keep `auth_header` secret, we use a
|
||||
// combination of `from_static()` and `set_sensitive(true)`.
|
||||
let mut auth_header_value = HeaderValue::from_static(auth_header);
|
||||
auth_header_value.set_sensitive(true);
|
||||
headers.insert(AUTHORIZATION, auth_header_value);
|
||||
|
||||
headers.insert(HOST, config.host_header.clone());
|
||||
|
||||
let upstream_resp = client
|
||||
.post(config.upstream_url.clone())
|
||||
.headers(headers)
|
||||
.body(body)
|
||||
.send()
|
||||
.context("forwarding request to upstream")?;
|
||||
|
||||
// We have to create an adapter between a `reqwest::blocking::Response`
|
||||
// and a `tiny_http::Response`. Fortunately, `reqwest::blocking::Response`
|
||||
// implements `Read`, so we can use it directly as the body of the
|
||||
// `tiny_http::Response`.
|
||||
let status = upstream_resp.status();
|
||||
let mut response_headers = Vec::new();
|
||||
for (name, value) in upstream_resp.headers().iter() {
|
||||
// Skip headers that tiny_http manages itself.
|
||||
if matches!(
|
||||
name.as_str(),
|
||||
"content-length" | "transfer-encoding" | "connection" | "trailer" | "upgrade"
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(header) = Header::from_bytes(name.as_str().as_bytes(), value.as_bytes()) {
|
||||
response_headers.push(header);
|
||||
}
|
||||
}
|
||||
|
||||
let content_length = upstream_resp.content_length().and_then(|len| {
|
||||
if len <= usize::MAX as u64 {
|
||||
Some(len as usize)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let response = Response::new(
|
||||
StatusCode(status.as_u16()),
|
||||
response_headers,
|
||||
upstream_resp,
|
||||
content_length,
|
||||
None,
|
||||
);
|
||||
|
||||
let _ = req.respond(response);
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,217 +0,0 @@
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use zeroize::Zeroize;
|
||||
|
||||
/// Use a generous buffer size to avoid truncation and to allow for longer API
|
||||
/// keys in the future.
|
||||
const BUFFER_SIZE: usize = 1024;
|
||||
const AUTH_HEADER_PREFIX: &[u8] = b"Bearer ";
|
||||
|
||||
/// Reads the auth token from stdin and returns a static `Authorization` header
|
||||
/// value with the auth token used with `Bearer`. The header value is returned
|
||||
/// as a `&'static str` whose bytes are locked in memory to avoid accidental
|
||||
/// exposure.
|
||||
#[cfg(unix)]
|
||||
pub(crate) fn read_auth_header_from_stdin() -> Result<&'static str> {
|
||||
read_auth_header_with(read_from_unix_stdin)
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub(crate) fn read_auth_header_from_stdin() -> Result<&'static str> {
|
||||
use std::io::Read;
|
||||
|
||||
// Use of `stdio::io::stdin()` has the problem mentioned in the docstring on
|
||||
// the UNIX version of `read_from_unix_stdin()`, so this should ultimately
|
||||
// be replaced the low-level Windows equivalent. Because we do not have an
|
||||
// equivalent of mlock() on Windows right now, it is not pressing until we
|
||||
// address that issue.
|
||||
read_auth_header_with(|buffer| std::io::stdin().read(buffer))
|
||||
}
|
||||
|
||||
/// We perform a low-level read with `read(2)` because `stdio::io::stdin()` has
|
||||
/// an internal BufReader:
|
||||
///
|
||||
/// <https://github.com/rust-lang/rust/blob/bcbbdcb8522fd3cb4a8dde62313b251ab107694d/library/std/src/io/stdio.rs#L250-L252>
|
||||
///
|
||||
/// that can end up retaining a copy of stdin data in memory with no way to zero
|
||||
/// it out, whereas we aim to guarantee there is exactly one copy of the API key
|
||||
/// in memory, protected by mlock(2).
|
||||
#[cfg(unix)]
|
||||
fn read_from_unix_stdin(buffer: &mut [u8]) -> std::io::Result<usize> {
|
||||
use libc::c_void;
|
||||
use libc::read;
|
||||
|
||||
// Perform a single read(2) call into the provided buffer slice.
|
||||
// Looping and newline/EOF handling are managed by the caller.
|
||||
loop {
|
||||
let result = unsafe {
|
||||
read(
|
||||
libc::STDIN_FILENO,
|
||||
buffer.as_mut_ptr().cast::<c_void>(),
|
||||
buffer.len(),
|
||||
)
|
||||
};
|
||||
|
||||
if result == 0 {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
if result < 0 {
|
||||
let err = std::io::Error::last_os_error();
|
||||
if err.kind() == std::io::ErrorKind::Interrupted {
|
||||
continue;
|
||||
}
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
return Ok(result as usize);
|
||||
}
|
||||
}
|
||||
|
||||
fn read_auth_header_with<F>(mut read_fn: F) -> Result<&'static str>
|
||||
where
|
||||
F: FnMut(&mut [u8]) -> std::io::Result<usize>,
|
||||
{
|
||||
// TAKE CARE WHEN MODIFYING THIS CODE!!!
|
||||
//
|
||||
// This function goes to great lengths to avoid leaving the API key in
|
||||
// memory longer than necessary and to avoid copying it around. We read
|
||||
// directly into a stack buffer so the only heap allocation should be the
|
||||
// one to create the String (with the exact size) for the header value,
|
||||
// which we then immediately protect with mlock(2).
|
||||
let mut buf = [0u8; BUFFER_SIZE];
|
||||
buf[..AUTH_HEADER_PREFIX.len()].copy_from_slice(AUTH_HEADER_PREFIX);
|
||||
|
||||
let prefix_len = AUTH_HEADER_PREFIX.len();
|
||||
let capacity = buf.len() - prefix_len;
|
||||
let mut total_read = 0usize; // number of bytes read into the token region
|
||||
let mut saw_newline = false;
|
||||
let mut saw_eof = false;
|
||||
|
||||
while total_read < capacity {
|
||||
let slice = &mut buf[prefix_len + total_read..];
|
||||
let read = match read_fn(slice) {
|
||||
Ok(n) => n,
|
||||
Err(err) => {
|
||||
buf.zeroize();
|
||||
return Err(err.into());
|
||||
}
|
||||
};
|
||||
|
||||
if read == 0 {
|
||||
saw_eof = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Search only the newly written region for a newline.
|
||||
let newly_written = &slice[..read];
|
||||
if let Some(pos) = newly_written.iter().position(|&b| b == b'\n') {
|
||||
total_read += pos + 1; // include the newline for trimming below
|
||||
saw_newline = true;
|
||||
break;
|
||||
}
|
||||
|
||||
total_read += read;
|
||||
|
||||
// Continue loop; if buffer fills without newline/EOF we'll error below.
|
||||
}
|
||||
|
||||
// If buffer filled and we did not see newline or EOF, error out.
|
||||
if total_read == capacity && !saw_newline && !saw_eof {
|
||||
buf.zeroize();
|
||||
return Err(anyhow!(
|
||||
"API key is too large to fit in the {BUFFER_SIZE}-byte buffer"
|
||||
));
|
||||
}
|
||||
|
||||
let mut total = prefix_len + total_read;
|
||||
while total > prefix_len && (buf[total - 1] == b'\n' || buf[total - 1] == b'\r') {
|
||||
total -= 1;
|
||||
}
|
||||
|
||||
if total == AUTH_HEADER_PREFIX.len() {
|
||||
buf.zeroize();
|
||||
return Err(anyhow!(
|
||||
"API key must be provided via stdin (e.g. printenv DEEPSEEK_API_KEY | deepseek responses-api-proxy)"
|
||||
));
|
||||
}
|
||||
|
||||
if let Err(err) = validate_auth_header_bytes(&buf[AUTH_HEADER_PREFIX.len()..total]) {
|
||||
buf.zeroize();
|
||||
return Err(err);
|
||||
}
|
||||
|
||||
let header_str = match std::str::from_utf8(&buf[..total]) {
|
||||
Ok(value) => value,
|
||||
Err(err) => {
|
||||
// In theory, validate_auth_header_bytes() should have caught
|
||||
// any invalid UTF-8 sequences, but just in case...
|
||||
buf.zeroize();
|
||||
return Err(err).context("reading Authorization header from stdin as UTF-8");
|
||||
}
|
||||
};
|
||||
|
||||
let header_value = String::from(header_str);
|
||||
buf.zeroize();
|
||||
|
||||
let leaked: &'static mut str = header_value.leak();
|
||||
mlock_str(leaked);
|
||||
|
||||
Ok(leaked)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn mlock_str(value: &str) {
|
||||
use libc::_SC_PAGESIZE;
|
||||
use libc::c_void;
|
||||
use libc::mlock;
|
||||
use libc::sysconf;
|
||||
|
||||
if value.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let page_size = unsafe { sysconf(_SC_PAGESIZE) };
|
||||
if page_size <= 0 {
|
||||
return;
|
||||
}
|
||||
let page_size = page_size as usize;
|
||||
if page_size == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let addr = value.as_ptr() as usize;
|
||||
let len = value.len();
|
||||
let start = addr & !(page_size - 1);
|
||||
let addr_end = match addr.checked_add(len) {
|
||||
Some(v) => match v.checked_add(page_size - 1) {
|
||||
Some(total) => total,
|
||||
None => return,
|
||||
},
|
||||
None => return,
|
||||
};
|
||||
let end = addr_end & !(page_size - 1);
|
||||
let size = end.saturating_sub(start);
|
||||
if size == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let _ = unsafe { mlock(start as *const c_void, size) };
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn mlock_str(_value: &str) {}
|
||||
|
||||
/// The key should match /^[A-Za-z0-9\-_]+$/. Ensure there is no funny business
|
||||
/// with NUL characters and whatnot.
|
||||
fn validate_auth_header_bytes(key_bytes: &[u8]) -> Result<()> {
|
||||
if key_bytes
|
||||
.iter()
|
||||
.all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_'))
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"API key may only contain ASCII letters, numbers, '-' or '_'"
|
||||
))
|
||||
}
|
||||
Reference in New Issue
Block a user