Adds OpenAI-compatible `image_url` content blocks to the chat message model and wires attached images through `build_chat_messages_with_reasoning` as multimodal user-content arrays. When images are present, a user message emits a content array of text + `image_url` parts instead of a plain string, matching the OpenAI vision API shape.

- models.rs: new `ImageUrlContent` struct and `ContentBlock::ImageUrl` variant.
- client/chat.rs: `image_parts` collection, multimodal wire format for user messages, image-aware message inspection, and a stream-event no-op.
- Exhaustiveness arms added across 10 files (compaction, seam_manager, capacity_flow, purge, notifications, session_picker, utils, working_set, rlm/session, runtime_api).
- Test: `request_builder_emits_openai_image_url_parts_for_user_images`.

Credit: @xyuai (PR #2587 — root cause + initial implementation)
Closes: #2584
Co-authored-by: xyuai <xyuai@users.noreply.github.com>
This commit is contained in:
@@ -841,6 +841,31 @@ fn message_content_for_inspect(message: &Value) -> String {
|
||||
{
|
||||
parts.push(content.to_string());
|
||||
}
|
||||
if let Some(content) = message.get("content").and_then(Value::as_array) {
|
||||
for part in content {
|
||||
match part.get("type").and_then(Value::as_str) {
|
||||
Some("text") => {
|
||||
if let Some(text) = part.get("text").and_then(Value::as_str)
|
||||
&& !text.is_empty()
|
||||
{
|
||||
parts.push(text.to_string());
|
||||
}
|
||||
}
|
||||
Some("image_url") => {
|
||||
let url = part
|
||||
.get("image_url")
|
||||
.and_then(|image_url| image_url.get("url"))
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or("");
|
||||
parts.push(format!(
|
||||
"[image_url:{}]",
|
||||
summarize_image_url_for_inspect(url)
|
||||
));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(reasoning) = message.get("reasoning_content").and_then(Value::as_str)
|
||||
&& !reasoning.is_empty()
|
||||
{
|
||||
@@ -852,6 +877,13 @@ fn message_content_for_inspect(message: &Value) -> String {
|
||||
parts.join("\n")
|
||||
}
|
||||
|
||||
fn summarize_image_url_for_inspect(url: &str) -> String {
|
||||
let Some((prefix, encoded)) = url.split_once(";base64,") else {
|
||||
return first_chars(url, 96);
|
||||
};
|
||||
format!("{prefix};base64,<{} chars>", encoded.len())
|
||||
}
|
||||
|
||||
fn tool_result_inspection_for_message(message: &Value) -> Option<ToolResultInspection> {
|
||||
if message.get("role").and_then(Value::as_str) != Some("tool") {
|
||||
return None;
|
||||
@@ -1338,6 +1370,7 @@ fn build_chat_messages_with_reasoning(
|
||||
for (message_index, message) in messages.iter().enumerate() {
|
||||
let role = message.role.as_str();
|
||||
let mut text_parts = Vec::new();
|
||||
let mut image_parts = Vec::new();
|
||||
let mut thinking_parts = Vec::new();
|
||||
let mut tool_calls = Vec::new();
|
||||
let mut tool_call_infos = Vec::new();
|
||||
@@ -1356,6 +1389,14 @@ fn build_chat_messages_with_reasoning(
|
||||
text_parts.push(text.clone());
|
||||
}
|
||||
}
|
||||
ContentBlock::ImageUrl { image_url } => {
|
||||
image_parts.push(json!({
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": image_url.url.clone(),
|
||||
},
|
||||
}));
|
||||
}
|
||||
ContentBlock::Thinking { thinking } => thinking_parts.push(thinking.clone()),
|
||||
ContentBlock::ToolUse {
|
||||
id,
|
||||
@@ -1469,10 +1510,25 @@ fn build_chat_messages_with_reasoning(
|
||||
}
|
||||
} else if role == "user" {
|
||||
let content = text_parts.join("\n");
|
||||
if !content.trim().is_empty() {
|
||||
let has_text = !content.trim().is_empty();
|
||||
let has_images = !image_parts.is_empty();
|
||||
if has_text || has_images {
|
||||
let wire_content = if has_images {
|
||||
let mut parts = Vec::new();
|
||||
if has_text {
|
||||
parts.push(json!({
|
||||
"type": "text",
|
||||
"text": content,
|
||||
}));
|
||||
}
|
||||
parts.extend(image_parts);
|
||||
json!(parts)
|
||||
} else {
|
||||
json!(content)
|
||||
};
|
||||
let mut msg = json!({
|
||||
"role": "user",
|
||||
"content": content,
|
||||
"content": wire_content,
|
||||
});
|
||||
if include_tool_budget_metadata && let Some(turn_meta) = &turn_meta_budget {
|
||||
msg["_turn_meta_budget"] = turn_meta_budget_json(turn_meta);
|
||||
@@ -2098,6 +2154,7 @@ fn build_stream_events(response: &MessageResponse) -> Vec<StreamEvent> {
|
||||
events.push(StreamEvent::ContentBlockStop { index });
|
||||
}
|
||||
ContentBlock::ToolResult { .. } => {}
|
||||
ContentBlock::ImageUrl { .. } => {}
|
||||
ContentBlock::ServerToolUse { id, name, input } => {
|
||||
events.push(StreamEvent::ContentBlockStart {
|
||||
index,
|
||||
|
||||
@@ -265,7 +265,8 @@ fn message_text(msg: &Message) -> String {
|
||||
}
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => {}
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
text
|
||||
@@ -289,7 +290,8 @@ fn extract_paths_from_message(message: &Message, workspace: Option<&Path>) -> Ve
|
||||
ContentBlock::Thinking { .. } => Vec::new(),
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => Vec::new(),
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => Vec::new(),
|
||||
};
|
||||
paths.extend(candidates);
|
||||
}
|
||||
@@ -562,7 +564,8 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi
|
||||
ContentBlock::ToolResult { content, .. } => content.len() / 4,
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => 0,
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => 0,
|
||||
})
|
||||
.sum::<usize>()
|
||||
}
|
||||
@@ -1346,7 +1349,8 @@ fn build_formatted_summary_request(
|
||||
}
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => {}
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -226,7 +226,8 @@ impl Engine {
|
||||
ContentBlock::Thinking { .. }
|
||||
| ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => {}
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,6 +65,12 @@ pub struct SystemBlock {
|
||||
pub cache_control: Option<CacheControl>,
|
||||
}
|
||||
|
||||
/// OpenAI-compatible image URL payload inside a multimodal message.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
pub struct ImageUrlContent {
|
||||
pub url: String,
|
||||
}
|
||||
|
||||
/// A chat message with role and content blocks.
|
||||
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
|
||||
pub struct Message {
|
||||
@@ -82,6 +88,8 @@ pub enum ContentBlock {
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
cache_control: Option<CacheControl>,
|
||||
},
|
||||
#[serde(rename = "image_url")]
|
||||
ImageUrl { image_url: ImageUrlContent },
|
||||
#[serde(rename = "thinking")]
|
||||
Thinking { thinking: String },
|
||||
#[serde(rename = "tool_use")]
|
||||
|
||||
@@ -246,6 +246,7 @@ fn format_content_block(buf: &mut String, blk_idx: usize, block: &ContentBlock)
|
||||
" [{blk_idx}] CodeExecutionToolResult (id={tool_use_id}, content={snippet})"
|
||||
);
|
||||
}
|
||||
ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -407,6 +407,7 @@ fn compact_content_block(block: &ContentBlock) -> Value {
|
||||
"tool_use_id": tool_use_id,
|
||||
"content": content,
|
||||
}),
|
||||
ContentBlock::ImageUrl { .. } => serde_json::Value::Null,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -904,6 +904,7 @@ fn session_to_detail(session: SavedSession) -> SessionDetailResponse {
|
||||
crate::models::ContentBlock::CodeExecutionToolResult { tool_use_id, content } => {
|
||||
json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content })
|
||||
}
|
||||
crate::models::ContentBlock::ImageUrl { .. } => serde_json::Value::Null,
|
||||
})
|
||||
.collect();
|
||||
json!({
|
||||
|
||||
@@ -386,7 +386,8 @@ impl SeamManager {
|
||||
}
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => {}
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -699,6 +699,7 @@ pub fn latest_assistant_text(messages: &[Message]) -> Option<String> {
|
||||
| ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => None,
|
||||
| ContentBlock::ImageUrl { .. } => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
@@ -790,6 +790,9 @@ fn message_text_for_history(message: &crate::models::Message) -> String {
|
||||
| crate::models::ContentBlock::CodeExecutionToolResult { content, .. } => {
|
||||
format!("tool result: {}", truncate(&content.to_string(), 220))
|
||||
}
|
||||
crate::models::ContentBlock::ImageUrl { .. } => {
|
||||
String::from("[image]")
|
||||
}
|
||||
};
|
||||
let part = part.trim();
|
||||
if !part.is_empty() {
|
||||
|
||||
@@ -499,7 +499,8 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize {
|
||||
ContentBlock::ToolResult { content, .. } => total += content.len(),
|
||||
ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => {}
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1046,7 +1046,8 @@ fn extract_paths_from_message(message: &Message) -> Vec<String> {
|
||||
ContentBlock::Thinking { .. }
|
||||
| ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => {}
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
paths
|
||||
@@ -1211,7 +1212,8 @@ fn message_mentions_any_path(message: &Message, needles: &[String], max_scan_cha
|
||||
ContentBlock::Thinking { .. }
|
||||
| ContentBlock::ServerToolUse { .. }
|
||||
| ContentBlock::ToolSearchToolResult { .. }
|
||||
| ContentBlock::CodeExecutionToolResult { .. } => {}
|
||||
| ContentBlock::CodeExecutionToolResult { .. }
|
||||
| ContentBlock::ImageUrl { .. } => {}
|
||||
}
|
||||
}
|
||||
false
|
||||
|
||||
Reference in New Issue
Block a user