feat(tui): send /attach images as multimodal content (#2584, #2587) (#2607)

Adds OpenAI-compatible image_url content blocks to the chat message
model, wiring attached images through build_chat_messages_with_reasoning
as multimodal user-content arrays. When images are present, user
messages emit a content array of text + image_url parts instead of a
plain string, matching the OpenAI vision API shape.

- models.rs: new ImageUrlContent struct, ContentBlock::ImageUrl variant
- client/chat.rs: image_parts collection, multimodal wire format for
  user messages, image-aware message inspection, stream-event no-op
- Exhaustiveness arms added across 10 files (compaction, seam_manager,
  capacity_flow, purge, notifications, session_picker, utils,
  working_set, rlm/session, runtime_api)
- Test: request_builder_emits_openai_image_url_parts_for_user_images

Credit: @xyuai (PR #2587 — root cause + initial implementation)
Closes: #2584

Co-authored-by: xyuai <xyuai@users.noreply.github.com>
This commit is contained in:
Hunter Bown
2026-06-02 21:27:31 -07:00
committed by GitHub
parent 8981d5c5fd
commit dd26114697
12 changed files with 92 additions and 11 deletions
+59 -2
View File
@@ -841,6 +841,31 @@ fn message_content_for_inspect(message: &Value) -> String {
{
parts.push(content.to_string());
}
if let Some(content) = message.get("content").and_then(Value::as_array) {
for part in content {
match part.get("type").and_then(Value::as_str) {
Some("text") => {
if let Some(text) = part.get("text").and_then(Value::as_str)
&& !text.is_empty()
{
parts.push(text.to_string());
}
}
Some("image_url") => {
let url = part
.get("image_url")
.and_then(|image_url| image_url.get("url"))
.and_then(Value::as_str)
.unwrap_or("");
parts.push(format!(
"[image_url:{}]",
summarize_image_url_for_inspect(url)
));
}
_ => {}
}
}
}
if let Some(reasoning) = message.get("reasoning_content").and_then(Value::as_str)
&& !reasoning.is_empty()
{
@@ -852,6 +877,13 @@ fn message_content_for_inspect(message: &Value) -> String {
parts.join("\n")
}
/// Produces a short, log-friendly rendering of an image URL for inspection output.
///
/// When `url` contains the `;base64,` marker, everything after the first
/// occurrence is replaced by a `<N chars>` placeholder, where `N` is the
/// byte length of that payload; the part before the marker is kept as-is.
/// Any other URL is handed to `first_chars` with a limit of 96.
fn summarize_image_url_for_inspect(url: &str) -> String {
    match url.split_once(";base64,") {
        // Inline payload: keep the prefix, report only the payload size.
        Some((prefix, payload)) => format!("{prefix};base64,<{} chars>", payload.len()),
        // No inline payload marker: defer to the shared truncation helper.
        None => first_chars(url, 96),
    }
}
fn tool_result_inspection_for_message(message: &Value) -> Option<ToolResultInspection> {
if message.get("role").and_then(Value::as_str) != Some("tool") {
return None;
@@ -1338,6 +1370,7 @@ fn build_chat_messages_with_reasoning(
for (message_index, message) in messages.iter().enumerate() {
let role = message.role.as_str();
let mut text_parts = Vec::new();
let mut image_parts = Vec::new();
let mut thinking_parts = Vec::new();
let mut tool_calls = Vec::new();
let mut tool_call_infos = Vec::new();
@@ -1356,6 +1389,14 @@ fn build_chat_messages_with_reasoning(
text_parts.push(text.clone());
}
}
ContentBlock::ImageUrl { image_url } => {
image_parts.push(json!({
"type": "image_url",
"image_url": {
"url": image_url.url.clone(),
},
}));
}
ContentBlock::Thinking { thinking } => thinking_parts.push(thinking.clone()),
ContentBlock::ToolUse {
id,
@@ -1469,10 +1510,25 @@ fn build_chat_messages_with_reasoning(
}
} else if role == "user" {
let content = text_parts.join("\n");
if !content.trim().is_empty() {
let has_text = !content.trim().is_empty();
let has_images = !image_parts.is_empty();
if has_text || has_images {
let wire_content = if has_images {
let mut parts = Vec::new();
if has_text {
parts.push(json!({
"type": "text",
"text": content,
}));
}
parts.extend(image_parts);
json!(parts)
} else {
json!(content)
};
let mut msg = json!({
"role": "user",
"content": content,
"content": wire_content,
});
if include_tool_budget_metadata && let Some(turn_meta) = &turn_meta_budget {
msg["_turn_meta_budget"] = turn_meta_budget_json(turn_meta);
@@ -2098,6 +2154,7 @@ fn build_stream_events(response: &MessageResponse) -> Vec<StreamEvent> {
events.push(StreamEvent::ContentBlockStop { index });
}
ContentBlock::ToolResult { .. } => {}
ContentBlock::ImageUrl { .. } => {}
ContentBlock::ServerToolUse { id, name, input } => {
events.push(StreamEvent::ContentBlockStart {
index,
+8 -4
View File
@@ -265,7 +265,8 @@ fn message_text(msg: &Message) -> String {
}
ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => {}
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => {}
}
}
text
@@ -289,7 +290,8 @@ fn extract_paths_from_message(message: &Message, workspace: Option<&Path>) -> Ve
ContentBlock::Thinking { .. } => Vec::new(),
ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => Vec::new(),
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => Vec::new(),
};
paths.extend(candidates);
}
@@ -562,7 +564,8 @@ fn estimate_tokens_for_message(message: &Message, include_thinking: bool) -> usi
ContentBlock::ToolResult { content, .. } => content.len() / 4,
ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => 0,
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => 0,
})
.sum::<usize>()
}
@@ -1346,7 +1349,8 @@ fn build_formatted_summary_request(
}
ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => {}
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => {}
}
}
}
+2 -1
View File
@@ -226,7 +226,8 @@ impl Engine {
ContentBlock::Thinking { .. }
| ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => {}
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => {}
}
}
}
+8
View File
@@ -65,6 +65,12 @@ pub struct SystemBlock {
pub cache_control: Option<CacheControl>,
}
/// OpenAI-compatible image URL payload inside a multimodal message.
///
/// This is the value carried by the `image_url` field of
/// `ContentBlock::ImageUrl`. It derives `Serialize`/`Deserialize`, so it
/// maps to an object with a single `url` key.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ImageUrlContent {
/// The image reference as a string.
/// NOTE(review): presumably either a remote URL or a `data:` URL with an
/// inline `;base64,` payload — confirm against the code that constructs it.
pub url: String,
}
/// A chat message with role and content blocks.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Message {
@@ -82,6 +88,8 @@ pub enum ContentBlock {
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<CacheControl>,
},
#[serde(rename = "image_url")]
ImageUrl { image_url: ImageUrlContent },
#[serde(rename = "thinking")]
Thinking { thinking: String },
#[serde(rename = "tool_use")]
+1
View File
@@ -246,6 +246,7 @@ fn format_content_block(buf: &mut String, blk_idx: usize, block: &ContentBlock)
" [{blk_idx}] CodeExecutionToolResult (id={tool_use_id}, content={snippet})"
);
}
ContentBlock::ImageUrl { .. } => {}
}
}
+1
View File
@@ -407,6 +407,7 @@ fn compact_content_block(block: &ContentBlock) -> Value {
"tool_use_id": tool_use_id,
"content": content,
}),
ContentBlock::ImageUrl { .. } => serde_json::Value::Null,
}
}
+1
View File
@@ -904,6 +904,7 @@ fn session_to_detail(session: SavedSession) -> SessionDetailResponse {
crate::models::ContentBlock::CodeExecutionToolResult { tool_use_id, content } => {
json!({ "type": "tool_result", "tool_use_id": tool_use_id, "content": content })
}
crate::models::ContentBlock::ImageUrl { .. } => serde_json::Value::Null,
})
.collect();
json!({
+2 -1
View File
@@ -386,7 +386,8 @@ impl SeamManager {
}
ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => {}
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => {}
}
}
}
+1
View File
@@ -699,6 +699,7 @@ pub fn latest_assistant_text(messages: &[Message]) -> Option<String> {
| ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => None,
| ContentBlock::ImageUrl { .. } => None,
})
.collect::<Vec<_>>()
.join("\n");
+3
View File
@@ -790,6 +790,9 @@ fn message_text_for_history(message: &crate::models::Message) -> String {
| crate::models::ContentBlock::CodeExecutionToolResult { content, .. } => {
format!("tool result: {}", truncate(&content.to_string(), 220))
}
crate::models::ContentBlock::ImageUrl { .. } => {
String::from("[image]")
}
};
let part = part.trim();
if !part.is_empty() {
+2 -1
View File
@@ -499,7 +499,8 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize {
ContentBlock::ToolResult { content, .. } => total += content.len(),
ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => {}
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => {}
}
}
}
+4 -2
View File
@@ -1046,7 +1046,8 @@ fn extract_paths_from_message(message: &Message) -> Vec<String> {
ContentBlock::Thinking { .. }
| ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => {}
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => {}
}
}
paths
@@ -1211,7 +1212,8 @@ fn message_mentions_any_path(message: &Message, needles: &[String], max_scan_cha
ContentBlock::Thinking { .. }
| ContentBlock::ServerToolUse { .. }
| ContentBlock::ToolSearchToolResult { .. }
| ContentBlock::CodeExecutionToolResult { .. } => {}
| ContentBlock::CodeExecutionToolResult { .. }
| ContentBlock::ImageUrl { .. } => {}
}
}
false