diff --git a/crates/tui/src/child_env.rs b/crates/tui/src/child_env.rs index 70e4a2bc..dac02912 100644 --- a/crates/tui/src/child_env.rs +++ b/crates/tui/src/child_env.rs @@ -35,6 +35,8 @@ where value.as_ref().to_os_string(), ); } + #[cfg(windows)] + fill_windows_common_program_files(&mut env); env } @@ -184,6 +186,9 @@ fn is_allowed_parent_env_key(key: &OsStr) -> bool { | "PROGRAMFILES" | "PROGRAMFILES(X86)" | "PROGRAMW6432" + | "COMMONPROGRAMFILES" + | "COMMONPROGRAMFILES(X86)" + | "COMMONPROGRAMW6432" | "PROCESSOR_ARCHITECTURE" | "NUGET_PACKAGES" | "NUGET_HTTP_CACHE_PATH" @@ -266,6 +271,26 @@ fn upsert_env(env: &mut Vec<(OsString, OsString)>, key: OsString, value: OsStrin env.push((key, value)); } +#[cfg(any(windows, test))] +fn fill_windows_common_program_files(env: &mut Vec<(OsString, OsString)>) { + for (key, default) in [ + ("CommonProgramFiles", r"C:\Program Files\Common Files"), + ( + "CommonProgramFiles(x86)", + r"C:\Program Files (x86)\Common Files", + ), + ("CommonProgramW6432", r"C:\Program Files\Common Files"), + ] { + let existing = env + .iter() + .find(|(existing, _)| normalize_key(existing) == normalize_key(OsStr::new(key))) + .map(|(_, value)| value.to_string_lossy().trim().is_empty()); + if existing.unwrap_or(true) { + upsert_env(env, OsString::from(key), OsString::from(default)); + } + } +} + fn normalize_key(key: &OsStr) -> String { key.to_string_lossy().to_ascii_uppercase() } @@ -387,6 +412,9 @@ mod tests { "PROGRAMFILES", "PROGRAMFILES(X86)", "PROGRAMW6432", + "COMMONPROGRAMFILES", + "COMMONPROGRAMFILES(X86)", + "COMMONPROGRAMW6432", "PROCESSOR_ARCHITECTURE", "NUGET_PACKAGES", "DOTNET_ROOT", @@ -408,6 +436,41 @@ mod tests { ); } + #[test] + fn windows_common_program_files_defaults_replace_empty_values() { + let mut env = vec![ + (OsString::from("CommonProgramFiles"), OsString::new()), + ( + OsString::from("CommonProgramFiles(x86)"), + OsString::from(" "), + ), + ( + OsString::from("CommonProgramW6432"), + OsString::from(r"D:\Common Files"), + ), + ]; + + fill_windows_common_program_files(&mut env); + + let get = |name: &str| { + env.iter() + .find(|(key, _)| normalize_key(key) == normalize_key(OsStr::new(name))) + .map(|(_, value)| value.to_string_lossy().into_owned()) + }; + assert_eq!( + get("CommonProgramFiles").as_deref(), + Some(r"C:\Program Files\Common Files") + ); + assert_eq!( + get("CommonProgramFiles(x86)").as_deref(), + Some(r"C:\Program Files (x86)\Common Files") + ); + assert_eq!( + get("CommonProgramW6432").as_deref(), + Some(r"D:\Common Files") + ); + } + #[test] fn mcp_env_allowlist_includes_python_bootstrap_keys() { for key in [ diff --git a/crates/tui/src/tools/file.rs b/crates/tui/src/tools/file.rs index 1b0b4dfb..62ff3df1 100644 --- a/crates/tui/src/tools/file.rs +++ b/crates/tui/src/tools/file.rs @@ -10,6 +10,7 @@ use super::spec::{ }; use async_trait::async_trait; use serde_json::{Value, json}; +use std::fmt::Display; use std::fs; use std::path::{Path, PathBuf}; use std::process::{Command, Stdio}; @@ -356,7 +357,7 @@ fn read_pdf_via_pdf_extract( // pdf-extract returns pages in document order; `start`/`end` are // 1-indexed inclusive (validated above), so we convert to a // 0-indexed half-open slice with bounds clamping. - let pages = pdf_extract::extract_text_by_pages(path).map_err(|e| { + let pages = guard_pdf_extract(|| pdf_extract::extract_text_by_pages(path)).map_err(|e| { ToolError::execution_failed(format!( "pdf-extract failed on {}: {e} (set `prefer_external_pdftotext = true` in settings.toml to retry via pdftotext)", path.display() @@ -379,7 +380,7 @@ fn read_pdf_via_pdf_extract( // extract_text uses an internal codepath that can hang on certain PDF // cross-reference tables or font encodings (#2641). The per-page path // avoids that hang and produces identical output when joined. - pdf_extract::extract_text_by_pages(path) + guard_pdf_extract(|| pdf_extract::extract_text_by_pages(path)) .map(|pages| pages.join("\n")) .map_err(|e| { ToolError::execution_failed(format!( @@ -391,6 +392,31 @@ fn read_pdf_via_pdf_extract( Ok(ToolResult::success(clean_pdf_text(&text))) } +fn guard_pdf_extract(extract: F) -> Result +where + E: Display, + F: FnOnce() -> Result, +{ + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(extract)) { + Ok(Ok(value)) => Ok(value), + Ok(Err(err)) => Err(err.to_string()), + Err(payload) => Err(format!( + "extractor panicked: {}", + panic_payload_message(payload.as_ref()) + )), + } +} + +fn panic_payload_message(payload: &(dyn std::any::Any + Send)) -> String { + if let Some(message) = payload.downcast_ref::<&str>() { + (*message).to_string() + } else if let Some(message) = payload.downcast_ref::() { + message.clone() + } else { + "unknown panic".to_string() + } +} + fn read_pdf_via_pdftotext( path: &Path, page_range: Option<(u32, u32)>, @@ -1380,6 +1406,17 @@ mod tests { assert!(single.content.contains("Recursive Language Models")); } + #[test] + fn pdf_extract_panic_is_returned_as_tool_error_text() { + let err = guard_pdf_extract(|| -> Result { + panic!("assertion failed: name == \"Identity-H\""); + }) + .expect_err("panic should become an error"); + + assert!(err.contains("extractor panicked")); + assert!(err.contains("Identity-H")); + } + #[tokio::test] async fn read_file_pdf_path_uses_pdf_extract_by_default() { if !sample_pdf_present() { diff --git a/crates/tui/src/tools/web_run.rs b/crates/tui/src/tools/web_run.rs index 73c17612..77ff58e7 100644 --- a/crates/tui/src/tools/web_run.rs +++ b/crates/tui/src/tools/web_run.rs @@ -14,6 +14,7 @@ use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use std::collections::{HashMap, VecDeque}; +use std::fmt::Display; use std::hash::{Hash, Hasher}; use std::sync::{Arc, OnceLock}; use std::time::{Duration, Instant}; @@ -1204,10 +1205,35 @@ fn parse_pdf_page( } fn pdf_extract_text(bytes: &[u8]) -> Result { - pdf_extract::extract_text_from_mem(bytes) + guard_pdf_extract(|| pdf_extract::extract_text_from_mem(bytes)) .map_err(|e| ToolError::execution_failed(format!("PDF extract failed: {e}"))) } +fn guard_pdf_extract(extract: F) -> Result +where + E: Display, + F: FnOnce() -> Result, +{ + match std::panic::catch_unwind(std::panic::AssertUnwindSafe(extract)) { + Ok(Ok(value)) => Ok(value), + Ok(Err(err)) => Err(err.to_string()), + Err(payload) => Err(format!( + "extractor panicked: {}", + panic_payload_message(payload.as_ref()) + )), + } +} + +fn panic_payload_message(payload: &(dyn std::any::Any + Send)) -> String { + if let Some(message) = payload.downcast_ref::<&str>() { + (*message).to_string() + } else if let Some(message) = payload.downcast_ref::() { + message.clone() + } else { + "unknown panic".to_string() + } +} + fn split_pdf_pages(text: &str) -> Vec> { let raw_pages: Vec<&str> = text.split('\x0C').collect(); raw_pages @@ -1794,6 +1820,17 @@ mod tests { assert_eq!(percent_decode("foo+bar%20baz"), "foo+bar baz"); } + #[test] + fn pdf_extract_panic_is_returned_as_tool_error_text() { + let err = guard_pdf_extract(|| -> Result { + panic!("assertion failed: name == \"Identity-H\""); + }) + .expect_err("panic should become an error"); + + assert!(err.contains("extractor panicked")); + assert!(err.contains("Identity-H")); + } + #[test] fn scoped_ref_prefix_is_session_specific() { let _lock = lock_web_run_test_state();