fix(release): harden Windows shell env and PDF extraction
(cherry picked from commit 2d5f2d235849cd2fa520937a529935759e3782a3)
This commit is contained in:
@@ -35,6 +35,8 @@ where
|
||||
value.as_ref().to_os_string(),
|
||||
);
|
||||
}
|
||||
#[cfg(windows)]
|
||||
fill_windows_common_program_files(&mut env);
|
||||
env
|
||||
}
|
||||
|
||||
@@ -184,6 +186,9 @@ fn is_allowed_parent_env_key(key: &OsStr) -> bool {
|
||||
| "PROGRAMFILES"
|
||||
| "PROGRAMFILES(X86)"
|
||||
| "PROGRAMW6432"
|
||||
| "COMMONPROGRAMFILES"
|
||||
| "COMMONPROGRAMFILES(X86)"
|
||||
| "COMMONPROGRAMW6432"
|
||||
| "PROCESSOR_ARCHITECTURE"
|
||||
| "NUGET_PACKAGES"
|
||||
| "NUGET_HTTP_CACHE_PATH"
|
||||
@@ -266,6 +271,26 @@ fn upsert_env(env: &mut Vec<(OsString, OsString)>, key: OsString, value: OsStrin
|
||||
env.push((key, value));
|
||||
}
|
||||
|
||||
#[cfg(any(windows, test))]
|
||||
fn fill_windows_common_program_files(env: &mut Vec<(OsString, OsString)>) {
|
||||
for (key, default) in [
|
||||
("CommonProgramFiles", r"C:\Program Files\Common Files"),
|
||||
(
|
||||
"CommonProgramFiles(x86)",
|
||||
r"C:\Program Files (x86)\Common Files",
|
||||
),
|
||||
("CommonProgramW6432", r"C:\Program Files\Common Files"),
|
||||
] {
|
||||
let existing = env
|
||||
.iter()
|
||||
.find(|(existing, _)| normalize_key(existing) == normalize_key(OsStr::new(key)))
|
||||
.map(|(_, value)| value.to_string_lossy().trim().is_empty());
|
||||
if existing.unwrap_or(true) {
|
||||
upsert_env(env, OsString::from(key), OsString::from(default));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn normalize_key(key: &OsStr) -> String {
|
||||
key.to_string_lossy().to_ascii_uppercase()
|
||||
}
|
||||
@@ -387,6 +412,9 @@ mod tests {
|
||||
"PROGRAMFILES",
|
||||
"PROGRAMFILES(X86)",
|
||||
"PROGRAMW6432",
|
||||
"COMMONPROGRAMFILES",
|
||||
"COMMONPROGRAMFILES(X86)",
|
||||
"COMMONPROGRAMW6432",
|
||||
"PROCESSOR_ARCHITECTURE",
|
||||
"NUGET_PACKAGES",
|
||||
"DOTNET_ROOT",
|
||||
@@ -408,6 +436,41 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn windows_common_program_files_defaults_replace_empty_values() {
|
||||
let mut env = vec![
|
||||
(OsString::from("CommonProgramFiles"), OsString::new()),
|
||||
(
|
||||
OsString::from("CommonProgramFiles(x86)"),
|
||||
OsString::from(" "),
|
||||
),
|
||||
(
|
||||
OsString::from("CommonProgramW6432"),
|
||||
OsString::from(r"D:\Common Files"),
|
||||
),
|
||||
];
|
||||
|
||||
fill_windows_common_program_files(&mut env);
|
||||
|
||||
let get = |name: &str| {
|
||||
env.iter()
|
||||
.find(|(key, _)| normalize_key(key) == normalize_key(OsStr::new(name)))
|
||||
.map(|(_, value)| value.to_string_lossy().into_owned())
|
||||
};
|
||||
assert_eq!(
|
||||
get("CommonProgramFiles").as_deref(),
|
||||
Some(r"C:\Program Files\Common Files")
|
||||
);
|
||||
assert_eq!(
|
||||
get("CommonProgramFiles(x86)").as_deref(),
|
||||
Some(r"C:\Program Files (x86)\Common Files")
|
||||
);
|
||||
assert_eq!(
|
||||
get("CommonProgramW6432").as_deref(),
|
||||
Some(r"D:\Common Files")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_env_allowlist_includes_python_bootstrap_keys() {
|
||||
for key in [
|
||||
|
||||
@@ -10,6 +10,7 @@ use super::spec::{
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use serde_json::{Value, json};
|
||||
use std::fmt::Display;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
@@ -356,7 +357,7 @@ fn read_pdf_via_pdf_extract(
|
||||
// pdf-extract returns pages in document order; `start`/`end` are
|
||||
// 1-indexed inclusive (validated above), so we convert to a
|
||||
// 0-indexed half-open slice with bounds clamping.
|
||||
let pages = pdf_extract::extract_text_by_pages(path).map_err(|e| {
|
||||
let pages = guard_pdf_extract(|| pdf_extract::extract_text_by_pages(path)).map_err(|e| {
|
||||
ToolError::execution_failed(format!(
|
||||
"pdf-extract failed on {}: {e} (set `prefer_external_pdftotext = true` in settings.toml to retry via pdftotext)",
|
||||
path.display()
|
||||
@@ -379,7 +380,7 @@ fn read_pdf_via_pdf_extract(
|
||||
// extract_text uses an internal codepath that can hang on certain PDF
|
||||
// cross-reference tables or font encodings (#2641). The per-page path
|
||||
// avoids that hang and produces identical output when joined.
|
||||
pdf_extract::extract_text_by_pages(path)
|
||||
guard_pdf_extract(|| pdf_extract::extract_text_by_pages(path))
|
||||
.map(|pages| pages.join("\n"))
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!(
|
||||
@@ -391,6 +392,31 @@ fn read_pdf_via_pdf_extract(
|
||||
Ok(ToolResult::success(clean_pdf_text(&text)))
|
||||
}
|
||||
|
||||
fn guard_pdf_extract<T, E, F>(extract: F) -> Result<T, String>
|
||||
where
|
||||
E: Display,
|
||||
F: FnOnce() -> Result<T, E>,
|
||||
{
|
||||
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(extract)) {
|
||||
Ok(Ok(value)) => Ok(value),
|
||||
Ok(Err(err)) => Err(err.to_string()),
|
||||
Err(payload) => Err(format!(
|
||||
"extractor panicked: {}",
|
||||
panic_payload_message(payload.as_ref())
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn panic_payload_message(payload: &(dyn std::any::Any + Send)) -> String {
|
||||
if let Some(message) = payload.downcast_ref::<&str>() {
|
||||
(*message).to_string()
|
||||
} else if let Some(message) = payload.downcast_ref::<String>() {
|
||||
message.clone()
|
||||
} else {
|
||||
"unknown panic".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn read_pdf_via_pdftotext(
|
||||
path: &Path,
|
||||
page_range: Option<(u32, u32)>,
|
||||
@@ -1380,6 +1406,17 @@ mod tests {
|
||||
assert!(single.content.contains("Recursive Language Models"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pdf_extract_panic_is_returned_as_tool_error_text() {
|
||||
let err = guard_pdf_extract(|| -> Result<String, &'static str> {
|
||||
panic!("assertion failed: name == \"Identity-H\"");
|
||||
})
|
||||
.expect_err("panic should become an error");
|
||||
|
||||
assert!(err.contains("extractor panicked"));
|
||||
assert!(err.contains("Identity-H"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn read_file_pdf_path_uses_pdf_extract_by_default() {
|
||||
if !sample_pdf_present() {
|
||||
|
||||
@@ -14,6 +14,7 @@ use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Value, json};
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::fmt::Display;
|
||||
use std::hash::{Hash, Hasher};
|
||||
use std::sync::{Arc, OnceLock};
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -1204,10 +1205,35 @@ fn parse_pdf_page(
|
||||
}
|
||||
|
||||
fn pdf_extract_text(bytes: &[u8]) -> Result<String, ToolError> {
|
||||
pdf_extract::extract_text_from_mem(bytes)
|
||||
guard_pdf_extract(|| pdf_extract::extract_text_from_mem(bytes))
|
||||
.map_err(|e| ToolError::execution_failed(format!("PDF extract failed: {e}")))
|
||||
}
|
||||
|
||||
fn guard_pdf_extract<T, E, F>(extract: F) -> Result<T, String>
|
||||
where
|
||||
E: Display,
|
||||
F: FnOnce() -> Result<T, E>,
|
||||
{
|
||||
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(extract)) {
|
||||
Ok(Ok(value)) => Ok(value),
|
||||
Ok(Err(err)) => Err(err.to_string()),
|
||||
Err(payload) => Err(format!(
|
||||
"extractor panicked: {}",
|
||||
panic_payload_message(payload.as_ref())
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn panic_payload_message(payload: &(dyn std::any::Any + Send)) -> String {
|
||||
if let Some(message) = payload.downcast_ref::<&str>() {
|
||||
(*message).to_string()
|
||||
} else if let Some(message) = payload.downcast_ref::<String>() {
|
||||
message.clone()
|
||||
} else {
|
||||
"unknown panic".to_string()
|
||||
}
|
||||
}
|
||||
|
||||
fn split_pdf_pages(text: &str) -> Vec<Vec<String>> {
|
||||
let raw_pages: Vec<&str> = text.split('\x0C').collect();
|
||||
raw_pages
|
||||
@@ -1794,6 +1820,17 @@ mod tests {
|
||||
assert_eq!(percent_decode("foo+bar%20baz"), "foo+bar baz");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pdf_extract_panic_is_returned_as_tool_error_text() {
|
||||
let err = guard_pdf_extract(|| -> Result<String, &'static str> {
|
||||
panic!("assertion failed: name == \"Identity-H\"");
|
||||
})
|
||||
.expect_err("panic should become an error");
|
||||
|
||||
assert!(err.contains("extractor panicked"));
|
||||
assert!(err.contains("Identity-H"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn scoped_ref_prefix_is_session_specific() {
|
||||
let _lock = lock_web_run_test_state();
|
||||
|
||||
Reference in New Issue
Block a user