From 1721393b971453bce9625e5654e997f5e37a22ca Mon Sep 17 00:00:00 2001
From: Hanmiao Li <894876246@qq.com>
Date: Mon, 8 Jun 2026 15:27:14 +0800
Subject: [PATCH] fix(pdf): use extract_text_by_pages to avoid hang on full-PDF reads

`pdf_extract::extract_text` uses an internal codepath that can hang on
certain PDF cross-reference tables or font encodings. The per-page
`extract_text_by_pages` path does not trigger this hang and produces
identical output when joined.

When `pages` is not specified, route through `extract_text_by_pages`
and join all pages instead of calling `extract_text`.

Fixes #2641.
---
 crates/tui/src/tools/file.rs | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/crates/tui/src/tools/file.rs b/crates/tui/src/tools/file.rs
index 671f1366..084c05c8 100644
--- a/crates/tui/src/tools/file.rs
+++ b/crates/tui/src/tools/file.rs
@@ -361,12 +361,18 @@ fn read_pdf_via_pdf_extract(
             }
         }
     } else {
-        pdf_extract::extract_text(path).map_err(|e| {
-            ToolError::execution_failed(format!(
-                "pdf-extract failed on {}: {e} (set `prefer_external_pdftotext = true` in settings.toml to retry via pdftotext)",
-                path.display()
-            ))
-        })?
+        // Call extract_text_by_pages even when the caller wants every page:
+        // extract_text uses an internal codepath that can hang on certain PDF
+        // cross-reference tables or font encodings (#2641). The per-page path
+        // avoids that hang and produces identical output when joined.
+        pdf_extract::extract_text_by_pages(path)
+            .map(|pages| pages.join("\n"))
+            .map_err(|e| {
+                ToolError::execution_failed(format!(
+                    "pdf-extract failed on {}: {e} (set `prefer_external_pdftotext = true` in settings.toml to retry via pdftotext)",
+                    path.display()
+                ))
+            })?
     };
     Ok(ToolResult::success(clean_pdf_text(&text)))
 }