codewhale/crates/tui/src/tools/file.rs

//! File system tools: `read_file`, `write_file`, `edit_file`, `list_dir`
//!
//! These tools provide safe file system operations within the workspace,
//! with path validation to prevent escaping the workspace boundary.

use super::diff_format::make_unified_diff;
use super::spec::{
    ApprovalRequirement, ToolCapability, ToolContext, ToolError, ToolResult, ToolSpec,
    lsp_diagnostics_for_paths, optional_str, required_str,
};
use async_trait::async_trait;
use serde_json::{Value, json};
use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};

// === ReadFileTool ===

/// Tool for reading UTF-8 text files from the workspace.
///
/// PDFs (detected by [`is_pdf`]) are routed to [`read_pdf`]; everything else
/// is read as UTF-8 text and either returned verbatim (small files) or as a
/// numbered, bounded window wrapped in a `<file …>` tag.
pub struct ReadFileTool;

#[async_trait]
impl ToolSpec for ReadFileTool {
    fn name(&self) -> &'static str {
        "read_file"
    }

    fn description(&self) -> &'static str {
        "Read a UTF-8 file from the workspace. Use this instead of `cat`, `head`, `tail`, or `sed -n '..p'` in `exec_shell` — it's faster, sandbox-aware, and skips the approval prompt. Plain text is returned as-is; PDFs are auto-extracted via the bundled pure-Rust extractor (no Poppler install required). Cannot read images or non-PDF binaries.\n\nFor large files, use `start_line` and `max_lines` to read in chunks. By default, returns at most 200 lines (~16KB). If `truncated=\"true\"` in the response, use `next_start_line` to continue reading. For PDFs, use `pages` instead — `start_line`/`max_lines` only apply to text files."
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file (relative to workspace or absolute)"
                },
                "start_line": {
                    "type": "integer",
                    "description": "Starting line (1-based, default 1)"
                },
                "max_lines": {
                    "type": "integer",
                    "description": "Maximum lines to return (default 200, max 500)"
                },
                "pages": {
                    "type": "string",
                    "description": "PDF only: page range to extract, e.g. \"1-5\" or \"10\". Ignored for non-PDF files."
                }
            },
            "required": ["path"]
        })
    }

    fn capabilities(&self) -> Vec<ToolCapability> {
        vec![ToolCapability::ReadOnly, ToolCapability::Sandboxable]
    }

    fn supports_parallel(&self) -> bool {
        true
    }

    /// Read the file named by `input["path"]`.
    ///
    /// # Errors
    /// Fails when `path` is missing or cannot be resolved, when the file
    /// cannot be read as UTF-8, or when `start_line` / `max_lines` is `0`.
    /// A `start_line` past the end of the file is *not* an error; it yields
    /// a `[NO CONTENT]` sentinel instead.
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
        // Bounded output for large files. The small-file fast path keeps the
        // historical "return contents unchanged" behavior so existing flows
        // (small configs, single source files, etc.) don't suddenly start
        // seeing wrapped output. Once a file is large or the caller asks
        // for an explicit range, we switch to a numbered, line-tagged
        // window with continuation hints so the model can page through
        // without re-loading the entire file on every turn. Harvested
        // from PR #1451 by @Oliver-ZPLiu, closes part of #1450.
        const DEFAULT_READ_LINES: usize = 200;
        const HARD_MAX_READ_LINES: usize = 500;
        const MAX_VISIBLE_BYTES: usize = 16 * 1024;
        const SMALL_FILE_LINES: usize = 200;
        const SMALL_FILE_BYTES: usize = 16 * 1024;

        let path_str = required_str(&input, "path")?;
        let file_path = context.resolve_path(path_str)?;
        let pages = optional_str(&input, "pages");

        if is_pdf(&file_path)? {
            return read_pdf(&file_path, pages);
        }

        let contents = fs::read_to_string(&file_path).map_err(|e| {
            ToolError::execution_failed(format!("Failed to read {}: {}", file_path.display(), e))
        })?;

        let total_lines = contents.lines().count();
        let total_bytes = contents.len();
        let explicit_range = input
            .get("start_line")
            .or_else(|| input.get("max_lines"))
            .is_some();

        // Small-file fast path. Only applies when the caller didn't pass an
        // explicit range — otherwise an explicit `start_line = 5` on a
        // tiny file would silently ignore the request.
        if !explicit_range && total_lines <= SMALL_FILE_LINES && total_bytes <= SMALL_FILE_BYTES {
            return Ok(ToolResult::success(contents));
        }

        let start_line = match input.get("start_line").and_then(Value::as_u64) {
            Some(0) => {
                return Err(ToolError::invalid_input(
                    "start_line must be 1-based and greater than 0".to_string(),
                ));
            }
            // Saturate instead of truncating on targets where usize < u64;
            // an over-large value simply lands in the "beyond EOF" branch.
            Some(v) => usize::try_from(v).unwrap_or(usize::MAX),
            None => 1,
        };

        let max_lines = match input.get("max_lines").and_then(Value::as_u64) {
            Some(0) => {
                return Err(ToolError::invalid_input(
                    "max_lines must be greater than 0".to_string(),
                ));
            }
            Some(v) => usize::try_from(v)
                .map_or(HARD_MAX_READ_LINES, |v| v.min(HARD_MAX_READ_LINES)),
            None => DEFAULT_READ_LINES,
        };

        // `start_line > total_lines` is not an error — it lets the model
        // page past the end without raising. Returns an empty-content
        // sentinel so subsequent reads can stop.
        if start_line > total_lines {
            let output = format!(
                "<file path=\"{path_str}\" total_lines=\"{total_lines}\" shown_lines=\"none\" truncated=\"false\">\n\
                 \n\
                 [NO CONTENT] start_line {start_line} is beyond total_lines {total_lines}.\n\
                 </file>"
            );
            return Ok(ToolResult::success(output));
        }

        let zero_based_start = start_line - 1;
        let shown_first = start_line;

        // Render whole lines until either the line window or the byte budget
        // is exhausted. `shown_last` tracks the last line that actually made
        // it into the output, so the reported range and `next_start_line`
        // never skip content that was cut by the byte limit.
        let mut numbered = String::new();
        let mut shown_last = zero_based_start;
        let mut truncated_by_bytes = false;
        for (index, line) in contents
            .lines()
            .enumerate()
            .skip(zero_based_start)
            .take(max_lines)
        {
            let line_no = index + 1;
            let rendered = format!("{line_no:>6}│ {line}\n");
            if numbered.len() + rendered.len() > MAX_VISIBLE_BYTES {
                truncated_by_bytes = true;
                if numbered.is_empty() {
                    // A single line larger than the whole budget: emit a
                    // UTF-8-safe prefix so the caller still makes progress
                    // instead of receiving an empty window forever.
                    let mut end = MAX_VISIBLE_BYTES;
                    while end > 0 && !rendered.is_char_boundary(end) {
                        end -= 1;
                    }
                    numbered.push_str(&rendered[..end]);
                    shown_last = line_no;
                }
                break;
            }
            numbered.push_str(&rendered);
            shown_last = line_no;
        }

        // More lines remain after the last one rendered (because of the
        // line cap, the byte cap, or both).
        let truncated_by_lines = shown_last < total_lines;
        let truncated = truncated_by_lines || truncated_by_bytes;
        let next_start = shown_last + 1;

        let mut attrs = format!(
            "path=\"{path_str}\" total_lines=\"{total_lines}\" shown_lines=\"{shown_first}-{shown_last}\" truncated=\"{truncated}\""
        );
        if truncated_by_lines {
            attrs.push_str(&format!(" next_start_line=\"{next_start}\""));
        }

        let mut output = format!("<file {attrs}>\n{numbered}");
        if truncated_by_lines {
            output.push_str(&format!(
                "\n[TRUNCATED] Showing lines {shown_first}-{shown_last} of {total_lines}. To continue, call read_file with path=\"{path_str}\" start_line={next_start} max_lines={max_lines}\n"
            ));
        }
        if truncated_by_bytes {
            output.push_str(
                "\n[TRUNCATED] The selected range exceeded 16KB. Continue with a smaller max_lines value.\n",
            );
        }
        output.push_str("</file>");

        Ok(ToolResult::success(output))
    }
}

/// Decide whether `path` should be treated as a PDF.
///
/// A case-insensitive `pdf` extension is accepted without opening the file.
/// Otherwise the first four bytes are compared against `%PDF`, so PDFs
/// without an extension are still recognized. A file that cannot be opened,
/// or is shorter than four bytes, is reported as "not a PDF" rather than as
/// an error, leaving the caller's own read to surface the failure.
fn is_pdf(path: &Path) -> Result<bool, ToolError> {
    use std::io::Read;

    let has_pdf_extension = matches!(
        path.extension().and_then(|ext| ext.to_str()),
        Some(ext) if ext.eq_ignore_ascii_case("pdf")
    );
    if has_pdf_extension {
        return Ok(true);
    }

    // Unopenable (e.g. missing) files are simply "not a PDF" here.
    let Ok(mut file) = fs::File::open(path) else {
        return Ok(false);
    };

    let mut header = [0u8; 4];
    let header_read = file.read_exact(&mut header).is_ok();
    Ok(header_read && header == *b"%PDF")
}

/// Parse a `pages` specification into a 1-based inclusive `(first, last)` pair.
///
/// Accepts a single page (`"7"`) or a dash-separated range (`"1-5"`), with
/// optional whitespace around the numbers. Returns `None` for empty input,
/// non-numeric parts, a page number of `0`, or a range whose end precedes
/// its start.
fn parse_pages_arg(spec: &str) -> Option<(u32, u32)> {
    let spec = spec.trim();

    let (first, last) = match spec.split_once('-') {
        Some((lo, hi)) => {
            let lo = lo.trim().parse::<u32>().ok()?;
            let hi = hi.trim().parse::<u32>().ok()?;
            (lo, hi)
        }
        // No dash: a single page is the degenerate range `n-n`. An empty
        // string fails to parse here and therefore yields `None`.
        None => {
            let page = spec.parse::<u32>().ok()?;
            (page, page)
        }
    };

    (first >= 1 && last >= first).then_some((first, last))
}

/// Extract text from the PDF at `path`, optionally limited to `pages`.
///
/// The `pages` spec is validated first so that both extractor back ends
/// report malformed input with the same `invalid_input` error. The back end
/// is then chosen from the `prefer_external_pdftotext` setting; if settings
/// cannot be loaded the bundled `pdf-extract` path is used.
fn read_pdf(path: &Path, pages: Option<&str>) -> Result<ToolResult, ToolError> {
    let page_range = pages
        .map(|spec| {
            parse_pages_arg(spec).ok_or_else(|| {
                ToolError::invalid_input(format!(
                    "invalid `pages` value `{spec}` (expected `N` or `N-M`, e.g. `1-5`)"
                ))
            })
        })
        .transpose()?;

    // The pure-Rust `pdf-extract` reader is the default because it needs no
    // Poppler install. Users with column-heavy or table-heavy PDFs can opt
    // into `pdftotext -layout` by setting `prefer_external_pdftotext = true`
    // in `~/.config/deepseek/settings.toml`.
    let use_pdftotext = crate::settings::Settings::load()
        .map_or(false, |settings| settings.prefer_external_pdftotext);

    if !use_pdftotext {
        return read_pdf_via_pdf_extract(path, page_range);
    }
    read_pdf_via_pdftotext(path, page_range)
}

/// Extract PDF text with the bundled `pdf-extract` crate.
///
/// With no `page_range` the whole document is extracted. With a range
/// (1-based, inclusive), the document is extracted page by page and the
/// requested pages are joined with `\n`; a range that lies wholly outside
/// the document yields an empty string rather than an error.
fn read_pdf_via_pdf_extract(
    path: &Path,
    page_range: Option<(u32, u32)>,
) -> Result<ToolResult, ToolError> {
    // Shared error shape for both extraction calls below.
    fn extraction_error(path: &Path, e: impl std::fmt::Display) -> ToolError {
        ToolError::execution_failed(format!(
            "pdf-extract failed on {}: {e} (set `prefer_external_pdftotext = true` in settings.toml to retry via pdftotext)",
            path.display()
        ))
    }

    let Some((start, end)) = page_range else {
        let whole = pdf_extract::extract_text(path).map_err(|e| extraction_error(path, e))?;
        return Ok(ToolResult::success(whole));
    };

    let pages =
        pdf_extract::extract_text_by_pages(path).map_err(|e| extraction_error(path, e))?;

    // Convert the 1-based inclusive range into a 0-based half-open one,
    // clamping the end to the page count. `get` returns `None` when the
    // start lies past the (clamped) end, which maps to empty output.
    let first = (start as usize).saturating_sub(1);
    let last = (end as usize).min(pages.len());
    let selected = match pages.get(first..last) {
        Some(window) => window.join("\n"),
        None => String::new(),
    };

    Ok(ToolResult::success(selected))
}

/// Extract PDF text by running the external `pdftotext -layout` binary.
///
/// `page_range`, when present, is passed through as `-f <first> -l <last>`.
/// If the binary is not found, a structured `binary_unavailable` JSON result
/// is returned (not an error) with install hints. Any other spawn failure,
/// wait failure, or non-zero exit status becomes an `execution_failed` error.
fn read_pdf_via_pdftotext(
    path: &Path,
    page_range: Option<(u32, u32)>,
) -> Result<ToolResult, ToolError> {
    let mut pdftotext = Command::new("pdftotext");
    pdftotext.arg("-layout");
    if let Some((first, last)) = page_range {
        pdftotext
            .arg("-f")
            .arg(first.to_string())
            .arg("-l")
            .arg(last.to_string());
    }
    // Trailing "-" makes pdftotext write the text to stdout.
    pdftotext
        .arg(path)
        .arg("-")
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    let child = match pdftotext.spawn() {
        Ok(child) => child,
        Err(e) => {
            if e.kind() != std::io::ErrorKind::NotFound {
                return Err(ToolError::execution_failed(format!(
                    "failed to launch pdftotext: {e}"
                )));
            }
            // Missing binary: only reachable when the user opted into the
            // external path, so answer with a structured payload that points
            // at both the install command and the bundled default.
            let payload = json!({
                "type": "binary_unavailable",
                "path": path.display().to_string(),
                "kind": "pdf",
                "reason": "pdftotext not installed (prefer_external_pdftotext = true in settings)",
                "hint": "install poppler (macOS: `brew install poppler`; Debian/Ubuntu: `apt install poppler-utils`) — or unset `prefer_external_pdftotext` to use the bundled pure-Rust extractor"
            });
            return ToolResult::json(&payload).map_err(|e| {
                ToolError::execution_failed(format!("failed to serialize response: {e}"))
            });
        }
    };

    let finished = child
        .wait_with_output()
        .map_err(|e| ToolError::execution_failed(format!("pdftotext failed to complete: {e}")))?;

    if finished.status.success() {
        let text = String::from_utf8_lossy(&finished.stdout).into_owned();
        return Ok(ToolResult::success(text));
    }

    let stderr = String::from_utf8_lossy(&finished.stderr).trim().to_string();
    Err(ToolError::execution_failed(format!(
        "pdftotext failed (exit {:?}): {stderr}",
        finished.status.code()
    )))
}

// === WriteFileTool ===

/// Tool that creates or overwrites a UTF-8 file in the workspace and reports
/// the change as a unified diff.
pub struct WriteFileTool;

#[async_trait]
impl ToolSpec for WriteFileTool {
    fn name(&self) -> &'static str {
        "write_file"
    }

    fn description(&self) -> &'static str {
        "Write content to a UTF-8 file in the workspace. Use this instead of heredocs (`cat <<EOF > file`) or `echo > file` in `exec_shell` — diffs render inline and approval is handled cleanly. Creates or overwrites; parent directories are auto-created."
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file"
                },
                "content": {
                    "type": "string",
                    "description": "Content to write"
                }
            },
            "required": ["path", "content"]
        })
    }

    fn capabilities(&self) -> Vec<ToolCapability> {
        vec![
            ToolCapability::WritesFiles,
            ToolCapability::Sandboxable,
            ToolCapability::RequiresApproval,
        ]
    }

    fn approval_requirement(&self) -> ApprovalRequirement {
        ApprovalRequirement::Suggest
    }

    /// Write `input["content"]` to `input["path"]`, creating parent
    /// directories as needed, and return a diff plus a one-line summary
    /// (followed by LSP diagnostics when any are produced).
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
        let path_str = required_str(&input, "path")?;
        let new_contents = required_str(&input, "content")?;
        let target = context.resolve_path(path_str)?;

        // Capture what is on disk before overwriting so the result can show
        // an inline diff. An unreadable existing file is treated as empty.
        let already_present = target.exists();
        let old_contents = already_present
            .then(|| fs::read_to_string(&target).unwrap_or_default())
            .unwrap_or_default();

        if let Some(dir) = target.parent() {
            if let Err(e) = fs::create_dir_all(dir) {
                return Err(ToolError::execution_failed(format!(
                    "Failed to create directory {}: {}",
                    dir.display(),
                    e
                )));
            }
        }

        if let Err(e) = fs::write(&target, new_contents) {
            return Err(ToolError::execution_failed(format!(
                "Failed to write {}: {}",
                target.display(),
                e
            )));
        }

        let shown_path = target.display().to_string();
        let byte_count = new_contents.len();
        let summary = match already_present {
            true => format!("Wrote {} bytes to {}", byte_count, shown_path),
            false => format!("Created {} ({} bytes)", shown_path, byte_count),
        };

        let diff = make_unified_diff(&shown_path, &old_contents, new_contents);
        let mut report = if diff.is_empty() {
            format!("{summary}\n(no changes)")
        } else {
            format!("{diff}\n{summary}")
        };

        // Append LSP diagnostics for the written file when enabled (#428).
        let diagnostics = lsp_diagnostics_for_paths(context, &[target]).await;
        if !diagnostics.is_empty() {
            report = format!("{report}\n{diagnostics}");
        }

        Ok(ToolResult::success(report))
    }
}

// === EditFileTool ===

/// Tool for exact search/replace editing of a single file.
///
/// Every occurrence of `search` in the file is replaced with `replace`.
pub struct EditFileTool;

#[async_trait]
impl ToolSpec for EditFileTool {
    fn name(&self) -> &'static str {
        "edit_file"
    }

    fn description(&self) -> &'static str {
        "Replace text in a single file via exact search/replace. Use this instead of `sed -i` in `exec_shell` for in-place edits. For multi-hunk or cross-file changes, use `apply_patch` instead. Required: 'path', 'search' (exact text to find), 'replace' (text to substitute)."
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file"
                },
                "search": {
                    "type": "string",
                    "description": "Text to search for"
                },
                "replace": {
                    "type": "string",
                    "description": "Text to replace with"
                }
            },
            "required": ["path", "search", "replace"]
        })
    }

    fn capabilities(&self) -> Vec<ToolCapability> {
        vec![
            ToolCapability::WritesFiles,
            ToolCapability::Sandboxable,
            ToolCapability::RequiresApproval,
        ]
    }

    fn approval_requirement(&self) -> ApprovalRequirement {
        ApprovalRequirement::Suggest
    }

    /// Replace every occurrence of `input["search"]` with `input["replace"]`
    /// in the file at `input["path"]`.
    ///
    /// # Errors
    /// - `invalid_input` when `search` equals `replace`, or `search` is empty.
    /// - `execution_failed` when the file cannot be read or written, or when
    ///   `search` does not occur in it.
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
        let path_str = required_str(&input, "path")?;
        let search = required_str(&input, "search")?;
        let replace = required_str(&input, "replace")?;

        if search == replace {
            return Err(ToolError::invalid_input(
                "search and replace are identical, no change intended",
            ));
        }

        // An empty pattern matches at every character boundary, so
        // `str::replace` would splice `replace` between every character and
        // wreck the file. Refuse it before touching the disk.
        if search.is_empty() {
            return Err(ToolError::invalid_input("search must not be empty"));
        }

        let file_path = context.resolve_path(path_str)?;

        let contents = fs::read_to_string(&file_path).map_err(|e| {
            ToolError::execution_failed(format!("Failed to read {}: {}", file_path.display(), e))
        })?;

        let count = contents.matches(search).count();
        if count == 0 {
            return Err(ToolError::execution_failed(format!(
                "Search string not found in {}",
                file_path.display()
            )));
        }

        let updated = contents.replace(search, replace);

        fs::write(&file_path, &updated).map_err(|e| {
            ToolError::execution_failed(format!("Failed to write {}: {}", file_path.display(), e))
        })?;

        let display = file_path.display().to_string();
        let diff = make_unified_diff(&display, &contents, &updated);
        let summary = format!("Replaced {count} occurrence(s) in {display}");
        let body = if diff.is_empty() {
            format!("{summary}\n(no textual changes)")
        } else {
            format!("{diff}\n{summary}")
        };

        // Append LSP diagnostics for the edited file when enabled (#428).
        let diag_block = lsp_diagnostics_for_paths(context, &[file_path]).await;
        let full_body = if diag_block.is_empty() {
            body
        } else {
            format!("{body}\n{diag_block}")
        };

        Ok(ToolResult::success(full_body))
    }
}

// === ListDirTool ===

/// Tool that lists the immediate entries of a directory as a JSON array of
/// `{ "name", "is_dir" }` objects.
pub struct ListDirTool;

#[async_trait]
impl ToolSpec for ListDirTool {
    fn name(&self) -> &'static str {
        "list_dir"
    }

    fn description(&self) -> &'static str {
        "List entries in a directory relative to the workspace. Use this instead of `ls`, `ls -la`, or `find . -maxdepth 1` in `exec_shell` for directory listings."
    }

    fn input_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Relative path (default: .)"
                }
            },
            "required": []
        })
    }

    fn capabilities(&self) -> Vec<ToolCapability> {
        vec![ToolCapability::ReadOnly, ToolCapability::Sandboxable]
    }

    fn supports_parallel(&self) -> bool {
        true
    }

    /// List the directory at `input["path"]` (default `"."`).
    ///
    /// Entries are reported in whatever order `fs::read_dir` yields them.
    /// The first entry that cannot be read or typed aborts the listing with
    /// an `execution_failed` error.
    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
        let requested = optional_str(&input, "path").unwrap_or(".");
        let dir_path = context.resolve_path(requested)?;

        let reader = match fs::read_dir(&dir_path) {
            Ok(reader) => reader,
            Err(e) => {
                return Err(ToolError::execution_failed(format!(
                    "Failed to read directory {}: {}",
                    dir_path.display(),
                    e
                )));
            }
        };

        let entries = reader
            .map(|item| {
                let item = item.map_err(|e| ToolError::execution_failed(e.to_string()))?;
                let kind = item
                    .file_type()
                    .map_err(|e| ToolError::execution_failed(e.to_string()))?;
                Ok(json!({
                    "name": item.file_name().to_string_lossy().into_owned(),
                    "is_dir": kind.is_dir(),
                }))
            })
            .collect::<Result<Vec<Value>, ToolError>>()?;

        ToolResult::json(&entries).map_err(|e| ToolError::execution_failed(e.to_string()))
    }
}

// === Unit Tests ===

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    #[tokio::test]
    async fn test_read_file_tool() {
        // Happy path: a tiny file is returned verbatim.
        let workspace = tempdir().expect("tempdir");
        fs::write(workspace.path().join("test.txt"), "hello world").expect("write");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        let outcome = ReadFileTool
            .execute(json!({"path": "test.txt"}), &ctx)
            .await
            .expect("execute");

        assert!(outcome.success);
        assert_eq!(outcome.content, "hello world");
    }

    #[test]
    fn parse_pages_arg_accepts_single_page() {
        // A lone page number becomes the degenerate range `n..=n`;
        // surrounding whitespace is ignored.
        let bare = parse_pages_arg("3");
        let padded = parse_pages_arg("  7  ");
        assert_eq!(bare, Some((3, 3)));
        assert_eq!(padded, Some((7, 7)));
    }

    #[test]
    fn parse_pages_arg_accepts_range() {
        // The last case has whitespace on both sides of the dash so a
        // hand-typed `pages: "1 - 5"` is still accepted.
        let cases = [
            ("1-5", (1, 5)),
            ("10-20", (10, 20)),
            (" 1 - 5 ", (1, 5)),
        ];
        for (spec, expected) in cases {
            assert_eq!(parse_pages_arg(spec), Some(expected));
        }
    }

    #[test]
    fn parse_pages_arg_rejects_invalid_ranges() {
        // A reversed range would be passed on as `pdftotext -f 5 -l 1`,
        // which prints nothing; rejecting it lets the model re-issue.
        assert!(parse_pages_arg("5-1").is_none(), "end < start must reject");

        // Page numbers are 1-based; a zero would otherwise surface as a
        // confusing silent "no output".
        assert!(
            parse_pages_arg("0").is_none(),
            "zero single-page must reject"
        );
        assert!(parse_pages_arg("0-3").is_none(), "zero start must reject");

        // Empty, whitespace-only and non-numeric specs.
        for junk in ["", "   ", "abc"] {
            assert!(parse_pages_arg(junk).is_none());
        }
        assert!(parse_pages_arg("3.5").is_none(), "floats must reject");
    }

    #[test]
    fn parse_pages_arg_rejects_half_open_ranges() {
        // `1-`, `-5` and `-` are far more likely typos than deliberate
        // open-ended ranges, so they are refused instead of being
        // silently stretched to 0 or u32::MAX.
        for half_open in ["1-", "-5", "-"] {
            assert!(parse_pages_arg(half_open).is_none());
        }
    }

    #[test]
    fn parse_pages_arg_rejects_negative_numbers() {
        // A leading minus never parses as a `u32`, so the spec is
        // reported as `None` instead of wrapping into a huge positive
        // page number. Defensive, but worth pinning.
        let parsed = parse_pages_arg("-3-5");
        assert!(parsed.is_none());
    }

    #[tokio::test]
    async fn test_read_file_not_found() {
        // Reading a path that does not exist must surface an error.
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        let outcome = ReadFileTool
            .execute(json!({"path": "nonexistent.txt"}), &ctx)
            .await;

        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn read_file_small_file_returns_unwrapped_contents() {
        // Pins the fast-path contract harvested from #1451: a file of at
        // most 200 lines AND 16KB, read without an explicit range, comes
        // back unchanged so existing prompts never see <file> tags.
        const BODY: &str = "line 1\nline 2\nline 3\n";

        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());
        fs::write(workspace.path().join("small.txt"), BODY).expect("write");

        let outcome = ReadFileTool
            .execute(json!({ "path": "small.txt" }), &ctx)
            .await
            .expect("execute");

        assert!(outcome.success);
        assert_eq!(outcome.content, BODY);
        assert!(
            !outcome.content.contains("<file"),
            "small-file fast path must not wrap output"
        );
    }

    #[tokio::test]
    async fn read_file_explicit_range_wraps_in_file_tag_with_one_based_lines() {
        // Ten lines on disk; request four of them starting at line 3.
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());
        let mut body = String::new();
        for n in 1..=10 {
            body.push_str(&format!("line {n}\n"));
        }
        fs::write(workspace.path().join("ranged.txt"), &body).expect("write");

        let request = json!({ "path": "ranged.txt", "start_line": 3, "max_lines": 4 });
        let outcome = ReadFileTool
            .execute(request, &ctx)
            .await
            .expect("execute");
        let text = &outcome.content;

        assert!(outcome.success);
        assert!(
            text.contains("shown_lines=\"3-6\""),
            "1-based inclusive range must be reflected in shown_lines: {}",
            text
        );
        assert!(
            text.contains("next_start_line=\"7\""),
            "next_start_line must point one past the last shown line: {}",
            text
        );
        assert!(
            text.contains("     3│ line 3"),
            "rendered lines must start at the requested line number"
        );
        assert!(
            text.contains("     6│ line 6"),
            "rendered lines must end at the last in-range line"
        );
        assert!(
            !text.contains("     7│ line 7"),
            "lines past max_lines must be excluded"
        );
        assert!(text.contains("truncated=\"true\""));
    }

    #[tokio::test]
    async fn read_file_range_beyond_total_returns_no_content_sentinel() {
        // Asking for line 99 of a three-line file yields a sentinel
        // response, not an error.
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());
        fs::write(workspace.path().join("short.txt"), "only\nthree\nlines\n").expect("write");

        let outcome = ReadFileTool
            .execute(json!({ "path": "short.txt", "start_line": 99 }), &ctx)
            .await
            .expect("execute");

        assert!(
            outcome.success,
            "out-of-range must not raise — it's a sentinel"
        );
        for marker in ["[NO CONTENT]", "shown_lines=\"none\"", "truncated=\"false\""] {
            assert!(outcome.content.contains(marker));
        }
    }

    #[tokio::test]
    async fn read_file_rejects_zero_start_line_and_zero_max_lines() {
        // Both range parameters must be strictly positive.
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());
        fs::write(workspace.path().join("any.txt"), "x\n").expect("write");
        let reader = ReadFileTool;

        let with_zero_start = json!({ "path": "any.txt", "start_line": 0 });
        assert!(
            reader.execute(with_zero_start, &ctx).await.is_err(),
            "start_line=0 must error (1-based)"
        );

        let with_zero_max = json!({ "path": "any.txt", "max_lines": 0 });
        assert!(
            reader.execute(with_zero_max, &ctx).await.is_err(),
            "max_lines=0 must error"
        );
    }

    #[tokio::test]
    async fn read_file_clamps_max_lines_to_hard_cap() {
        // 600 lines on disk, 5000 requested: the window must stop at the
        // 500-line hard cap.
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());
        let mut body = String::new();
        for n in 1..=600 {
            body.push_str(&format!("L{n}\n"));
        }
        fs::write(workspace.path().join("bigish.txt"), &body).expect("write");

        let outcome = ReadFileTool
            .execute(json!({ "path": "bigish.txt", "max_lines": 5000 }), &ctx)
            .await
            .expect("execute");
        let text = &outcome.content;

        assert!(
            text.contains("   500│ L500"),
            "line 500 should be in the window (max_lines clamped to 500)"
        );
        assert!(
            !text.contains("   501│ L501"),
            "line 501 must be outside the clamped window"
        );
        assert!(text.contains("next_start_line=\"501\""));
        assert!(text.contains("truncated=\"true\""));
    }

    #[tokio::test]
    async fn read_file_large_file_without_range_uses_default_window() {
        // A file past the small-file thresholds, read with no explicit
        // range, must get the default 200-line window rather than the
        // raw unbounded contents — the token-budget control this
        // windowing exists for.
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());
        let mut body = String::new();
        for n in 1..=250 {
            body.push_str(&format!("row {n}\n"));
        }
        fs::write(workspace.path().join("big.txt"), &body).expect("write");

        let outcome = ReadFileTool
            .execute(json!({ "path": "big.txt" }), &ctx)
            .await
            .expect("execute");
        let text = &outcome.content;

        for expected in [
            "<file ",
            "shown_lines=\"1-200\"",
            "next_start_line=\"201\"",
            "     1│ row 1",
            "   200│ row 200",
        ] {
            assert!(text.contains(expected));
        }
        assert!(
            !text.contains("   201│ row 201"),
            "default max_lines=200 must hold"
        );
    }

    /// Calling `read_file` with an empty argument object must fail with a
    /// validation error that names the missing `path` field.
    #[tokio::test]
    async fn test_read_file_missing_path() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        let outcome = ReadFileTool.execute(json!({}), &ctx).await;
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        let names_missing_field =
            message.contains("Failed to validate input: missing required field 'path'");
        assert!(names_missing_field);
    }

    /// A `.PDF` extension is enough for detection even when the bytes are
    /// not a PDF at all.
    #[test]
    fn pdf_detected_by_extension() {
        let dir = tempdir().expect("tempdir");
        let candidate = dir.path().join("paper.PDF");
        fs::write(&candidate, b"not really a pdf, but extension says yes").unwrap();

        let detected = is_pdf(&candidate).unwrap();
        assert!(detected);
    }

    /// A file with no extension is still detected when its content starts
    /// with the `%PDF-` header.
    #[test]
    fn pdf_detected_by_magic_bytes_without_extension() {
        let dir = tempdir().expect("tempdir");
        let candidate = dir.path().join("blob");
        fs::write(&candidate, b"%PDF-1.7\nrest of bytes").unwrap();

        let detected = is_pdf(&candidate).unwrap();
        assert!(detected);
    }

    /// A plain `.txt` file with ordinary text content is not reported as a PDF.
    #[test]
    fn non_pdf_not_detected() {
        let dir = tempdir().expect("tempdir");
        let candidate = dir.path().join("notes.txt");
        fs::write(&candidate, "hello").unwrap();

        let detected = is_pdf(&candidate).unwrap();
        assert!(!detected);
    }

    /// Table-driven check of `parse_pages_arg`.
    ///
    /// Accepted: a single page, a range, and a range padded with whitespace.
    /// Rejected (`None`): page `0`, a reversed range, the empty string, and
    /// non-numeric input.
    #[test]
    fn pages_arg_parses_single_and_range() {
        let cases = [
            ("5", Some((5, 5))),
            ("1-10", Some((1, 10))),
            (" 3 - 7 ", Some((3, 7))),
            ("0", None),
            ("10-3", None),
            ("", None),
            ("abc", None),
        ];
        for (input, expected) in cases {
            assert_eq!(parse_pages_arg(input), expected);
        }
    }

    /// Sample PDF shipped with the repo for parity tests against the
    /// pure-Rust extractor. 38 pages, born-digital LaTeX (arXiv 2512.24601).
    ///
    /// The fixture lives outside the tui crate, in the workspace root's
    /// `docs/` directory, which is why the path climbs two levels. It is a
    /// relative path, so it is resolved against the test process's current
    /// directory — presumably the crate directory under `cargo test`
    /// (NOTE(review): confirm for other test runners).
    const SAMPLE_PDF_PATH: &str = "../../docs/2512.24601v2.pdf";

    /// Reports whether the sample PDF fixture exists at `SAMPLE_PDF_PATH`.
    ///
    /// Tests that need the fixture call this first and return early when it
    /// is missing.
    fn sample_pdf_present() -> bool {
        let fixture = std::path::Path::new(SAMPLE_PDF_PATH);
        fixture.exists()
    }

    /// Whole-document extraction of the sample PDF must recover its title.
    #[test]
    fn read_pdf_via_pdf_extract_finds_known_title() {
        // The fixture may be absent (sparse clones, shallow worktrees); in
        // that case there is nothing to check and the test passes silently —
        // `cargo test` reports the same `ok` either way.
        if sample_pdf_present() {
            let fixture = std::path::PathBuf::from(SAMPLE_PDF_PATH);
            let extracted = read_pdf_via_pdf_extract(&fixture, None).expect("extract whole PDF");
            assert!(extracted.success);

            // Leading slice of the output, shown only if the title is missing.
            let prefix: String = extracted.content.chars().take(200).collect();
            assert!(
                extracted.content.contains("Recursive Language Models"),
                "pdf-extract should recover the document title; got prefix {:?}",
                &prefix
            );
        }
    }

    /// Page windows are honoured: pages 1-2 yield at least as much text as
    /// page 1 alone, and the title survives a page-1 crop.
    #[test]
    fn read_pdf_via_pdf_extract_respects_pages_window() {
        // The fixture may be absent (sparse / shallow checkout); the test
        // then passes silently — `cargo test` reports the same `ok`.
        if sample_pdf_present() {
            let fixture = std::path::PathBuf::from(SAMPLE_PDF_PATH);
            let page_one = read_pdf_via_pdf_extract(&fixture, Some((1, 1))).expect("single page");
            let pages_one_two =
                read_pdf_via_pdf_extract(&fixture, Some((1, 2))).expect("two pages");
            assert!(page_one.success);
            assert!(pages_one_two.success);

            // Widening the window can only add text (most documents have
            // non-trivial body text past page 1).
            let one_len = page_one.content.len();
            let two_len = pages_one_two.content.len();
            assert!(
                two_len >= one_len,
                "expected pages 1-2 ({} bytes) >= page 1 ({} bytes)",
                two_len,
                one_len
            );

            // The title lives on page 1 and must survive the crop.
            assert!(page_one.content.contains("Recursive Language Models"));
        }
    }

    /// End-to-end: `ReadFileTool::execute` on the sample PDF with
    /// `pages: "1"` must return the title via the bundled extractor
    /// (no pdftotext required on the test host).
    #[tokio::test]
    async fn read_file_pdf_path_uses_pdf_extract_by_default() {
        // The fixture may be absent (sparse / shallow checkout); the test
        // then passes silently — `cargo test` reports the same `ok`.
        if sample_pdf_present() {
            // The fixture lives outside the tui crate, so the ToolContext is
            // rooted two levels above the crate manifest and the file is
            // addressed by a workspace-relative path. This exercises the full
            // execute → is_pdf → read_pdf dispatch.
            let mut workspace = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
            workspace.push("../../");
            let ctx = ToolContext::new(workspace);

            let request = json!({"path": "docs/2512.24601v2.pdf", "pages": "1"});
            let result = ReadFileTool.execute(request, &ctx).await.expect("execute");

            assert!(result.success);
            let has_title = result.content.contains("Recursive Language Models");
            assert!(has_title, "page-1 extraction must surface the title");
        }
    }

    /// Serialises tests that mutate `DEEPSEEK_CONFIG_PATH` so they don't
    /// race against each other — env vars are process-global and the
    /// settings loader inspects this var on every call.
    ///
    /// The mutex protects no data of its own (`()`); holding its guard is
    /// what grants a test exclusive use of the variable. Acquire it before
    /// touching the variable and keep the guard alive until the variable has
    /// been restored.
    static DS_CONFIG_PATH_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Drop guard that snapshots `DEEPSEEK_CONFIG_PATH` when created and puts
    /// the snapshot back when it goes out of scope.
    struct ConfigPathEnvGuard {
        /// Value of the variable at capture time; `None` if it was unset.
        prior: Option<std::ffi::OsString>,
    }
    impl ConfigPathEnvGuard {
        /// Records the current value of `DEEPSEEK_CONFIG_PATH`.
        fn capture() -> Self {
            let prior = std::env::var_os("DEEPSEEK_CONFIG_PATH");
            Self { prior }
        }
    }
    impl Drop for ConfigPathEnvGuard {
        /// Restores the captured value, or removes the variable if it was
        /// unset at capture time.
        fn drop(&mut self) {
            // SAFETY: scoped to the test process; reverts to the captured value.
            if let Some(value) = &self.prior {
                unsafe { std::env::set_var("DEEPSEEK_CONFIG_PATH", value) }
            } else {
                unsafe { std::env::remove_var("DEEPSEEK_CONFIG_PATH") }
            }
        }
    }

    /// With `prefer_external_pdftotext = true` in the settings file,
    /// `read_pdf` must dispatch to the external `pdftotext` binary.
    ///
    /// Two concerns in one test: the dispatch must (a) call pdftotext when
    /// present, and (b) return the structured `binary_unavailable` response
    /// when pdftotext is missing — both branches were covered by the
    /// pre-v0.8.32 default. Which branch is asserted depends on whether
    /// `pdftotext` can be spawned on the test host.
    ///
    /// Sync test (calls `read_pdf` directly, not the async ReadFileTool
    /// wrapper) so the env-var lock is never held across an `.await`.
    #[test]
    fn read_pdf_routes_to_pdftotext_when_setting_opted_in() {
        // Recover from poisoning instead of unwrapping: the mutex guards no
        // data (`()`), so a poisoned lock carries no corrupt state. Without
        // this, one failing env-var test would make every later one die on a
        // `PoisonError` and bury the real failure.
        let _lock = DS_CONFIG_PATH_LOCK
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        // Declared after the lock so it drops first: the variable is restored
        // while the lock is still held.
        let _guard = ConfigPathEnvGuard::capture();

        let tmp = tempdir().expect("tempdir");
        let config_dir = tmp.path().join("cfg");
        fs::create_dir_all(&config_dir).unwrap();
        let config_path = config_dir.join("config.toml");
        fs::write(&config_path, "").unwrap();
        // The sibling settings.toml is what Settings::load() reads.
        fs::write(
            config_dir.join("settings.toml"),
            "prefer_external_pdftotext = true\n",
        )
        .unwrap();
        // SAFETY: serialised by DS_CONFIG_PATH_LOCK; reverted by the guard.
        unsafe {
            std::env::set_var("DEEPSEEK_CONFIG_PATH", &config_path);
        }

        let pdf_path = tmp.path().join("doc.pdf");
        fs::write(&pdf_path, b"%PDF-1.7\n%%EOF").unwrap();
        let outcome = read_pdf(&pdf_path, None);

        // Probe for the binary: `status()` is `Ok` whenever the process could
        // be spawned, regardless of its exit code.
        let pdftotext_present = Command::new("pdftotext")
            .arg("-v")
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .is_ok();

        if pdftotext_present {
            // pdftotext on a stub `%PDF-1.7\n%%EOF` cannot find a real
            // trailer/xref table and fails with `exit 1`. That failure
            // text mentions pdftotext explicitly — proof we routed
            // through Poppler rather than falling back to the bundled
            // extractor. Validate by inspecting the error message.
            let err = outcome.expect_err("malformed PDF must surface the pdftotext error");
            let msg = err.to_string();
            assert!(
                msg.contains("pdftotext"),
                "error message must reference pdftotext; got {msg}"
            );
        } else {
            let result = outcome.expect("binary_unavailable is a structured success, not an Err");
            assert!(result.success);
            assert!(result.content.contains("binary_unavailable"));
            assert!(result.content.contains("pdftotext"));
            assert!(
                result.content.contains("prefer_external_pdftotext"),
                "hint must reference the opt-in flag the user set"
            );
        }
    }

    /// Writing a new file succeeds, reports a "Created" summary preceded by
    /// a unified diff, and leaves the exact content on disk.
    #[tokio::test]
    async fn test_write_file_tool() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        let request = json!({"path": "output.txt", "content": "test content"});
        let result = WriteFileTool.execute(request, &ctx).await.expect("execute");
        let report = &result.content;

        assert!(result.success);
        // New file → "Created …" summary; the unified diff above the summary
        // primes the TUI's diff-aware renderer (#505).
        for marker in ["Created", "--- a/", "+test content"] {
            assert!(report.contains(marker), "{}", report);
        }

        // The file on disk holds exactly what was requested.
        let on_disk = fs::read_to_string(workspace.path().join("output.txt")).expect("read");
        assert_eq!(on_disk, "test content");
    }

    /// Writing to a path whose parent directories do not exist yet succeeds
    /// and the file ends up at the nested location.
    #[tokio::test]
    async fn test_write_file_creates_dirs() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        let request = json!({"path": "subdir/nested/file.txt", "content": "nested content"});
        let result = WriteFileTool.execute(request, &ctx).await.expect("execute");
        assert!(result.success);

        // Read back through the nested path to prove the directories exist.
        let nested = workspace.path().join("subdir/nested/file.txt");
        let on_disk = fs::read_to_string(nested).expect("read");
        assert_eq!(on_disk, "nested content");
    }

    /// A search/replace edit rewrites every occurrence, reports the count,
    /// and includes an inline unified diff in the result.
    #[tokio::test]
    async fn test_edit_file_tool() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        // Seed a file in which the search term appears twice.
        let target = workspace.path().join("edit_me.txt");
        fs::write(&target, "hello world hello").expect("write");

        let request = json!({"path": "edit_me.txt", "search": "hello", "replace": "hi"});
        let result = EditFileTool.execute(request, &ctx).await.expect("execute");
        let report = &result.content;

        assert!(result.success);
        assert!(report.contains("2 occurrence(s)"));
        // Inline diff (#505) — the unified diff lands above the summary
        // line so the TUI's diff-aware renderer kicks in.
        for marker in ["--- a/", "-hello world hello", "+hi world hi"] {
            assert!(report.contains(marker), "{}", report);
        }

        // The edit must also be visible on disk.
        let on_disk = fs::read_to_string(&target).expect("read");
        assert_eq!(on_disk, "hi world hi");
    }

    /// Editing fails with a "not found" error when the search string does
    /// not occur in the file.
    #[tokio::test]
    async fn test_edit_file_not_found() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        // The file content deliberately lacks the search term.
        fs::write(workspace.path().join("no_match.txt"), "foo bar baz").expect("write");

        let request = json!({"path": "no_match.txt", "search": "hello", "replace": "hi"});
        let outcome = EditFileTool.execute(request, &ctx).await;

        assert!(outcome.is_err());
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("not found"));
    }

    /// An edit whose `search` and `replace` are the same text is rejected
    /// with an explanatory error, and the file is left untouched.
    #[tokio::test]
    async fn test_edit_file_rejects_identical_search_and_replace() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        // One literal serves as file content, search text and replacement.
        let text = "a := \"foo\"";
        let target = workspace.path().join("same.txt");
        fs::write(&target, text).expect("write");

        let request = json!({
            "path": "same.txt",
            "search": text,
            "replace": text
        });
        let outcome = EditFileTool.execute(request, &ctx).await;

        assert!(outcome.is_err());
        let err = outcome.unwrap_err().to_string();
        let explains_noop = err.contains("search and replace are identical");
        assert!(
            explains_noop,
            "error must explain the no-op input: {err}"
        );

        let on_disk = fs::read_to_string(&target).expect("read");
        assert_eq!(on_disk, text);
    }

    /// #157 — a call that passes `replacement` where `replace` is expected
    /// must fail with an error that names the missing field and also lists
    /// the fields that were supplied, so the model can self-correct without
    /// a second round-trip.
    #[tokio::test]
    async fn test_edit_file_wrong_param_name_shows_provided_fields() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        fs::write(workspace.path().join("test.txt"), "hello world").expect("write");

        // Misnamed parameter: `replacement` instead of `replace`.
        let request = json!({"path": "test.txt", "search": "hello", "replacement": "hi"});
        let outcome = EditFileTool.execute(request, &ctx).await;

        assert!(outcome.is_err());
        let err = outcome.unwrap_err().to_string();

        // The error must name both the missing field AND the provided ones.
        let names_missing = err.contains("missing required field 'replace'");
        assert!(names_missing, "error must name the missing field: {err}");

        let lists_provided = ["Input provided:", "provided:"]
            .iter()
            .any(|needle| err.contains(*needle));
        assert!(
            lists_provided,
            "error must list the fields the model did supply: {err}"
        );
    }

    /// Listing with no `path` argument reports the files and the
    /// subdirectory created at the workspace root, flagging the directory
    /// with `"is_dir": true`.
    #[tokio::test]
    async fn test_list_dir_tool() {
        let workspace = tempdir().expect("tempdir");
        let root = workspace.path();
        let ctx = ToolContext::new(root.to_path_buf());

        // Two empty files plus one directory.
        for name in ["file1.txt", "file2.txt"] {
            fs::write(root.join(name), "").expect("write");
        }
        fs::create_dir(root.join("subdir")).expect("mkdir");

        let listing = ListDirTool.execute(json!({}), &ctx).await.expect("execute");

        assert!(listing.success);
        for needle in ["file1.txt", "file2.txt", "subdir", "\"is_dir\": true"] {
            assert!(listing.content.contains(needle));
        }
    }

    /// Listing an explicit subdirectory reports the file inside it.
    #[tokio::test]
    async fn test_list_dir_with_path() {
        let workspace = tempdir().expect("tempdir");
        let ctx = ToolContext::new(workspace.path().to_path_buf());

        // `mydir/` containing a single empty file.
        let nested_dir = workspace.path().join("mydir");
        fs::create_dir(&nested_dir).expect("mkdir");
        fs::write(nested_dir.join("nested.txt"), "").expect("write");

        let request = json!({"path": "mydir"});
        let listing = ListDirTool.execute(request, &ctx).await.expect("execute");

        assert!(listing.success);
        assert!(listing.content.contains("nested.txt"));
    }

    /// `read_file` is read-only, sandboxable, and auto-approved.
    #[test]
    fn test_read_file_tool_properties() {
        assert_eq!(ReadFileTool.name(), "read_file");
        assert!(ReadFileTool.is_read_only());
        assert!(ReadFileTool.is_sandboxable());

        let approval = ReadFileTool.approval_requirement();
        assert_eq!(approval, ApprovalRequirement::Auto);
    }

    /// `write_file` mutates state (not read-only), is sandboxable, and uses
    /// the `Suggest` approval requirement.
    #[test]
    fn test_write_file_tool_properties() {
        assert_eq!(WriteFileTool.name(), "write_file");
        assert!(!WriteFileTool.is_read_only());
        assert!(WriteFileTool.is_sandboxable());

        let approval = WriteFileTool.approval_requirement();
        assert_eq!(approval, ApprovalRequirement::Suggest);
    }

    /// `edit_file` mutates state (not read-only), is sandboxable, and uses
    /// the `Suggest` approval requirement.
    #[test]
    fn test_edit_file_tool_properties() {
        assert_eq!(EditFileTool.name(), "edit_file");
        assert!(!EditFileTool.is_read_only());
        assert!(EditFileTool.is_sandboxable());

        let approval = EditFileTool.approval_requirement();
        assert_eq!(approval, ApprovalRequirement::Suggest);
    }

    /// `list_dir` is read-only, sandboxable, and auto-approved.
    #[test]
    fn test_list_dir_tool_properties() {
        assert_eq!(ListDirTool.name(), "list_dir");
        assert!(ListDirTool.is_read_only());
        assert!(ListDirTool.is_sandboxable());

        let approval = ListDirTool.approval_requirement();
        assert_eq!(approval, ApprovalRequirement::Auto);
    }

    /// The read-only tools (`read_file`, `list_dir`) report parallel
    /// support; `write_file` does not.
    #[test]
    fn test_parallel_support_flags() {
        let read_parallel = ReadFileTool.supports_parallel();
        let list_parallel = ListDirTool.supports_parallel();
        let write_parallel = WriteFileTool.supports_parallel();

        assert!(read_parallel);
        assert!(list_parallel);
        assert!(!write_parallel);
    }

    /// Sanity-checks the JSON input schema of each tool:
    /// - `read_file` exposes `type` and `properties`;
    /// - `write_file` requires `path` and `content`;
    /// - `edit_file` has exactly three required fields;
    /// - `list_dir` has an empty `required` array (its `path` is optional).
    #[test]
    fn test_input_schemas() {
        let read_schema = ReadFileTool.input_schema();
        for key in ["type", "properties"] {
            assert!(read_schema.get(key).is_some());
        }

        let write_schema = WriteFileTool.input_schema();
        let write_required = write_schema
            .get("required")
            .and_then(|entry| entry.as_array())
            .expect("write schema should include required array");
        for field in ["path", "content"] {
            assert!(write_required.iter().any(|v| v.as_str() == Some(field)));
        }

        let edit_schema = EditFileTool.input_schema();
        let edit_required = edit_schema
            .get("required")
            .and_then(|entry| entry.as_array())
            .expect("edit schema should include required array");
        assert_eq!(edit_required.len(), 3);

        let list_schema = ListDirTool.input_schema();
        let list_required = list_schema
            .get("required")
            .and_then(|entry| entry.as_array())
            .expect("list schema should include required array");
        assert!(list_required.is_empty()); // path is optional
    }
}