diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a6466a7..b0a109e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 2026 cleanly; it is purely a rendering-quality knob, not a correctness one. Set via `/set synchronized_output ` or in `~/.config/deepseek/settings.toml`. +- **`read_file` accepts `start_line` and `max_lines`** for chunked, + bounded reads of large files (#1450, harvested from PR #1451 by + **@Oliver-ZPLiu**). Default window is 200 lines / ~16 KB; the hard + cap is 500 lines. Small files (≤ 200 lines AND ≤ 16 KB) still + return their contents unchanged, so existing prompts that read + config files / single source files see no behavior change. Large + files now return a `<file>`-wrapped, line-numbered window with + `shown_lines`, `truncated`, and `next_start_line` attributes plus + a `[TRUNCATED]` continuation hint — so the model can page through + a 50 KB file in 16 KB slices instead of dragging the whole thing + into the conversation context on every turn. PDFs continue to use + `pages`; `start_line` / `max_lines` apply to text files only. ## [0.8.30] - 2026-05-11 diff --git a/crates/tui/src/tools/file.rs b/crates/tui/src/tools/file.rs index ebbd66e8..d0d9ad55 100644 --- a/crates/tui/src/tools/file.rs +++ b/crates/tui/src/tools/file.rs @@ -26,7 +26,7 @@ impl ToolSpec for ReadFileTool { } fn description(&self) -> &'static str { - "Read a UTF-8 file from the workspace. Use this instead of `cat`, `head`, `tail`, or `sed -n '..p'` in `exec_shell` — it's faster, sandbox-aware, and skips the approval prompt. Plain text is returned as-is; PDFs are auto-extracted via `pdftotext` (poppler) when available. Cannot read images or non-PDF binaries." + "Read a UTF-8 file from the workspace. Use this instead of `cat`, `head`, `tail`, or `sed -n '..p'` in `exec_shell` — it's faster, sandbox-aware, and skips the approval prompt. 
Plain text is returned as-is; PDFs are auto-extracted via `pdftotext` (poppler) when available. Cannot read images or non-PDF binaries.\n\nFor large files, use `start_line` and `max_lines` to read in chunks. By default, returns at most 200 lines (~16KB). If `truncated=\"true\"` in the response, use `next_start_line` to continue reading. For PDFs, use `pages` instead — `start_line`/`max_lines` only apply to text files."
     }
 
     fn input_schema(&self) -> Value {
@@ -37,6 +37,14 @@ impl ToolSpec for ReadFileTool {
                     "type": "string",
                     "description": "Path to the file (relative to workspace or absolute)"
                 },
+                "start_line": {
+                    "type": "integer",
+                    "description": "Starting line (1-based, default 1)"
+                },
+                "max_lines": {
+                    "type": "integer",
+                    "description": "Maximum lines to return (default 200, max 500)"
+                },
                 "pages": {
                     "type": "string",
                     "description": "PDF only: page range to extract, e.g. \"1-5\" or \"10\". Ignored for non-PDF files."
@@ -55,6 +63,20 @@ impl ToolSpec for ReadFileTool {
     }
 
     async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
+        // Bounded output for large files. The small-file fast path keeps the
+        // historical "return contents unchanged" behavior so existing flows
+        // (small configs, single source files, etc.) don't suddenly start
+        // seeing wrapped output. Once a file is large or the caller asks
+        // for an explicit range, we switch to a numbered, line-tagged
+        // window with continuation hints so the model can page through
+        // without re-loading the entire file on every turn. Harvested
+        // from PR #1451 by @Oliver-ZPLiu, closes part of #1450.
+        const DEFAULT_READ_LINES: usize = 200;
+        const HARD_MAX_READ_LINES: usize = 500;
+        const MAX_VISIBLE_BYTES: usize = 16 * 1024;
+        const SMALL_FILE_LINES: usize = 200;
+        const SMALL_FILE_BYTES: usize = 16 * 1024;
+
         let path_str = required_str(&input, "path")?;
         let file_path = context.resolve_path(path_str)?;
         let pages = optional_str(&input, "pages");
@@ -67,7 +89,120 @@ impl ToolSpec for ReadFileTool {
             ToolError::execution_failed(format!("Failed to read {}: {}", file_path.display(), e))
         })?;
 
-        Ok(ToolResult::success(contents))
+        let total_lines = contents.lines().count();
+        let total_bytes = contents.len();
+        let explicit_range = input
+            .get("start_line")
+            .or_else(|| input.get("max_lines"))
+            .is_some();
+
+        // Small-file fast path. Only applies when the caller didn't pass an
+        // explicit range — otherwise an explicit `start_line = 5` on a
+        // tiny file would silently ignore the request.
+        if !explicit_range && total_lines <= SMALL_FILE_LINES && total_bytes <= SMALL_FILE_BYTES {
+            return Ok(ToolResult::success(contents));
+        }
+
+        // Optional positive-integer argument. Absent or `null` selects the
+        // default; zero, negative, fractional, or non-numeric values are
+        // rejected instead of being silently replaced by the default.
+        let positive_arg = |key: &str, message: &str| -> std::result::Result<Option<u64>, ToolError> {
+            match input.get(key) {
+                None | Some(Value::Null) => Ok(None),
+                Some(value) => match value.as_u64() {
+                    Some(n) if n > 0 => Ok(Some(n)),
+                    _ => Err(ToolError::invalid_input(message.to_string())),
+                },
+            }
+        };
+
+        // Clamp in `u64` before narrowing so the cast can never wrap on
+        // targets where `usize` is smaller; an oversized start simply lands
+        // past the last line.
+        let start_line =
+            positive_arg("start_line", "start_line must be 1-based and greater than 0")?
+                .map_or(1, |n| n.min(usize::MAX as u64) as usize);
+        let max_lines = positive_arg("max_lines", "max_lines must be greater than 0")?
+            .map_or(DEFAULT_READ_LINES, |n| n.min(HARD_MAX_READ_LINES as u64) as usize);
+
+        // `start_line > total_lines` is not an error — it lets the model
+        // page past the end without raising. Returns an empty-content
+        // sentinel so subsequent reads can stop.
+        if start_line > total_lines {
+            let output = format!(
+                "<file path=\"{path_str}\" total_lines=\"{total_lines}\" shown_lines=\"0-0\" truncated=\"false\">\n\
+                 \n\
+                 [NO CONTENT] start_line {start_line} is beyond total_lines {total_lines}.\n\
+                 </file>"
+            );
+            return Ok(ToolResult::success(output));
+        }
+
+        let zero_based_start = start_line - 1;
+        let shown_first = start_line;
+
+        // Render whole numbered lines until `max_lines` or the byte budget is
+        // used up. Stopping on a line boundary keeps `shown_lines` and
+        // `next_start_line` truthful, so paging never skips lines that did
+        // not fit into this window.
+        let mut numbered = String::new();
+        let mut shown_count = 0usize;
+        let mut truncated_by_bytes = false;
+        let mut cut_mid_line = false;
+        let window = contents.lines().skip(zero_based_start).take(max_lines);
+        for (offset, line) in window.enumerate() {
+            let line_no = start_line + offset;
+            let rendered = format!("{line_no:>6}│ {line}\n");
+            if numbered.len() + rendered.len() > MAX_VISIBLE_BYTES {
+                truncated_by_bytes = true;
+                if shown_count == 0 {
+                    // A single line larger than the whole budget: emit a
+                    // UTF-8-safe prefix so the call still makes progress.
+                    let mut end = MAX_VISIBLE_BYTES;
+                    while end > 0 && !rendered.is_char_boundary(end) {
+                        end -= 1;
+                    }
+                    numbered.push_str(&rendered[..end]);
+                    shown_count = 1;
+                    cut_mid_line = true;
+                }
+                break;
+            }
+            numbered.push_str(&rendered);
+            shown_count += 1;
+        }
+
+        let shown_last = zero_based_start + shown_count; // 1-based, inclusive
+        let has_more_lines = shown_last < total_lines;
+        let truncated = has_more_lines || truncated_by_bytes;
+        let next_start = shown_last + 1;
+
+        let mut attrs = format!(
+            "path=\"{path_str}\" total_lines=\"{total_lines}\" shown_lines=\"{shown_first}-{shown_last}\" truncated=\"{truncated}\""
+        );
+        if has_more_lines {
+            attrs.push_str(&format!(" next_start_line=\"{next_start}\""));
+        }
+
+        let mut output = format!("<file {attrs}>\n{numbered}");
+        if has_more_lines {
+            output.push_str(&format!(
+                "\n[TRUNCATED] Showing lines {shown_first}-{shown_last} of {total_lines}. To continue, call read_file with path=\"{path_str}\" start_line={next_start} max_lines={max_lines}\n"
+            ));
+        }
+        if truncated_by_bytes {
+            let detail = if cut_mid_line {
+                format!("Line {shown_last} is longer than the limit and was cut mid-line.")
+            } else {
+                format!("Output stops after line {shown_last}, the last line that fit.")
+            };
+            output.push_str(&format!(
+                "\n[TRUNCATED] The selected range exceeded 16KB. {detail}\n"
+            ));
+        }
+        output.push_str("</file>");
+
+        Ok(ToolResult::success(output))
     }
 }
 
@@ -531,6 +666,157 @@ mod tests {
         assert!(result.is_err());
     }
 
+    #[tokio::test]
+    async fn read_file_small_file_returns_unwrapped_contents() {
+        // Small files (≤ 200 lines AND ≤ 16KB, no explicit range) keep
+        // the historical "return contents unchanged" behavior so
+        // existing prompts don't suddenly see tags appear.
+        // Harvested from #1451 — pin the fast-path contract.
+        let tmp = tempdir().expect("tempdir");
+        let ctx = ToolContext::new(tmp.path().to_path_buf());
+        let file = tmp.path().join("small.txt");
+        fs::write(&file, "line 1\nline 2\nline 3\n").expect("write");
+        let tool = ReadFileTool;
+        let result = tool
+            .execute(json!({ "path": "small.txt" }), &ctx)
+            .await
+            .expect("execute");
+        assert!(result.success);
+        assert_eq!(result.content, "line 1\nline 2\nline 3\n");
+        assert!(
+            !result.content.contains("